]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
Merge pull request #4236 from mihalicyn/github_check_fixes
[mirror_lxc.git] / src / lxc / utils.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
7 #include <ctype.h>
8 #include <dirent.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <grp.h>
12 #include <inttypes.h>
13 #include <libgen.h>
14 #include <pthread.h>
15 #include <signal.h>
16 #include <stddef.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/mman.h>
21 #include <sys/mount.h>
22 #include <sys/param.h>
23 #include <sys/prctl.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <sys/wait.h>
27 #include <unistd.h>
28
29 #include "config.h"
30 #include "log.h"
31 #include "lsm/lsm.h"
32 #include "lxclock.h"
33 #include "memory_utils.h"
34 #include "namespace.h"
35 #include "open_utils.h"
36 #include "parse.h"
37 #include "process_utils.h"
38 #include "syscall_wrappers.h"
39 #include "utils.h"
40
41 #if !HAVE_STRLCPY
42 #include "strlcpy.h"
43 #endif
44
45 #if !HAVE_STRLCAT
46 #include "strlcat.h"
47 #endif
48
49 #ifndef O_PATH
50 #define O_PATH 010000000
51 #endif
52
53 #ifndef O_NOFOLLOW
54 #define O_NOFOLLOW 00400000
55 #endif
56
57 lxc_log_define(utils, lxc);
58
59 /*
60 * if path is btrfs, tries to remove it and any subvolumes beneath it
61 */
62 extern bool btrfs_try_remove_subvol(const char *path);
63
64 static int _recursive_rmdir(const char *dirname, dev_t pdev,
65 const char *exclude, int level, bool onedev)
66 {
67 __do_closedir DIR *dir = NULL;
68 int failed = 0;
69 bool hadexclude = false;
70 int ret;
71 struct dirent *direntp;
72 char pathname[PATH_MAX];
73
74 dir = opendir(dirname);
75 if (!dir)
76 return log_error(-1, "Failed to open \"%s\"", dirname);
77
78 while ((direntp = readdir(dir))) {
79 int rc;
80 struct stat mystat;
81
82 if (strequal(direntp->d_name, ".") ||
83 strequal(direntp->d_name, ".."))
84 continue;
85
86 rc = strnprintf(pathname, sizeof(pathname), "%s/%s", dirname, direntp->d_name);
87 if (rc < 0) {
88 ERROR("The name of path is too long");
89 failed = 1;
90 continue;
91 }
92
93 if (!level && exclude && strequal(direntp->d_name, exclude)) {
94 ret = rmdir(pathname);
95 if (ret < 0) {
96 switch (errno) {
97 case ENOTEMPTY:
98 INFO("Not deleting snapshot \"%s\"", pathname);
99 hadexclude = true;
100 break;
101 case ENOTDIR:
102 ret = unlink(pathname);
103 if (ret)
104 INFO("Failed to remove \"%s\"", pathname);
105 break;
106 default:
107 SYSERROR("Failed to rmdir \"%s\"", pathname);
108 failed = 1;
109 break;
110 }
111 }
112
113 continue;
114 }
115
116 ret = lstat(pathname, &mystat);
117 if (ret) {
118 SYSERROR("Failed to stat \"%s\"", pathname);
119 failed = 1;
120 continue;
121 }
122
123 if (onedev && mystat.st_dev != pdev) {
124 if (btrfs_try_remove_subvol(pathname))
125 INFO("Removed btrfs subvolume at \"%s\"", pathname);
126 continue;
127 }
128
129 if (S_ISDIR(mystat.st_mode)) {
130 if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
131 failed = 1;
132 } else {
133 ret = unlink(pathname);
134 if (ret < 0) {
135 __do_close int fd = -EBADF;
136
137 fd = open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK);
138 if (fd >= 0) {
139 /* The file might be marked immutable. */
140 int attr = 0;
141 ret = ioctl(fd, FS_IOC_GETFLAGS, &attr);
142 if (ret < 0)
143 SYSERROR("Failed to retrieve file flags");
144 attr &= ~FS_IMMUTABLE_FL;
145 ret = ioctl(fd, FS_IOC_SETFLAGS, &attr);
146 if (ret < 0)
147 SYSERROR("Failed to set file flags");
148 }
149
150 ret = unlink(pathname);
151 if (ret < 0) {
152 SYSERROR("Failed to delete \"%s\"", pathname);
153 failed = 1;
154 }
155 }
156 }
157 }
158
159 if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
160 SYSERROR("Failed to delete \"%s\"", dirname);
161 failed = 1;
162 }
163
164 return failed ? -1 : 0;
165 }
166
167 /*
168 * In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
169 * lxc_rmdir_onedev().
170 */
171 static inline bool is_native_overlayfs(const char *path)
172 {
173 return has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
174 has_fs_type(path, OVERLAYFS_SUPER_MAGIC);
175 }
176
/*
 * Remove @path and everything below it. Unless @path is on overlayfs,
 * entries on a different device than @path are not descended into.
 * @exclude is forwarded to _recursive_rmdir().
 *
 * Returns 0 on success (including when @path does not exist) and -1 if
 * there were any failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat sb;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &sb) >= 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, onedev);

	/* Nothing to delete is not an error. */
	if (errno == ENOENT)
		return 0;

	return log_error_errno(-1, errno, "Failed to stat \"%s\"", path);
}
195
/*
 * Parse @arg in the given @base into an unsigned 16-bit value (borrowed
 * from iproute2).
 *
 * Returns 0 and stores the result in @val on success. A NULL or empty
 * @arg yields ret_errno(EINVAL); trailing garbage, a conversion error or
 * a value above 0xFFFF yields ret_errno(ERANGE).
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end;
	unsigned long parsed;

	if (!arg || *arg == '\0')
		return ret_errno(EINVAL);

	errno = 0;
	parsed = strtoul(arg, &end, base);

	/* No digits consumed, or something left over after the number. */
	if (!end || end == arg || *end != '\0')
		return ret_errno(ERANGE);

	if (parsed > 0xFFFF || errno != 0)
		return ret_errno(ERANGE);

	*val = parsed;
	return 0;
}
214
215 int mkdir_p(const char *dir, mode_t mode)
216 {
217 const char *tmp = dir;
218 const char *orig = dir;
219
220 if (access(dir, F_OK) != -1)
221 return 0;
222
223 do {
224 __do_free char *makeme = NULL;
225 int ret;
226
227 dir = tmp + strspn(tmp, "/");
228 tmp = dir + strcspn(dir, "/");
229
230 makeme = strndup(orig, dir - orig);
231 if (!makeme)
232 return ret_set_errno(-1, ENOMEM);
233
234 ret = mkdir(makeme, mode);
235 if (ret < 0 && errno != EEXIST)
236 return log_error_errno(-1, errno, "Failed to create directory \"%s\"", makeme);
237
238 } while (tmp != dir);
239
240 return 0;
241 }
242
243 char *get_rundir(void)
244 {
245 __do_free char *rundir = NULL;
246 char *static_rundir;
247 int ret;
248 size_t len;
249 const char *homedir;
250 struct stat sb;
251
252 if (stat(RUNTIME_PATH, &sb) < 0)
253 return NULL;
254
255 if (geteuid() == sb.st_uid || getegid() == sb.st_gid)
256 return strdup(RUNTIME_PATH);
257
258 static_rundir = getenv("XDG_RUNTIME_DIR");
259 if (static_rundir)
260 return strdup(static_rundir);
261
262 INFO("XDG_RUNTIME_DIR isn't set in the environment");
263 homedir = getenv("HOME");
264 if (!homedir)
265 return log_error(NULL, "HOME isn't set in the environment");
266
267 len = strlen(homedir) + 17;
268 rundir = malloc(sizeof(char) * len);
269 if (!rundir)
270 return NULL;
271
272 ret = strnprintf(rundir, len, "%s/.cache/lxc/run/", homedir);
273 if (ret < 0)
274 return ret_set_errno(NULL, EIO);
275
276 return move_ptr(rundir);
277 }
278
/*
 * Reap child @pid, retrying when waitpid() is interrupted by a signal.
 *
 * Returns 0 if the child exited normally with exit status 0, -1 in every
 * other case (waitpid() failure, abnormal termination, non-zero status).
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		pid_t reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
300
301 int wait_for_pidfd(int pidfd)
302 {
303 int ret;
304 siginfo_t info = {
305 .si_signo = 0,
306 };
307
308 do {
309 ret = waitid(P_PIDFD, pidfd, &info, __WALL | WEXITED);
310 } while (ret < 0 && errno == EINTR);
311
312 return !ret && WIFEXITED(info.si_status) && WEXITSTATUS(info.si_status) == 0;
313 }
314
/*
 * Reap child @pid, retrying when waitpid() is interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be decoded
 * with the W*() macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;
	pid_t reaped;

	do {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1 && errno != EINTR)
			return -1;
	} while (reaped == -1 || reaped != pid);

	return status;
}
333
/*
 * Reap child @pid and log how it ended.
 *
 * Returns true only if the child exited normally with exit status 0.
 * Failure to reap, death by signal, any other abnormal termination and a
 * non-zero exit status are each logged and reported as false.
 */
bool wait_exited(pid_t pid)
{
	int status = lxc_wait_for_pid_status(pid);

	if (status < 0)
		return log_error(false, "Failed to reap on child process %d", pid);

	if (WIFSIGNALED(status))
		return log_error(false, "Child process %d terminated by signal %d", pid, WTERMSIG(status));

	if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) != 0)
			return log_error(false, "Child terminated with error %d", WEXITSTATUS(status));

		TRACE("Reaped child process %d", pid);
		return true;
	}

	return log_error(false, "Child did not termiate correctly");
}
351
352 #if HAVE_OPENSSL
353 #include <openssl/evp.h>
354
355 static int do_sha1_hash(const char *buf, int buflen, unsigned char *md_value,
356 unsigned int *md_len)
357 {
358 EVP_MD_CTX *mdctx;
359 const EVP_MD *md;
360
361 md = EVP_get_digestbyname("sha1");
362 if (!md)
363 return log_error(-1, "Unknown message digest: sha1\n");
364
365 mdctx = EVP_MD_CTX_create();
366 EVP_DigestInit_ex(mdctx, md, NULL);
367 EVP_DigestUpdate(mdctx, buf, buflen);
368 EVP_DigestFinal_ex(mdctx, md_value, md_len);
369 EVP_MD_CTX_destroy(mdctx);
370
371 return 0;
372 }
373
374 int sha1sum_file(char *fnam, unsigned char *digest, unsigned int *md_len)
375 {
376 __do_free char *buf = NULL;
377 __do_fclose FILE *f = NULL;
378 int ret;
379 ssize_t flen;
380 ssize_t nbytes;
381
382 if (!fnam)
383 return -1;
384
385 f = fopen_cloexec(fnam, "r");
386 if (!f)
387 return log_error_errno(-1, errno, "Failed to open template \"%s\"", fnam);
388
389 if (fseek(f, 0, SEEK_END) < 0)
390 return log_error_errno(-1, errno, "Failed to seek to end of template");
391
392 flen = ftell(f);
393 if (flen < 0)
394 return log_error_errno(-1, errno, "Failed to tell size of template");
395
396 if (fseek(f, 0, SEEK_SET) < 0)
397 return log_error_errno(-1, errno, "Failed to seek to start of template");
398
399 buf = malloc(flen + 1);
400 if (!buf)
401 return log_error_errno(-1, ENOMEM, "Out of memory");
402
403 nbytes = fread(buf, 1, flen, f);
404 if (nbytes < 0 || nbytes != flen)
405 return log_error_errno(-1, errno, "Failed to read template");
406
407 buf[flen] = '\0';
408 ret = do_sha1_hash(buf, flen, (void *)digest, md_len);
409 return ret;
410 }
411 #endif
412
413 struct lxc_popen_FILE *lxc_popen(const char *command)
414 {
415 int ret;
416 int pipe_fds[2];
417 pid_t child_pid;
418 struct lxc_popen_FILE *fp = NULL;
419
420 ret = pipe2(pipe_fds, O_CLOEXEC);
421 if (ret < 0)
422 return NULL;
423
424 child_pid = fork();
425 if (child_pid < 0)
426 goto on_error;
427
428 if (!child_pid) {
429 sigset_t mask;
430
431 close(pipe_fds[0]);
432
433 /* duplicate stdout */
434 if (pipe_fds[1] != STDOUT_FILENO)
435 ret = dup2(pipe_fds[1], STDOUT_FILENO);
436 else
437 ret = fcntl(pipe_fds[1], F_SETFD, 0);
438 if (ret < 0) {
439 close(pipe_fds[1]);
440 _exit(EXIT_FAILURE);
441 }
442
443 /* duplicate stderr */
444 if (pipe_fds[1] != STDERR_FILENO)
445 ret = dup2(pipe_fds[1], STDERR_FILENO);
446 else
447 ret = fcntl(pipe_fds[1], F_SETFD, 0);
448 close(pipe_fds[1]);
449 if (ret < 0)
450 _exit(EXIT_FAILURE);
451
452 /* unblock all signals */
453 ret = sigfillset(&mask);
454 if (ret < 0)
455 _exit(EXIT_FAILURE);
456
457 ret = pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
458 if (ret < 0)
459 _exit(EXIT_FAILURE);
460
461 /* check if /bin/sh exist, otherwise try Android location /system/bin/sh */
462 if (file_exists("/bin/sh"))
463 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
464 else
465 execl("/system/bin/sh", "sh", "-c", command, (char *)NULL);
466
467 _exit(127);
468 }
469
470 close(pipe_fds[1]);
471 pipe_fds[1] = -1;
472
473 fp = malloc(sizeof(*fp));
474 if (!fp)
475 goto on_error;
476
477 memset(fp, 0, sizeof(*fp));
478
479 fp->child_pid = child_pid;
480 fp->pipe = pipe_fds[0];
481
482 /* From now on, closing fp->f will also close fp->pipe. So only ever
483 * call fclose(fp->f).
484 */
485 fp->f = fdopen(pipe_fds[0], "r");
486 if (!fp->f)
487 goto on_error;
488
489 return fp;
490
491 on_error:
492 /* We can only close pipe_fds[0] if fdopen() didn't succeed or wasn't
493 * called yet. Otherwise the fd belongs to the file opened by fdopen()
494 * since it isn't dup()ed.
495 */
496 if (fp && !fp->f && pipe_fds[0] >= 0)
497 close(pipe_fds[0]);
498
499 if (pipe_fds[1] >= 0)
500 close(pipe_fds[1]);
501
502 if (fp && fp->f)
503 fclose(fp->f);
504
505 if (fp)
506 free(fp);
507
508 return NULL;
509 }
510
511 int lxc_pclose(struct lxc_popen_FILE *fp)
512 {
513 pid_t wait_pid;
514 int wstatus = 0;
515
516 if (!fp)
517 return -1;
518
519 do {
520 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
521 } while (wait_pid < 0 && errno == EINTR);
522
523 fclose(fp->f);
524 free(fp);
525
526 if (wait_pid < 0)
527 return -1;
528
529 return wstatus;
530 }
531
532 int randseed(bool srand_it)
533 {
534 __do_fclose FILE *f = NULL;
535 /*
536 * srand pre-seed function based on /dev/urandom
537 */
538 unsigned int seed = time(NULL) + getpid();
539
540 f = fopen("/dev/urandom", "re");
541 if (f) {
542 int ret = fread(&seed, sizeof(seed), 1, f);
543 if (ret != 1)
544 SYSDEBUG("Unable to fread /dev/urandom, fallback to time+pid rand seed");
545 }
546
547 if (srand_it)
548 srand(seed);
549
550 return seed;
551 }
552
553 uid_t get_ns_uid(uid_t orig)
554 {
555 __do_free char *line = NULL;
556 __do_fclose FILE *f = NULL;
557 size_t sz = 0;
558 uid_t nsid, hostid, range;
559
560 f = fopen("/proc/self/uid_map", "re");
561 if (!f)
562 return log_error_errno(0, errno, "Failed to open uid_map");
563
564 while (getline(&line, &sz, f) != -1) {
565 if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
566 continue;
567
568 if (hostid <= orig && hostid + range > orig)
569 return nsid += orig - hostid;
570 }
571
572 return LXC_INVALID_UID;
573 }
574
575 gid_t get_ns_gid(gid_t orig)
576 {
577 __do_free char *line = NULL;
578 __do_fclose FILE *f = NULL;
579 size_t sz = 0;
580 gid_t nsid, hostid, range;
581
582 f = fopen("/proc/self/gid_map", "re");
583 if (!f)
584 return log_error_errno(0, errno, "Failed to open gid_map");
585
586 while (getline(&line, &sz, f) != -1) {
587 if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
588 continue;
589
590 if (hostid <= orig && hostid + range > orig)
591 return nsid += orig - hostid;
592 }
593
594 return LXC_INVALID_GID;
595 }
596
/*
 * Thin wrapper: forwards @path to exists_dir_at() with a directory fd of
 * -1 and returns its result.
 */
bool dir_exists(const char *path)
{
	bool found = exists_dir_at(-1, path);

	return found;
}
601
/*
 * 64-bit FNV-1a hash over @len bytes at @buf, continuing from @hval.
 *
 * SHA-1 is deliberately not used here so that there is no dependency on
 * HAVE_GNUTLS. FNV has good anti collision properties, and pre-image
 * resistance or one-way-ness do not matter: the goal is only to make a
 * name unique within the 108 bytes of space available.
 *
 * Returns the updated hash value.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/*
		 * Multiply by the 64 bit FNV prime mod 2^64. This equals the
		 * shift-and-add form hval + (hval << 1) + (hval << 4) +
		 * (hval << 5) + (hval << 7) + (hval << 8) + (hval << 40).
		 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
624
625 bool is_shared_mountpoint(const char *path)
626 {
627 __do_fclose FILE *f = NULL;
628 __do_free char *line = NULL;
629 int i;
630 size_t len = 0;
631
632 f = fopen("/proc/self/mountinfo", "re");
633 if (!f)
634 return 0;
635
636 while (getline(&line, &len, f) > 0) {
637 char *slider1, *slider2;
638
639 for (slider1 = line, i = 0; slider1 && i < 4; i++)
640 slider1 = strchr(slider1 + 1, ' ');
641
642 if (!slider1)
643 continue;
644
645 slider2 = strchr(slider1 + 1, ' ');
646 if (!slider2)
647 continue;
648
649 *slider2 = '\0';
650 if (strequal(slider1 + 1, path)) {
651 /* This is the path. Is it shared? */
652 slider1 = strchr(slider2 + 1, ' ');
653 if (slider1 && strstr(slider1, "shared:"))
654 return true;
655 }
656 }
657
658 return false;
659 }
660
/*
 * Detect whether / is mounted MS_SHARED by consulting
 * /proc/self/mountinfo through is_shared_mountpoint().
 *
 * Only '/' is checked. A container rootfs or mount location that is
 * MS_SHARED while '/' is not goes undetected; working that out was
 * considered too much effort to be worth it.
 *
 * Returns 1 if '/' is shared, 0 otherwise.
 */
int detect_shared_rootfs(void)
{
	return is_shared_mountpoint("/") ? 1 : 0;
}
675
676 bool switch_to_ns(pid_t pid, const char *ns)
677 {
678 __do_close int fd = -EBADF;
679 int ret;
680 char nspath[STRLITERALLEN("/proc//ns/")
681 + INTTYPE_TO_STRLEN(pid_t)
682 + LXC_NAMESPACE_NAME_MAX];
683
684 /* Switch to new ns */
685 ret = strnprintf(nspath, sizeof(nspath), "/proc/%d/ns/%s", pid, ns);
686 if (ret < 0)
687 return false;
688
689 fd = open(nspath, O_RDONLY | O_CLOEXEC);
690 if (fd < 0)
691 return log_error_errno(false, errno, "Failed to open \"%s\"", nspath);
692
693 ret = setns(fd, 0);
694 if (ret)
695 return log_error_errno(false, errno, "Failed to set process %d to \"%s\" of %d", pid, ns, fd);
696
697 return true;
698 }
699
700 /*
701 * looking at fs/proc_namespace.c, it appears we can
702 * actually expect the rootfs entry to very specifically contain
703 * " - rootfs rootfs "
704 * IIUC, so long as we've chrooted so that rootfs is not our root,
705 * the rootfs entry should always be skipped in mountinfo contents.
706 */
707 bool detect_ramfs_rootfs(void)
708 {
709 __do_free char *line = NULL;
710 __do_free void *fopen_cache = NULL;
711 __do_fclose FILE *f = NULL;
712 size_t len = 0;
713
714 f = fopen_cached("/proc/self/mountinfo", "re", &fopen_cache);
715 if (!f)
716 return false;
717
718 while (getline(&line, &len, f) != -1) {
719 int i;
720 char *p, *p2;
721
722 for (p = line, i = 0; p && i < 4; i++)
723 p = strchr(p + 1, ' ');
724 if (!p)
725 continue;
726
727 p2 = strchr(p + 1, ' ');
728 if (!p2)
729 continue;
730 *p2 = '\0';
731 if (strequal(p + 1, "/")) {
732 /* This is '/'. Is it the ramfs? */
733 p = strchr(p2 + 1, '-');
734 if (p && strnequal(p, "- rootfs ", 9))
735 return true;
736 }
737 }
738
739 return false;
740 }
741
742 char *on_path(const char *cmd, const char *rootfs)
743 {
744 __do_free char *path = NULL;
745 char *entry = NULL;
746 char cmdpath[PATH_MAX];
747 int ret;
748
749 path = getenv("PATH");
750 if (!path)
751 return NULL;
752
753 path = strdup(path);
754 if (!path)
755 return NULL;
756
757 lxc_iterate_parts(entry, path, ":") {
758 if (rootfs)
759 ret = strnprintf(cmdpath, sizeof(cmdpath), "%s/%s/%s", rootfs, entry, cmd);
760 else
761 ret = strnprintf(cmdpath, sizeof(cmdpath), "%s/%s", entry, cmd);
762 if (ret < 0)
763 continue;
764
765 if (access(cmdpath, X_OK) == 0)
766 return strdup(cmdpath);
767 }
768
769 return NULL;
770 }
771
772 /* historically lxc-init has been under /usr/lib/lxc and under
773 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
774 */
775 char *choose_init(const char *rootfs)
776 {
777 char *retv = NULL;
778 const char *empty = "",
779 *tmp;
780 int ret, env_set = 0;
781
782 if (!getenv("PATH")) {
783 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
784 SYSERROR("Failed to setenv");
785
786 env_set = 1;
787 }
788
789 retv = on_path("init.lxc", rootfs);
790
791 if (env_set)
792 if (unsetenv("PATH"))
793 SYSERROR("Failed to unsetenv");
794
795 if (retv)
796 return retv;
797
798 retv = malloc(PATH_MAX);
799 if (!retv)
800 return NULL;
801
802 if (rootfs)
803 tmp = rootfs;
804 else
805 tmp = empty;
806
807 ret = strnprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
808 if (ret < 0) {
809 ERROR("The name of path is too long");
810 goto out1;
811 }
812
813 if (access(retv, X_OK) == 0)
814 return retv;
815
816 ret = strnprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
817 if (ret < 0) {
818 ERROR("The name of path is too long");
819 goto out1;
820 }
821
822 if (access(retv, X_OK) == 0)
823 return retv;
824
825 ret = strnprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
826 if (ret < 0) {
827 ERROR("The name of path is too long");
828 goto out1;
829 }
830
831 if (access(retv, X_OK) == 0)
832 return retv;
833
834 ret = strnprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
835 if (ret < 0) {
836 ERROR("The name of path is too long");
837 goto out1;
838 }
839
840 if (access(retv, X_OK) == 0)
841 return retv;
842
843 /*
844 * Last resort, look for the statically compiled init.lxc which we
845 * hopefully bind-mounted in.
846 * If we are called during container setup, and we get to this point,
847 * then the init.lxc.static from the host will need to be bind-mounted
848 * in. So we return NULL here to indicate that.
849 */
850 if (rootfs)
851 goto out1;
852
853 ret = strnprintf(retv, PATH_MAX, "/init.lxc.static");
854 if (ret < 0) {
855 WARN("Nonsense - name /lxc.init.static too long");
856 goto out1;
857 }
858
859 if (access(retv, X_OK) == 0)
860 return retv;
861
862 out1:
863 free(retv);
864 return NULL;
865 }
866
867 /*
868 * Given the '-t' template option to lxc-create, figure out what to
869 * do. If the template is a full executable path, use that. If it
870 * is something like 'sshd', then return $templatepath/lxc-sshd.
871 * On success return the template, on error return NULL.
872 */
873 char *get_template_path(const char *t)
874 {
875 int ret, len;
876 char *tpath;
877
878 if (t[0] == '/') {
879 if (access(t, X_OK) == 0) {
880 return strdup(t);
881 } else {
882 SYSERROR("Bad template pathname: %s", t);
883 return NULL;
884 }
885 }
886
887 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
888
889 tpath = malloc(len);
890 if (!tpath)
891 return NULL;
892
893 ret = strnprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
894 if (ret < 0) {
895 free(tpath);
896 return NULL;
897 }
898
899 if (access(tpath, X_OK) < 0) {
900 SYSERROR("bad template: %s", t);
901 free(tpath);
902 return NULL;
903 }
904
905 return tpath;
906 }
907
/*
 * Step to the next segment of a path whose '/' separators were replaced
 * with '\0'.
 *
 * @path:    the tokenized pathname
 * @offsetp: in: offset of the segment seen last; out: offset of the next
 *           segment (left untouched if it is already at or past @fulllen)
 * @fulllen: length of the original, untokenized path
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Run to the end of the current segment ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;

	/* ... then past all separators that follow it. */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;

	if (pos >= fulllen)
		return NULL;

	return path + pos;
}
932
/*
 * Check that @subdir is @dir itself or lies below it. @len is the length
 * of @dir, passed in so it does not have to be recalculated.
 *
 * A plain prefix match is not enough: unless @dir ends in '/', the prefix
 * must be followed in @subdir by '/' or by the end of the string.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	if (subdirlen < len || !strnequal(subdir, dir, len))
		return false;

	return dir[len - 1] == '/' || subdir[len] == '/' || subdirlen == len;
}
955
/*
 * Check whether the open @fd refers to a symlink.
 *
 * Returns -ELOOP if it does, -ENOENT if fstat() fails and 0 if the fd is
 * ok.
 */
static int check_symlink(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0)
		return -ENOENT;

	return S_ISLNK(sb.st_mode) ? -ELOOP : 0;
}
974
975 /*
976 * Open a file or directory, provided that it contains no symlinks.
977 *
978 * CAVEAT: This function must not be used for other purposes than container
979 * setup before executing the container's init
980 */
981 static int open_if_safe(int dirfd, const char *nextpath)
982 {
983 int newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
984 if (newfd >= 0) /* Was not a symlink, all good. */
985 return newfd;
986
987 if (errno == ELOOP)
988 return newfd;
989
990 if (errno == EPERM || errno == EACCES) {
991 /* We're not root (cause we got EPERM) so try opening with
992 * O_PATH.
993 */
994 newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
995 if (newfd >= 0) {
996 /* O_PATH will return an fd for symlinks. We know
997 * nextpath wasn't a symlink at last openat, so if fd is
998 * now a link, then something * fishy is going on.
999 */
1000 int ret = check_symlink(newfd);
1001 if (ret < 0) {
1002 close(newfd);
1003 newfd = ret;
1004 }
1005 }
1006 }
1007
1008 return newfd;
1009 }
1010
/*
 * Open a path intended for mounting, walking it one component at a time so
 * that no component after @prefix_skip is a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: leading part of @target in which symbolic links are
 *               ignored, typically the container's rootfs; NULL or ""
 *               means "/"
 *
 * Returns an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset = 0, fd, total;
	char *segments, *seg;

	total = strlen(target);

	if (!prefix_skip || prefix_skip[0] == '\0') {
		prefix_skip = "/";
		offset = 0;
	} else {
		/* make sure prefix-skip makes sense */
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target \"%s\" didn't start with prefix \"%s\"",
			      target, prefix_skip);
			return -EINVAL;
		}

		/*
		 * get_nextpath() expects its offset to sit on a '/' (turned
		 * into '\0') or before it, so step back by one to make sure
		 * that happens.
		 */
		if (offset)
			offset--;
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	segments = strdup(target);
	if (!segments) {
		ERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (char *c = segments; c < segments + total; c++) {
		if (*c == '/')
			*c = '\0';
	}

	fd = open(prefix_skip, O_RDONLY);
	if (fd < 0) {
		SYSERROR("Failed to open path \"%s\"", prefix_skip);
		free(segments);
		return fd;
	}

	/* Descend one component at a time, always relative to the last fd. */
	while ((seg = get_nextpath(segments, &offset, total)) != NULL) {
		int next, saved_errno;

		next = open_if_safe(fd, seg);
		saved_errno = errno;
		close(fd);

		fd = next;
		if (next < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", seg, target);

			break;
		}
	}

	free(segments);
	return fd;
}
1094
1095 int __safe_mount_beneath_at(int beneath_fd, const char *src, const char *dst, const char *fstype,
1096 unsigned int flags, const void *data)
1097 {
1098 __do_close int source_fd = -EBADF, target_fd = -EBADF;
1099 struct open_how how = {
1100 .flags = PROTECT_OPATH_DIRECTORY,
1101 .resolve = PROTECT_LOOKUP_BENEATH_WITH_MAGICLINKS,
1102 };
1103 int ret;
1104 char src_buf[LXC_PROC_PID_FD_LEN], tgt_buf[LXC_PROC_PID_FD_LEN];
1105
1106 if (beneath_fd < 0)
1107 return -EINVAL;
1108
1109 if ((flags & MS_BIND) && src && src[0] != '/') {
1110 source_fd = openat2(beneath_fd, src, &how, sizeof(how));
1111 if (source_fd < 0)
1112 return -errno;
1113 ret = strnprintf(src_buf, sizeof(src_buf), "/proc/self/fd/%d", source_fd);
1114 if (ret < 0)
1115 return -EIO;
1116 } else {
1117 src_buf[0] = '\0';
1118 }
1119
1120 target_fd = openat2(beneath_fd, dst, &how, sizeof(how));
1121 if (target_fd < 0)
1122 return log_error_errno(-errno, errno, "Failed to open %d(%s)", beneath_fd, dst);
1123 ret = strnprintf(tgt_buf, sizeof(tgt_buf), "/proc/self/fd/%d", target_fd);
1124 if (ret < 0)
1125 return -EIO;
1126
1127 if (!is_empty_string(src_buf))
1128 ret = mount(src_buf, tgt_buf, fstype, flags, data);
1129 else
1130 ret = mount(src, tgt_buf, fstype, flags, data);
1131
1132 return ret;
1133 }
1134
1135 int safe_mount_beneath(const char *beneath, const char *src, const char *dst, const char *fstype,
1136 unsigned int flags, const void *data)
1137 {
1138 __do_close int beneath_fd = -EBADF;
1139 const char *path = beneath ? beneath : "/";
1140
1141 beneath_fd = openat(-1, path, PROTECT_OPATH_DIRECTORY);
1142 if (beneath_fd < 0)
1143 return log_error_errno(-errno, errno, "Failed to open %s", path);
1144
1145 return __safe_mount_beneath_at(beneath_fd, src, dst, fstype, flags, data);
1146 }
1147
/*
 * Public entry point that forwards all arguments unchanged to
 * __safe_mount_beneath_at() and returns its result.
 */
int safe_mount_beneath_at(int beneath_fd, const char *src, const char *dst, const char *fstype,
			  unsigned int flags, const void *data)
{
	int ret;

	ret = __safe_mount_beneath_at(beneath_fd, src, dst, fstype, flags, data);

	return ret;
}
1153
/*
 * Safely mount a path into a container, ensuring that the mount target is
 * under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The target - and the source of a relative bind mount - are opened with
 * open_without_symlink() and handed to mount() as /proc/self/fd/<fd>.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	const char *mntsrc = src;
	int srcfd = -1;
	int destfd, ret, saved_errno;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("This is a relative bind mount");

		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		ret = strnprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}

		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		/* Keep errno from the failed open across the close(). */
		if (srcfd >= 0) {
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}

		return destfd;
	}

	ret = strnprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0) {
		if (srcfd >= 0)
			close(srcfd);

		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;

	if (srcfd >= 0)
		close(srcfd);

	close(destfd);

	if (ret >= 0)
		return 0;

	errno = saved_errno;
	SYSERROR("Failed to mount \"%s\" onto \"%s\"", src ? src : "(null)", dest);
	return ret;
}
1226
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the new fd, or the negative result of open() after logging the
 * error.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
1235
/*
 * Make stdin, stdout and stderr (in that order) duplicates of @fd.
 *
 * Returns 0 on success, -1 if @fd is negative or any dup2() fails; later
 * descriptors are not touched once one dup2() has failed.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1257
/*
 * Redirect stdin, stdout and stderr to /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null cannot be opened.
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	close(fd);

	return ret;
}
1271
1272 /* Check whether a signal is blocked by a process. */
1273 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1274 #define __PROC_STATUS_LEN (6 + INTTYPE_TO_STRLEN(pid_t) + 7 + 1)
1275 bool task_blocks_signal(pid_t pid, int signal)
1276 {
1277 __do_free char *line = NULL;
1278 __do_fclose FILE *f = NULL;
1279 int ret;
1280 char status[__PROC_STATUS_LEN] = {0};
1281 uint64_t sigblk = 0, one = 1;
1282 size_t n = 0;
1283 bool bret = false;
1284
1285 ret = strnprintf(status, sizeof(status), "/proc/%d/status", pid);
1286 if (ret < 0)
1287 return bret;
1288
1289 f = fopen(status, "re");
1290 if (!f)
1291 return false;
1292
1293 while (getline(&line, &n, f) != -1) {
1294 char *numstr;
1295
1296 if (!strnequal(line, "SigBlk:", 7))
1297 continue;
1298
1299 numstr = lxc_trim_whitespace_in_place(line + 7);
1300 ret = lxc_safe_uint64(numstr, &sigblk, 16);
1301 if (ret < 0)
1302 return false;
1303
1304 break;
1305 }
1306
1307 if (sigblk & (one << (signal - 1)))
1308 bret = true;
1309
1310 return bret;
1311 }
1312
1313 int lxc_preserve_ns(const int pid, const char *ns)
1314 {
1315 int ret;
1316 /* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
1317 #define __NS_PATH_LEN 50
1318 char path[__NS_PATH_LEN];
1319
1320 /* This way we can use this function to also check whether namespaces
1321 * are supported by the kernel by passing in the NULL or the empty
1322 * string.
1323 */
1324 ret = strnprintf(path, sizeof(path), "/proc/%d/ns%s%s", pid,
1325 !ns || strequal(ns, "") ? "" : "/",
1326 !ns || strequal(ns, "") ? "" : ns);
1327 if (ret < 0)
1328 return ret_errno(EIO);
1329
1330 return open(path, O_RDONLY | O_CLOEXEC);
1331 }
1332
1333 bool lxc_switch_uid_gid(uid_t uid, gid_t gid)
1334 {
1335 int ret = 0;
1336
1337 if (gid != LXC_INVALID_GID) {
1338 ret = setresgid(gid, gid, gid);
1339 if (ret < 0) {
1340 SYSERROR("Failed to switch to gid %d", gid);
1341 return false;
1342 }
1343 NOTICE("Switched to gid %d", gid);
1344 }
1345
1346 if (uid != LXC_INVALID_UID) {
1347 ret = setresuid(uid, uid, uid);
1348 if (ret < 0) {
1349 SYSERROR("Failed to switch to uid %d", uid);
1350 return false;
1351 }
1352 NOTICE("Switched to uid %d", uid);
1353 }
1354
1355 return true;
1356 }
1357
/*
 * Convenience wrapper around setgroups(0, NULL) that provides uniform
 * logging.
 *
 * Returns true when the call succeeded and false (after logging the errno)
 * otherwise.
 */
bool lxc_drop_groups(void)
{
	if (setgroups(0, NULL) != 0)
		return log_error_errno(false, errno, "Failed to drop supplimentary groups");

	NOTICE("Dropped supplimentary groups");
	return true;
}
1370
/*
 * Install @list (holding @size entries) via setgroups().
 *
 * When trace logging is enabled every gid in @list is logged after the call
 * succeeded.
 *
 * Returns true on success and false (after logging the errno) if
 * setgroups() fails.
 */
bool lxc_setgroups(gid_t list[], size_t size)
{
	if (setgroups(size, list) != 0)
		return log_error_errno(false, errno, "Failed to set supplimentary groups");

	if (size > 0 && lxc_log_trace()) {
		const gid_t *const end = list + size;

		for (const gid_t *cur = list; cur < end; cur++)
			TRACE("Setting supplimentary group %d", *cur);
	}

	NOTICE("Set supplimentary groups");
	return true;
}
1387
1388 static int lxc_get_unused_loop_dev_legacy(char *loop_name)
1389 {
1390 struct dirent *dp;
1391 struct loop_info64 lo64;
1392 DIR *dir;
1393 int dfd = -1, fd = -1, ret = -1;
1394
1395 dir = opendir("/dev");
1396 if (!dir) {
1397 SYSERROR("Failed to open \"/dev\"");
1398 return -1;
1399 }
1400
1401 while ((dp = readdir(dir))) {
1402 if (!strnequal(dp->d_name, "loop", 4))
1403 continue;
1404
1405 dfd = dirfd(dir);
1406 if (dfd < 0)
1407 continue;
1408
1409 fd = openat(dfd, dp->d_name, O_RDWR);
1410 if (fd < 0)
1411 continue;
1412
1413 ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
1414 if (ret < 0) {
1415 if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 ||
1416 errno != ENXIO) {
1417 close(fd);
1418 fd = -1;
1419 continue;
1420 }
1421 }
1422
1423 ret = strnprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
1424 if (ret < 0) {
1425 close(fd);
1426 fd = -1;
1427 continue;
1428 }
1429
1430 break;
1431 }
1432
1433 closedir(dir);
1434
1435 if (fd < 0)
1436 return -1;
1437
1438 return fd;
1439 }
1440
1441 static int lxc_get_unused_loop_dev(char *name_loop)
1442 {
1443 int loop_nr, ret;
1444 int fd_ctl = -1, fd_tmp = -1;
1445
1446 fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
1447 if (fd_ctl < 0) {
1448 SYSERROR("Failed to open loop control");
1449 return -ENODEV;
1450 }
1451
1452 loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE);
1453 if (loop_nr < 0) {
1454 SYSERROR("Failed to get loop control");
1455 goto on_error;
1456 }
1457
1458 ret = strnprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr);
1459 if (ret < 0)
1460 goto on_error;
1461
1462 fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC);
1463 if (fd_tmp < 0) {
1464 /* on Android loop devices are moved under /dev/block, give it a shot */
1465 ret = strnprintf(name_loop, LO_NAME_SIZE, "/dev/block/loop%d", loop_nr);
1466 if (ret < 0)
1467 goto on_error;
1468
1469 fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC);
1470 if (fd_tmp < 0)
1471 SYSERROR("Failed to open loop \"%s\"", name_loop);
1472 }
1473
1474 on_error:
1475 close(fd_ctl);
1476 return fd_tmp;
1477 }
1478
1479 int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
1480 {
1481 int ret;
1482 struct loop_info64 lo64;
1483 int fd_img = -1, fret = -1, fd_loop = -1;
1484
1485 fd_loop = lxc_get_unused_loop_dev(loop_dev);
1486 if (fd_loop < 0) {
1487 if (fd_loop != -ENODEV)
1488 goto on_error;
1489
1490 fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
1491 if (fd_loop < 0)
1492 goto on_error;
1493 }
1494
1495 fd_img = open(source, O_RDWR | O_CLOEXEC);
1496 if (fd_img < 0) {
1497 SYSERROR("Failed to open source \"%s\"", source);
1498 goto on_error;
1499 }
1500
1501 ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
1502 if (ret < 0) {
1503 SYSERROR("Failed to set loop fd");
1504 goto on_error;
1505 }
1506
1507 memset(&lo64, 0, sizeof(lo64));
1508 lo64.lo_flags = flags;
1509
1510 strlcpy((char *)lo64.lo_file_name, source, LO_NAME_SIZE);
1511
1512 ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
1513 if (ret < 0) {
1514 SYSERROR("Failed to set loop status64");
1515 goto on_error;
1516 }
1517
1518 fret = 0;
1519
1520 on_error:
1521 if (fd_img >= 0)
1522 close(fd_img);
1523
1524 if (fret < 0 && fd_loop >= 0) {
1525 close(fd_loop);
1526 fd_loop = -1;
1527 }
1528
1529 return fd_loop;
1530 }
1531
/*
 * Unmount everything that is stacked on @path.
 *
 * umount2() is called on @path (with MNT_DETACH if @lazy is set) until it
 * fails. A failure with EINVAL ends the loop normally. Every other errno is
 * treated as fatal so that we cannot spin forever (the alternative would be
 * to keep parsing /proc/self/mountinfo, which is ugly and probably racy).
 *
 * Returns the number of successful unmounts (saturating at INT_MAX), or
 * -errno if umount2() failed with anything other than EINVAL.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int umount_flags = lazy ? MNT_DETACH : 0;
	int popped = 0;

	/* Each success may have uncovered another mount underneath. */
	while (umount2(path, umount_flags) >= 0) {
		/* Stop counting rather than overflow in the absurd case. */
		if (popped < INT_MAX)
			popped++;
	}

	if (errno != EINVAL)
		return -errno;

	return popped;
}
1563
/*
 * Run @child_fn(@args) in a child process and capture its output.
 *
 * The child gets stdout and stderr redirected into a pipe. If @buf is
 * non-NULL and @buf_size is non-zero, the parent performs a single read of at
 * most @buf_size - 1 bytes from that pipe into @buf. When that read returns
 * data, the last byte read is replaced by the terminating NUL (presumably to
 * drop a trailing newline - confirm with callers). @buf is always initialised
 * to the empty string first.
 *
 * @child_fn is expected not to return. If it does, the child logs an error
 * and exits with EXIT_FAILURE.
 *
 * Returns -1 if the pipe or the child could not be created. Otherwise the
 * result of lxc_wait_for_pid_status() when @wait_status is true, or of
 * wait_for_pid() when it is false.
 */
static int run_command_internal(char *buf, size_t buf_size, int (*child_fn)(void *), void *args, bool wait_status)
{
	int pipe_fds[2];
	int status;
	pid_t pid;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf && buf_size > 0)
		buf[0] = '\0';

	if (pipe(pipe_fds) < 0) {
		SYSERROR("Failed to create pipe");
		return -1;
	}

	pid = lxc_raw_clone(0, NULL);
	if (pid < 0) {
		close(pipe_fds[0]);
		close(pipe_fds[1]);
		SYSERROR("Failed to create new process");
		return -1;
	}

	if (pid == 0) {
		int dup_ret;

		/* The child only writes, so drop the read end. */
		close(pipe_fds[0]);

		/* Point stdout and then stderr at the write end. */
		dup_ret = dup2(pipe_fds[1], STDOUT_FILENO);
		if (dup_ret >= 0)
			dup_ret = dup2(pipe_fds[1], STDERR_FILENO);

		/* The original write end is no longer needed. */
		close(pipe_fds[1]);

		if (dup_ret < 0) {
			SYSERROR("Failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("Failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* The parent only reads, so drop the write end. */
	close(pipe_fds[1]);

	if (buf && buf_size > 0) {
		ssize_t nread;

		nread = lxc_read_nointr(pipe_fds[0], buf, buf_size - 1);
		if (nread > 0)
			buf[nread - 1] = '\0';
	}

	status = wait_status ? lxc_wait_for_pid_status(pid) : wait_for_pid(pid);

	/* Done with the read end. */
	close(pipe_fds[0]);

	return status;
}
1631
/*
 * Run @child_fn(@args) in a child process, capturing its output in @buf
 * (@buf_size bytes). The child is reaped with wait_for_pid(), whose result is
 * returned. See run_command_internal() for the capture details.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool report_wait_status = false;

	return run_command_internal(buf, buf_size, child_fn, args, report_wait_status);
}
1636
/*
 * Run @child_fn(@args) in a child process, capturing its output in @buf
 * (@buf_size bytes). The child is reaped with lxc_wait_for_pid_status(),
 * whose result is returned. See run_command_internal() for the capture
 * details.
 */
int run_command_status(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool report_wait_status = true;

	return run_command_internal(buf, buf_size, child_fn, args, report_wait_status);
}
1641
1642 bool lxc_nic_exists(char *nic)
1643 {
1644 #define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
1645 char path[__LXC_SYS_CLASS_NET_LEN];
1646 int ret;
1647 struct stat sb;
1648
1649 if (strequal(nic, "none"))
1650 return true;
1651
1652 ret = strnprintf(path, sizeof(path), "/sys/class/net/%s", nic);
1653 if (ret < 0)
1654 return false;
1655
1656 ret = stat(path, &sb);
1657 if (ret < 0)
1658 return false;
1659
1660 return true;
1661 }
1662
/*
 * Round @n up to the nearest power of two.
 *
 * Returns @n itself if it already is a power of two. 0 is not valid input
 * and is answered with 0, which is not a valid power of two. Values above
 * 2^63 have no representable next power of two and also yield 0 because the
 * result wraps around.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	uint64_t mask;

	if (n == 0)
		return 0;

	/*
	 * Copy the highest set bit of (n - 1) into every lower position. Adding
	 * one then gives the smallest power of two that is >= n.
	 */
	mask = n - 1;
	mask |= mask >> 1;
	mask |= mask >> 2;
	mask |= mask >> 4;
	mask |= mask >> 8;
	mask |= mask >> 16;
	mask |= mask >> 32;

	return mask + 1;
}
1680
1681 static int process_dead(/* takes */ int status_fd)
1682 {
1683 __do_close int dupfd = -EBADF;
1684 __do_free char *line = NULL;
1685 __do_fclose FILE *f = NULL;
1686 int ret = 0;
1687 size_t n = 0;
1688
1689 dupfd = dup(status_fd);
1690 if (dupfd < 0)
1691 return -1;
1692
1693 if (fd_cloexec(dupfd, true) < 0)
1694 return -1;
1695
1696 f = fdopen(dupfd, "re");
1697 if (!f)
1698 return -1;
1699
1700 /* Transfer ownership of fd. */
1701 move_fd(dupfd);
1702
1703 ret = 0;
1704 while (getline(&line, &n, f) != -1) {
1705 char *state;
1706
1707 if (!strnequal(line, "State:", 6))
1708 continue;
1709
1710 state = lxc_trim_whitespace_in_place(line + 6);
1711 /* only check whether process is dead or zombie for now */
1712 if (*state == 'X' || *state == 'Z')
1713 ret = 1;
1714 }
1715
1716 return ret;
1717 }
1718
/*
 * Arrange for @signal to be delivered to the caller when its parent dies.
 *
 * After PR_SET_PDEATHSIG has been requested the current parent pid is
 * re-read to detect that the parent went away in the meantime:
 *  - a parent pid of 0 means the parent lives outside our pid namespace; in
 *    that case @parent_status_fd (if >= 0) is consulted via process_dead();
 *  - any other parent pid that differs from @parent means we were reparented.
 * In both "parent is gone" cases the caller sends itself SIGKILL.
 *
 * Returns 0 on success (including the case of an out-of-namespace parent
 * with no status fd), -1 if prctl() failed, or the result of raise(SIGKILL).
 */
int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd)
{
	int prctl_ret;
	pid_t cur_parent;

	prctl_ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
			  prctl_arg(0), prctl_arg(0));

	/* verify that we haven't been orphaned in the meantime */
	cur_parent = (pid_t)syscall(SYS_getppid);
	if (cur_parent != 0 && cur_parent != parent)
		return raise(SIGKILL);

	if (cur_parent == 0) {
		/* parent outside our pidns */
		if (parent_status_fd < 0)
			return 0;

		if (process_dead(parent_status_fd) == 1)
			return raise(SIGKILL);
	}

	return prctl_ret < 0 ? -1 : 0;
}
1744
1745 int lxc_rm_rf(const char *dirname)
1746 {
1747 __do_closedir DIR *dir = NULL;
1748 int fret = 0;
1749 int ret;
1750 struct dirent *direntp;
1751
1752 dir = opendir(dirname);
1753 if (!dir)
1754 return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname);
1755
1756 while ((direntp = readdir(dir))) {
1757 __do_free char *pathname = NULL;
1758 struct stat mystat;
1759
1760 if (strequal(direntp->d_name, ".") ||
1761 strequal(direntp->d_name, ".."))
1762 continue;
1763
1764 pathname = must_make_path(dirname, direntp->d_name, NULL);
1765 ret = lstat(pathname, &mystat);
1766 if (ret < 0) {
1767 if (!fret)
1768 SYSWARN("Failed to stat \"%s\"", pathname);
1769
1770 fret = -1;
1771 continue;
1772 }
1773
1774 if (!S_ISDIR(mystat.st_mode))
1775 continue;
1776
1777 ret = lxc_rm_rf(pathname);
1778 if (ret < 0)
1779 fret = -1;
1780 }
1781
1782 ret = rmdir(dirname);
1783 if (ret < 0)
1784 return log_warn_errno(-1, errno, "Failed to delete \"%s\"", dirname);
1785
1786 return fret;
1787 }
1788
1789 bool lxc_can_use_pidfd(int pidfd)
1790 {
1791 int ret;
1792
1793 if (pidfd < 0)
1794 return log_trace(false, "Kernel does not support pidfds");
1795
1796 /*
1797 * We don't care whether or not children were in a waitable state. We
1798 * just care whether waitid() recognizes P_PIDFD.
1799 *
1800 * Btw, while I have your attention, the above waitid() code is an
1801 * excellent example of how _not_ to do flag-based kernel APIs. So if
1802 * you ever go into kernel development or are already and you add this
1803 * kind of flag potpourri even though you have read this comment shame
1804 * on you. May the gods of operating system development have mercy on
1805 * your soul because I won't.
1806 */
1807 ret = waitid(P_PIDFD, pidfd, NULL,
1808 /* Type of children to wait for. */
1809 __WALL |
1810 /* How to wait for them. */
1811 WNOHANG | WNOWAIT |
1812 /* What state to wait for. */
1813 WEXITED | WSTOPPED | WCONTINUED);
1814 if (ret < 0)
1815 return log_error_errno(false, errno, "Kernel does not support waiting on processes through pidfds");
1816
1817 ret = lxc_raw_pidfd_send_signal(pidfd, 0, NULL, 0);
1818 if (ret)
1819 return log_error_errno(false, errno, "Kernel does not support sending singals through pidfds");
1820
1821 return log_trace(true, "Kernel supports pidfds");
1822 }
1823
1824 int fix_stdio_permissions(uid_t uid)
1825 {
1826 __do_close int devnull_fd = -EBADF;
1827 int fret = 0;
1828 int std_fds[] = {STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO};
1829 int ret;
1830 struct stat st, st_null;
1831
1832 devnull_fd = open_devnull();
1833 if (devnull_fd < 0)
1834 return log_trace_errno(-1, errno, "Failed to open \"/dev/null\"");
1835
1836 ret = fstat(devnull_fd, &st_null);
1837 if (ret)
1838 return log_trace_errno(-errno, errno, "Failed to stat \"/dev/null\"");
1839
1840 for (size_t i = 0; i < ARRAY_SIZE(std_fds); i++) {
1841 ret = fstat(std_fds[i], &st);
1842 if (ret) {
1843 SYSWARN("Failed to stat standard I/O file descriptor %d", std_fds[i]);
1844 fret = -1;
1845 continue;
1846 }
1847
1848 if (st.st_rdev == st_null.st_rdev)
1849 continue;
1850
1851 ret = fchown(std_fds[i], uid, st.st_gid);
1852 if (ret) {
1853 SYSTRACE("Failed to chown standard I/O file descriptor %d to uid %d and gid %d",
1854 std_fds[i], uid, st.st_gid);
1855 fret = -1;
1856 continue;
1857 }
1858
1859 ret = fchmod(std_fds[i], 0700);
1860 if (ret) {
1861 SYSTRACE("Failed to chmod standard I/O file descriptor %d", std_fds[i]);
1862 fret = -1;
1863 }
1864 }
1865
1866 return fret;
1867 }
1868
/*
 * Multiply @base by @mult with overflow detection.
 *
 * Despite its name the function returns true when the multiplication did NOT
 * overflow. In that case the product is stored in @res. It returns false,
 * leaving @res untouched, when the product does not fit into an int64_t.
 *
 * All range checks are done on unsigned magnitudes. Mixing int64_t with the
 * uint64_t @mult would otherwise convert negative bounds to huge unsigned
 * values and reject every negative @base. A zero factor is handled up front
 * so we never divide by zero.
 */
bool multiply_overflow(int64_t base, uint64_t mult, int64_t *res)
{
	/* Largest magnitude of a non-negative / a negative int64_t. */
	const uint64_t pos_limit = (uint64_t)INT64_MAX;
	const uint64_t neg_limit = (uint64_t)INT64_MAX + 1;
	uint64_t magnitude, product;

	if (base == 0 || mult == 0) {
		*res = 0;
		return true;
	}

	if (base > 0) {
		magnitude = (uint64_t)base;
		if (magnitude > pos_limit / mult)
			return false;

		*res = (int64_t)(magnitude * mult);
		return true;
	}

	/* |base| computed without negating INT64_MIN directly. */
	magnitude = (uint64_t)(-(base + 1)) + 1;
	if (magnitude > neg_limit / mult)
		return false;

	product = magnitude * mult;
	/* 2^63 has no positive int64_t counterpart to negate. */
	*res = (product == neg_limit) ? INT64_MIN : -(int64_t)product;
	return true;
}
1880
1881 int print_r(int fd, const char *path)
1882 {
1883 __do_close int dfd = -EBADF, dfd_dup = -EBADF;
1884 __do_closedir DIR *dir = NULL;
1885 int ret = 0;
1886 struct dirent *direntp;
1887 struct stat st;
1888
1889 if (is_empty_string(path)) {
1890 char buf[LXC_PROC_SELF_FD_LEN];
1891
1892 ret = strnprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd);
1893 if (ret < 0)
1894 return ret_errno(EIO);
1895
1896 /*
1897 * O_PATH file descriptors can't be used so we need to re-open
1898 * just in case.
1899 */
1900 dfd = openat(-EBADF, buf, O_CLOEXEC | O_DIRECTORY, 0);
1901 } else {
1902 dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY, 0);
1903 }
1904 if (dfd < 0)
1905 return -1;
1906
1907 dfd_dup = dup_cloexec(dfd);
1908 if (dfd_dup < 0)
1909 return -1;
1910
1911 dir = fdopendir(dfd);
1912 if (!dir)
1913 return -1;
1914 /* Transfer ownership to fdopendir(). */
1915 move_fd(dfd);
1916
1917 while ((direntp = readdir(dir))) {
1918 if (!strcmp(direntp->d_name, ".") ||
1919 !strcmp(direntp->d_name, ".."))
1920 continue;
1921
1922 ret = fstatat(dfd_dup, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
1923 if (ret < 0 && errno != ENOENT)
1924 break;
1925
1926 ret = 0;
1927 if (S_ISDIR(st.st_mode))
1928 ret = print_r(dfd_dup, direntp->d_name);
1929 else
1930 INFO("mode(%o):uid(%d):gid(%d) -> %d/%s\n",
1931 (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid, dfd_dup,
1932 direntp->d_name);
1933 if (ret < 0 && errno != ENOENT)
1934 break;
1935 }
1936
1937 if (is_empty_string(path))
1938 ret = fstatat(fd, "", &st, AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH);
1939 else
1940 ret = fstatat(fd, path, &st, AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW);
1941 if (ret)
1942 return -1;
1943 else
1944 INFO("mode(%o):uid(%d):gid(%d) -> %s",
1945 (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid, maybe_empty(path));
1946 return ret;
1947 }