]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
utils: make keyring allocation failure non-fatal
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
28 #include <ctype.h>
29 #include <dirent.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <grp.h>
33 #include <inttypes.h>
34 #include <libgen.h>
35 #include <pthread.h>
36 #include <stddef.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sys/mman.h>
41 #include <sys/mount.h>
42 #include <sys/param.h>
43 #include <sys/prctl.h>
44 #include <sys/stat.h>
45 #include <sys/types.h>
46 #include <sys/wait.h>
47 #include <unistd.h>
48
49 #include "config.h"
50 #include "log.h"
51 #include "lxclock.h"
52 #include "namespace.h"
53 #include "parse.h"
54 #include "raw_syscalls.h"
55 #include "syscall_wrappers.h"
56 #include "utils.h"
57
58 #ifndef HAVE_STRLCPY
59 #include "include/strlcpy.h"
60 #endif
61
62 #ifndef HAVE_STRLCAT
63 #include "include/strlcat.h"
64 #endif
65
66 #ifndef O_PATH
67 #define O_PATH 010000000
68 #endif
69
70 #ifndef O_NOFOLLOW
71 #define O_NOFOLLOW 00400000
72 #endif
73
/* Logging setup for this file.
 * NOTE(review): lxc_log_define() is provided by log.h, which is not visible
 * here - presumably it declares the "utils" log category below "lxc" that the
 * ERROR()/INFO()/SYSERROR() calls in this file use; confirm in log.h.
 */
lxc_log_define(utils, lxc);

/*
 * If path is btrfs, tries to remove it and any subvolumes beneath it.
 *
 * Only declared here; the definition is not part of the visible portion of
 * this file. _recursive_rmdir() treats a true return value as "removed".
 */
extern bool btrfs_try_remove_subvol(const char *path);
80
81 static int _recursive_rmdir(const char *dirname, dev_t pdev,
82 const char *exclude, int level, bool onedev)
83 {
84 struct dirent *direntp;
85 DIR *dir;
86 int ret, failed = 0;
87 char pathname[PATH_MAX];
88 bool hadexclude = false;
89
90 dir = opendir(dirname);
91 if (!dir) {
92 ERROR("Failed to open \"%s\"", dirname);
93 return -1;
94 }
95
96 while ((direntp = readdir(dir))) {
97 struct stat mystat;
98 int rc;
99
100 if (!strcmp(direntp->d_name, ".") ||
101 !strcmp(direntp->d_name, ".."))
102 continue;
103
104 rc = snprintf(pathname, PATH_MAX, "%s/%s", dirname, direntp->d_name);
105 if (rc < 0 || rc >= PATH_MAX) {
106 ERROR("The name of path is too long");
107 failed=1;
108 continue;
109 }
110
111 if (!level && exclude && !strcmp(direntp->d_name, exclude)) {
112 ret = rmdir(pathname);
113 if (ret < 0) {
114 switch(errno) {
115 case ENOTEMPTY:
116 INFO("Not deleting snapshot \"%s\"", pathname);
117 hadexclude = true;
118 break;
119 case ENOTDIR:
120 ret = unlink(pathname);
121 if (ret)
122 INFO("Failed to remove \"%s\"", pathname);
123 break;
124 default:
125 SYSERROR("Failed to rmdir \"%s\"", pathname);
126 failed = 1;
127 break;
128 }
129 }
130
131 continue;
132 }
133
134 ret = lstat(pathname, &mystat);
135 if (ret) {
136 SYSERROR("Failed to stat \"%s\"", pathname);
137 failed = 1;
138 continue;
139 }
140
141 if (onedev && mystat.st_dev != pdev) {
142 /* TODO should we be checking /proc/self/mountinfo for
143 * pathname and not doing this if found? */
144 if (btrfs_try_remove_subvol(pathname))
145 INFO("Removed btrfs subvolume at \"%s\"", pathname);
146 continue;
147 }
148
149 if (S_ISDIR(mystat.st_mode)) {
150 if (_recursive_rmdir(pathname, pdev, exclude, level+1, onedev) < 0)
151 failed=1;
152 } else {
153 if (unlink(pathname) < 0) {
154 SYSERROR("Failed to delete \"%s\"", pathname);
155 failed=1;
156 }
157 }
158 }
159
160 if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
161 SYSERROR("Failed to delete \"%s\"", dirname);
162 failed=1;
163 }
164
165 ret = closedir(dir);
166 if (ret) {
167 SYSERROR("Failed to close directory \"%s\"", dirname);
168 failed=1;
169 }
170
171 return failed ? -1 : 0;
172 }
173
174 /* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
175 * lxc_rmdir_onedev()
176 */
177 static bool is_native_overlayfs(const char *path)
178 {
179 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
180 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
181 return true;
182
183 return false;
184 }
185
/*
 * Recursively remove @path. Unless @path is on overlayfs, entries that live
 * on a different device than @path itself are not descended into. @exclude
 * is passed through to _recursive_rmdir().
 *
 * Returns 0 on success (a @path that does not exist counts as success),
 * -1 if there were any failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat top;
	bool same_dev_only = !is_native_overlayfs(path);

	if (lstat(path, &top) == 0)
		return _recursive_rmdir(path, top.st_dev, exclude, 0, same_dev_only);

	/* Nothing there means nothing to delete. */
	if (errno == ENOENT)
		return 0;

	SYSERROR("Failed to stat \"%s\"", path);
	return -1;
}
205
/* borrowed from iproute2
 *
 * Parse @arg as an unsigned number in @base (as understood by strtoul()) and
 * store it in @val if the whole string was consumed and the value fits into
 * 16 bits.
 *
 * Returns 0 on success and -1 on any parse or range error; @val is left
 * untouched on failure.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	if (errno != 0)
		return -1;

	/* Reject "no digits at all" and trailing garbage. */
	if (end == NULL || end == arg || *end != '\0')
		return -1;

	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;

	return 0;
}
224
/*
 * Create @dir and all missing parent directories, each with @mode.
 *
 * The path is walked component by component; for every component boundary
 * the prefix of the original string up to that point is passed to mkdir().
 * An already existing directory (EEXIST) is not an error.
 *
 * For a relative path (or an empty string) the first prefix is the empty
 * string. mkdir("") always fails with ENOENT, so that empty prefix is skipped
 * instead of making every relative path fail.
 *
 * @dir must not be NULL.
 *
 * Returns 0 on success, -1 on failure with errno set (ENOMEM if the prefix
 * copy could not be allocated, otherwise the errno of mkdir()).
 */
int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;

	do {
		int ret;
		char *makeme;

		/* dir: start of the next component, tmp: its end. */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme) {
			errno = ENOMEM;
			return -1;
		}

		/* Nothing to create yet: path does not start with '/'. */
		if (*makeme == '\0') {
			free(makeme);
			continue;
		}

		ret = mkdir(makeme, mode);
		if (ret < 0 && errno != EEXIST) {
			SYSERROR("Failed to create directory \"%s\"", makeme);
			free(makeme);
			return -1;
		}

		free(makeme);
	} while (tmp != dir);

	return 0;
}
254
255 char *get_rundir()
256 {
257 char *rundir;
258 const char *homedir;
259 struct stat sb;
260
261 if (stat(RUNTIME_PATH, &sb) < 0)
262 return NULL;
263
264 if (geteuid() == sb.st_uid || getegid() == sb.st_gid) {
265 rundir = strdup(RUNTIME_PATH);
266 return rundir;
267 }
268
269 rundir = getenv("XDG_RUNTIME_DIR");
270 if (rundir) {
271 rundir = strdup(rundir);
272 return rundir;
273 }
274
275 INFO("XDG_RUNTIME_DIR isn't set in the environment");
276 homedir = getenv("HOME");
277 if (!homedir) {
278 ERROR("HOME isn't set in the environment");
279 return NULL;
280 }
281
282 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
283 if (!rundir)
284 return NULL;
285
286 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
287
288 return rundir;
289 }
290
/*
 * Wait for the child @pid to terminate, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns 0 only if the child exited normally with exit status 0. Returns -1
 * if waitpid() failed or if the child exited with a non-zero status or did
 * not exit normally.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
312
/*
 * Wait for the child @pid to change state, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be inspected with
 * the W*() macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			return status;
	}
}
331
332 #if HAVE_LIBGNUTLS
333 #include <gnutls/gnutls.h>
334 #include <gnutls/crypto.h>
335
/* Calls gnutls_global_init() automatically at load time, before main(), via
 * the constructor attribute. The return value of gnutls_global_init() is not
 * checked.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	gnutls_global_init();
}
341
342 int sha1sum_file(char *fnam, unsigned char *digest)
343 {
344 char *buf;
345 int ret;
346 FILE *f;
347 long flen;
348
349 if (!fnam)
350 return -1;
351
352 f = fopen_cloexec(fnam, "r");
353 if (!f) {
354 SYSERROR("Failed to open template \"%s\"", fnam);
355 return -1;
356 }
357
358 if (fseek(f, 0, SEEK_END) < 0) {
359 SYSERROR("Failed to seek to end of template");
360 fclose(f);
361 return -1;
362 }
363
364 if ((flen = ftell(f)) < 0) {
365 SYSERROR("Failed to tell size of template");
366 fclose(f);
367 return -1;
368 }
369
370 if (fseek(f, 0, SEEK_SET) < 0) {
371 SYSERROR("Failed to seek to start of template");
372 fclose(f);
373 return -1;
374 }
375
376 if ((buf = malloc(flen+1)) == NULL) {
377 SYSERROR("Out of memory");
378 fclose(f);
379 return -1;
380 }
381
382 if (fread(buf, 1, flen, f) != flen) {
383 SYSERROR("Failed to read template");
384 free(buf);
385 fclose(f);
386 return -1;
387 }
388
389 if (fclose(f) < 0) {
390 SYSERROR("Failed to close template");
391 free(buf);
392 return -1;
393 }
394
395 buf[flen] = '\0';
396 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
397 free(buf);
398 return ret;
399 }
400 #endif
401
402 struct lxc_popen_FILE *lxc_popen(const char *command)
403 {
404 int ret;
405 int pipe_fds[2];
406 pid_t child_pid;
407 struct lxc_popen_FILE *fp = NULL;
408
409 ret = pipe2(pipe_fds, O_CLOEXEC);
410 if (ret < 0)
411 return NULL;
412
413 child_pid = fork();
414 if (child_pid < 0)
415 goto on_error;
416
417 if (!child_pid) {
418 sigset_t mask;
419
420 close(pipe_fds[0]);
421
422 /* duplicate stdout */
423 if (pipe_fds[1] != STDOUT_FILENO)
424 ret = dup2(pipe_fds[1], STDOUT_FILENO);
425 else
426 ret = fcntl(pipe_fds[1], F_SETFD, 0);
427 if (ret < 0) {
428 close(pipe_fds[1]);
429 _exit(EXIT_FAILURE);
430 }
431
432 /* duplicate stderr */
433 if (pipe_fds[1] != STDERR_FILENO)
434 ret = dup2(pipe_fds[1], STDERR_FILENO);
435 else
436 ret = fcntl(pipe_fds[1], F_SETFD, 0);
437 close(pipe_fds[1]);
438 if (ret < 0)
439 _exit(EXIT_FAILURE);
440
441 /* unblock all signals */
442 ret = sigfillset(&mask);
443 if (ret < 0)
444 _exit(EXIT_FAILURE);
445
446 ret = pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
447 if (ret < 0)
448 _exit(EXIT_FAILURE);
449
450 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
451 _exit(127);
452 }
453
454 close(pipe_fds[1]);
455 pipe_fds[1] = -1;
456
457 fp = malloc(sizeof(*fp));
458 if (!fp)
459 goto on_error;
460
461 memset(fp, 0, sizeof(*fp));
462
463 fp->child_pid = child_pid;
464 fp->pipe = pipe_fds[0];
465
466 /* From now on, closing fp->f will also close fp->pipe. So only ever
467 * call fclose(fp->f).
468 */
469 fp->f = fdopen(pipe_fds[0], "r");
470 if (!fp->f)
471 goto on_error;
472
473 return fp;
474
475 on_error:
476 /* We can only close pipe_fds[0] if fdopen() didn't succeed or wasn't
477 * called yet. Otherwise the fd belongs to the file opened by fdopen()
478 * since it isn't dup()ed.
479 */
480 if (fp && !fp->f && pipe_fds[0] >= 0)
481 close(pipe_fds[0]);
482
483 if (pipe_fds[1] >= 0)
484 close(pipe_fds[1]);
485
486 if (fp && fp->f)
487 fclose(fp->f);
488
489 if (fp)
490 free(fp);
491
492 return NULL;
493 }
494
495 int lxc_pclose(struct lxc_popen_FILE *fp)
496 {
497 pid_t wait_pid;
498 int wstatus = 0;
499
500 if (!fp)
501 return -1;
502
503 do {
504 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
505 } while (wait_pid < 0 && errno == EINTR);
506
507 fclose(fp->f);
508 free(fp);
509
510 if (wait_pid < 0)
511 return -1;
512
513 return wstatus;
514 }
515
/*
 * Produce a seed for the rand() family, preferably read from /dev/urandom.
 * If /dev/urandom cannot be opened or read, the seed falls back to the sum of
 * the current time and our pid.
 *
 * @srand_it : when true, srand() is called with the seed as well
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	/* Fallback value, overwritten below when /dev/urandom is readable. */
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom;

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		int items = fread(&seed, sizeof(seed), 1, urandom);

		if (items != 1)
			SYSDEBUG("Unable to fread /dev/urandom, fallback to time+pid rand seed");

		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
538
539 uid_t get_ns_uid(uid_t orig)
540 {
541 char *line = NULL;
542 size_t sz = 0;
543 uid_t nsid, hostid, range;
544 FILE *f;
545
546 f = fopen("/proc/self/uid_map", "r");
547 if (!f) {
548 SYSERROR("Failed to open uid_map");
549 return 0;
550 }
551
552 while (getline(&line, &sz, f) != -1) {
553 if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
554 continue;
555
556 if (hostid <= orig && hostid + range > orig) {
557 nsid += orig - hostid;
558 goto found;
559 }
560 }
561
562 nsid = LXC_INVALID_UID;
563
564 found:
565 fclose(f);
566 free(line);
567 return nsid;
568 }
569
570 gid_t get_ns_gid(gid_t orig)
571 {
572 char *line = NULL;
573 size_t sz = 0;
574 gid_t nsid, hostid, range;
575 FILE *f;
576
577 f = fopen("/proc/self/gid_map", "r");
578 if (!f) {
579 SYSERROR("Failed to open gid_map");
580 return 0;
581 }
582
583 while (getline(&line, &sz, f) != -1) {
584 if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
585 continue;
586
587 if (hostid <= orig && hostid + range > orig) {
588 nsid += orig - hostid;
589 goto found;
590 }
591 }
592
593 nsid = LXC_INVALID_GID;
594
595 found:
596 fclose(f);
597 free(line);
598 return nsid;
599 }
600
/*
 * Returns true if @path can be stat()ed and is a directory (symlinks are
 * followed). Any stat() failure, whatever the reason, yields false.
 */
bool dir_exists(const char *path)
{
	struct stat st;

	return stat(path, &st) == 0 && S_ISDIR(st.st_mode);
}
613
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Fold the @len bytes at @buf into the running 64 bit FNV-1a hash @hval and
 * return the updated hash. With @len == 0 the hash is returned unchanged.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (2^40 + 2^8 + 0xb3)
		 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
636
637 bool is_shared_mountpoint(const char *path)
638 {
639 char buf[LXC_LINELEN];
640 FILE *f;
641 int i;
642 char *p, *p2;
643
644 f = fopen("/proc/self/mountinfo", "r");
645 if (!f)
646 return 0;
647
648 while (fgets(buf, LXC_LINELEN, f)) {
649 for (p = buf, i = 0; p && i < 4; i++)
650 p = strchr(p + 1, ' ');
651 if (!p)
652 continue;
653
654 p2 = strchr(p + 1, ' ');
655 if (!p2)
656 continue;
657
658 *p2 = '\0';
659 if (strcmp(p + 1, path) == 0) {
660 /* This is the path. Is it shared? */
661 p = strchr(p2 + 1, ' ');
662 if (p && strstr(p, "shared:")) {
663 fclose(f);
664 return true;
665 }
666 }
667 }
668
669 fclose(f);
670 return false;
671 }
672
/*
 * Detect whether / is mounted MS_SHARED. The only way I know of to
 * check that is through /proc/self/mountinfo.
 * I'm only checking for /. If the container rootfs or mount location
 * is MS_SHARED, but not '/', then you're out of luck - figuring that
 * out would be too much work to be worth it.
 *
 * Returns 1 if is_shared_mountpoint("/") is true, 0 otherwise.
 */
int detect_shared_rootfs(void)
{
	return is_shared_mountpoint("/") ? 1 : 0;
}
687
/*
 * Move the calling process into the namespace @ns of process @pid by opening
 * /proc/<pid>/ns/<ns> and passing the descriptor to setns().
 *
 * Returns true on success, false if the path does not fit into PATH_MAX, or
 * if open() or setns() fail.
 */
bool switch_to_ns(pid_t pid, const char *ns)
{
	char nspath[PATH_MAX];
	bool switched;
	int len, nsfd;

	/* Switch to new ns */
	len = snprintf(nspath, PATH_MAX, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= PATH_MAX)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("Failed to open \"%s\"", nspath);
		return false;
	}

	switched = setns(nsfd, 0) == 0;
	if (!switched)
		SYSERROR("Failed to set process %d to \"%s\" of %d.", pid, ns, nsfd);

	/* The descriptor is not needed any more either way. */
	close(nsfd);
	return switched;
}
714
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if /proc/self/mountinfo has an entry with mount point "/"
 * whose text from the first '-' after the following field starts with
 * "- rootfs rootfs ". Returns false otherwise, including when mountinfo
 * cannot be opened.
 */
bool detect_ramfs_rootfs(void)
{
	bool on_ramfs = false;
	char *line = NULL;
	size_t cap = 0;
	FILE *mountinfo;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open mountinfo");
		return false;
	}

	while (getline(&line, &cap, mountinfo) != -1) {
		char *field = line, *field_end, *dash;
		int skipped;

		/* Advance to the space in front of the mount point field. */
		for (skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (!field)
			continue;

		field_end = strchr(field + 1, ' ');
		if (!field_end)
			continue;

		*field_end = '\0';
		if (strcmp(field + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		dash = strchr(field_end + 1, '-');
		if (dash && strncmp(dash, "- rootfs rootfs ", 16) == 0) {
			on_ramfs = true;
			break;
		}
	}

	free(line);
	fclose(mountinfo);

	if (on_ramfs)
		INFO("Rootfs is located on ramfs");

	return on_ramfs;
}
763
764 char *on_path(const char *cmd, const char *rootfs)
765 {
766 char *entry = NULL, *path = NULL;
767 char cmdpath[PATH_MAX];
768 int ret;
769
770 path = getenv("PATH");
771 if (!path)
772 return NULL;
773
774 path = strdup(path);
775 if (!path)
776 return NULL;
777
778 lxc_iterate_parts (entry, path, ":") {
779 if (rootfs)
780 ret = snprintf(cmdpath, PATH_MAX, "%s/%s/%s", rootfs,
781 entry, cmd);
782 else
783 ret = snprintf(cmdpath, PATH_MAX, "%s/%s", entry, cmd);
784 if (ret < 0 || ret >= PATH_MAX)
785 continue;
786
787 if (access(cmdpath, X_OK) == 0) {
788 free(path);
789 return strdup(cmdpath);
790 }
791 }
792
793 free(path);
794 return NULL;
795 }
796
/* Returns whatever file_exists() reports for "/proc/self/ns/cgroup".
 * NOTE(review): presumably the presence of that file is used as the indicator
 * that the running kernel supports cgroup namespaces - file_exists() is
 * defined elsewhere, confirm its exact semantics there.
 */
bool cgns_supported(void)
{
	return file_exists("/proc/self/ns/cgroup");
}
801
802 /* historically lxc-init has been under /usr/lib/lxc and under
803 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
804 */
805 char *choose_init(const char *rootfs)
806 {
807 char *retv = NULL;
808 const char *empty = "",
809 *tmp;
810 int ret, env_set = 0;
811
812 if (!getenv("PATH")) {
813 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
814 SYSERROR("Failed to setenv");
815
816 env_set = 1;
817 }
818
819 retv = on_path("init.lxc", rootfs);
820
821 if (env_set)
822 if (unsetenv("PATH"))
823 SYSERROR("Failed to unsetenv");
824
825 if (retv)
826 return retv;
827
828 retv = malloc(PATH_MAX);
829 if (!retv)
830 return NULL;
831
832 if (rootfs)
833 tmp = rootfs;
834 else
835 tmp = empty;
836
837 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
838 if (ret < 0 || ret >= PATH_MAX) {
839 ERROR("The name of path is too long");
840 goto out1;
841 }
842
843 if (access(retv, X_OK) == 0)
844 return retv;
845
846 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
847 if (ret < 0 || ret >= PATH_MAX) {
848 ERROR("The name of path is too long");
849 goto out1;
850 }
851
852 if (access(retv, X_OK) == 0)
853 return retv;
854
855 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
856 if (ret < 0 || ret >= PATH_MAX) {
857 ERROR("The name of path is too long");
858 goto out1;
859 }
860
861 if (access(retv, X_OK) == 0)
862 return retv;
863
864 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
865 if (ret < 0 || ret >= PATH_MAX) {
866 ERROR("The name of path is too long");
867 goto out1;
868 }
869
870 if (access(retv, X_OK) == 0)
871 return retv;
872
873 /*
874 * Last resort, look for the statically compiled init.lxc which we
875 * hopefully bind-mounted in.
876 * If we are called during container setup, and we get to this point,
877 * then the init.lxc.static from the host will need to be bind-mounted
878 * in. So we return NULL here to indicate that.
879 */
880 if (rootfs)
881 goto out1;
882
883 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
884 if (ret < 0 || ret >= PATH_MAX) {
885 WARN("Nonsense - name /lxc.init.static too long");
886 goto out1;
887 }
888
889 if (access(retv, X_OK) == 0)
890 return retv;
891
892 out1:
893 free(retv);
894 return NULL;
895 }
896
897 /*
898 * Given the '-t' template option to lxc-create, figure out what to
899 * do. If the template is a full executable path, use that. If it
900 * is something like 'sshd', then return $templatepath/lxc-sshd.
901 * On success return the template, on error return NULL.
902 */
903 char *get_template_path(const char *t)
904 {
905 int ret, len;
906 char *tpath;
907
908 if (t[0] == '/' && access(t, X_OK) == 0) {
909 tpath = strdup(t);
910 return tpath;
911 }
912
913 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
914
915 tpath = malloc(len);
916 if (!tpath)
917 return NULL;
918
919 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
920 if (ret < 0 || ret >= len) {
921 free(tpath);
922 return NULL;
923 }
924
925 if (access(tpath, X_OK) < 0) {
926 SYSERROR("bad template: %s", t);
927 free(tpath);
928 return NULL;
929 }
930
931 return tpath;
932 }
933
/*
 * @path: a pathname in which every '/' has been replaced with '\0'.
 * @offsetp: pointer to an int holding the offset of the path segment that
 *           was seen last. Updated on return to the offset of the next
 *           segment (or to @fulllen if there is none).
 * @fulllen: full original path length.
 * Returns a pointer to the next path segment, or NULL if done. If *@offsetp
 * is already at or past @fulllen, NULL is returned and *@offsetp is left
 * alone.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Step over what is left of the current segment ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;

	/* ... and over the separator(s) that follow it. */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;

	if (pos >= fulllen)
		return NULL;

	return path + pos;
}
958
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir counts as a subdir if it starts with @dir and either @dir ends in
 * '/', or the match is followed by '/' in @subdir, or both strings are equal.
 * @len must be greater than 0 since the last character of @dir is inspected.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t sublen = strlen(subdir);

	if (sublen < len || strncmp(subdir, dir, len) != 0)
		return false;

	return dir[len - 1] == '/' || subdir[len] == '/' || sublen == len;
}
981
/*
 * Check if the open fd is a symlink. Return -ELOOP if it is. Return
 * -ENOENT if we couldn't fstat. Return 0 if the fd is ok.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1000
/*
 * Open a file or directory, provided that it contains no symlinks.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @nextpath is opened relative to @dirfd with O_NOFOLLOW. If that fails with
 * EPERM or EACCES, a second attempt is made with O_PATH and the result is
 * checked with check_symlink().
 *
 * Returns the new fd on success. On failure returns the negative result of
 * openat() or, if the O_PATH descriptor failed the check, the negative error
 * code from check_symlink().
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, rc;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (fd >= 0) /* Was not a symlink, all good. */
		return fd;

	/* ELOOP (a symlink) and every other error except a permission
	 * problem are final.
	 */
	if (errno != EPERM && errno != EACCES)
		return fd;

	/* We're not root (cause we got EPERM) so try opening with
	 * O_PATH.
	 */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/* O_PATH will return an fd for symlinks. We know
	 * nextpath wasn't a symlink at last openat, so if fd is
	 * now a link, then something fishy is going on.
	 */
	rc = check_symlink(fd);
	if (rc < 0) {
		close(fd);
		return rc;
	}

	return fd;
}
1036
/*
 * Open a path intending for mounting, ensuring that the final path
 * is inside the container's rootfs.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target: path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 *               would be the container's rootfs. NULL or "" means "/".
 *
 * The prefix is opened directly; every remaining component of @target is
 * then opened one at a time relative to the previous one via open_if_safe().
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int pos = 0, fd, total, idx;
	char *copy, *component;

	total = strlen(target);

	/* make sure prefix-skip makes sense */
	if (prefix_skip && strlen(prefix_skip) > 0) {
		pos = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, pos)) {
			ERROR("WHOA there - target \"%s\" didn't start with prefix \"%s\"",
			      target, prefix_skip);
			return -EINVAL;
		}

		/*
		 * get_nextpath() expects its offset argument to be
		 * on a (turned into \0) / or before it, so decrement
		 * the offset to make sure that happens
		 */
		if (pos)
			pos--;
	} else {
		prefix_skip = "/";
		pos = 0;
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	copy = strdup(target);
	if (copy == NULL) {
		ERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (idx = 0; idx < total; idx++) {
		if (copy[idx] == '/')
			copy[idx] = '\0';
	}

	fd = open(prefix_skip, O_RDONLY);
	if (fd < 0) {
		SYSERROR("Failed to open path \"%s\"", prefix_skip);
		free(copy);
		return fd;
	}

	while ((component = get_nextpath(copy, &pos, total)) != NULL) {
		int next_fd, open_errno;

		next_fd = open_if_safe(fd, component);
		/* close() below may clobber errno. */
		open_errno = errno;
		close(fd);

		fd = next_fd;
		if (next_fd < 0) {
			errno = open_errno;
			if (open_errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);

			break;
		}
	}

	free(copy);
	return fd;
}
1120
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @src, @fstype, @flags, @data: passed on to mount(). For a bind mount whose
 *         @src is a relative path, @src is first opened via
 *         open_without_symlink() and mounted through /proc/self/fd/<fd>.
 * @dest:  mount target; opened via open_without_symlink() with @rootfs as
 *         the prefix and mounted onto through /proc/self/fd/<fd>.
 *
 * Returns 0 on success and a negative value on failure: the negative result
 * of open_without_symlink(), -EINVAL if a /proc/self/fd path could not be
 * formatted, or the return value of mount() (with errno preserved).
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("This is a relative bind mount");

		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		/* snprintf() returning the buffer size already means the
		 * output was truncated.
		 */
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Failed to format /proc/self/fd path for mount source");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Keep the errno of the failed open for the caller. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}

		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);

		close(destfd);
		ERROR("Failed to format /proc/self/fd path for mount target");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	/* close() below may clobber the errno of mount(). */
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);

	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount \"%s\" onto \"%s\"", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1193
1194 /*
1195 * Mount a proc under @rootfs if proc self points to a pid other than
1196 * my own. This is needed to have a known-good proc mount for setting
1197 * up LSMs both at container startup and attach.
1198 *
1199 * @rootfs : the rootfs where proc should be mounted
1200 *
1201 * Returns < 0 on failure, 0 if the correct proc was already mounted
1202 * and 1 if a new proc was mounted.
1203 *
1204 * NOTE: not to be called from inside the container namespace!
1205 */
1206 int lxc_mount_proc_if_needed(const char *rootfs)
1207 {
1208 char path[PATH_MAX] = {0};
1209 int link_to_pid, linklen, mypid, ret;
1210 char link[INTTYPE_TO_STRLEN(pid_t)] = {0};
1211
1212 ret = snprintf(path, PATH_MAX, "%s/proc/self", rootfs);
1213 if (ret < 0 || ret >= PATH_MAX) {
1214 SYSERROR("The name of proc path is too long");
1215 return -1;
1216 }
1217
1218 linklen = readlink(path, link, sizeof(link));
1219
1220 ret = snprintf(path, PATH_MAX, "%s/proc", rootfs);
1221 if (ret < 0 || ret >= PATH_MAX) {
1222 SYSERROR("The name of proc path is too long");
1223 return -1;
1224 }
1225
1226 /* /proc not mounted */
1227 if (linklen < 0) {
1228 if (mkdir(path, 0755) && errno != EEXIST)
1229 return -1;
1230
1231 goto domount;
1232 } else if (linklen >= sizeof(link)) {
1233 link[linklen - 1] = '\0';
1234 ERROR("Readlink returned truncated content: \"%s\"", link);
1235 return -1;
1236 }
1237
1238 mypid = lxc_raw_getpid();
1239 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1240
1241 if (lxc_safe_int(link, &link_to_pid) < 0)
1242 return -1;
1243
1244 /* correct procfs is already mounted */
1245 if (link_to_pid == mypid)
1246 return 0;
1247
1248 ret = umount2(path, MNT_DETACH);
1249 if (ret < 0)
1250 SYSWARN("Failed to umount \"%s\" with MNT_DETACH", path);
1251
1252 domount:
1253 /* rootfs is NULL */
1254 if (!strcmp(rootfs, ""))
1255 ret = mount("proc", path, "proc", 0, NULL);
1256 else
1257 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
1258 if (ret < 0)
1259 return -1;
1260
1261 INFO("Mounted /proc in container for security transition");
1262 return 1;
1263 }
1264
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the new file descriptor, or the negative result of open() after
 * logging the failure.
 */
int open_devnull(void)
{
	int nullfd;

	nullfd = open("/dev/null", O_RDWR);
	if (nullfd >= 0)
		return nullfd;

	SYSERROR("Can't open /dev/null");
	return nullfd;
}
1273
/*
 * Make stdin, stdout and stderr (in that order) duplicates of @fd.
 *
 * Returns 0 on success and -1 if @fd is negative or any dup2() fails;
 * descriptors that were already replaced stay replaced in that case. @fd
 * itself is not closed.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1295
/*
 * Redirect stdin, stdout and stderr to /dev/null.
 *
 * Returns -1 if /dev/null cannot be opened, otherwise the result of
 * set_stdfds(). The temporary /dev/null descriptor is closed again.
 */
int null_stdfds(void)
{
	int nullfd, rc;

	nullfd = open_devnull();
	if (nullfd < 0)
		return -1;

	rc = set_stdfds(nullfd);
	close(nullfd);

	return rc;
}
1309
1310 /* Check whether a signal is blocked by a process. */
1311 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1312 #define __PROC_STATUS_LEN (6 + INTTYPE_TO_STRLEN(pid_t) + 7 + 1)
1313 bool task_blocks_signal(pid_t pid, int signal)
1314 {
1315 int ret;
1316 char status[__PROC_STATUS_LEN] = {0};
1317 FILE *f;
1318 uint64_t sigblk = 0, one = 1;
1319 size_t n = 0;
1320 bool bret = false;
1321 char *line = NULL;
1322
1323 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1324 if (ret < 0 || ret >= __PROC_STATUS_LEN)
1325 return bret;
1326
1327 f = fopen(status, "r");
1328 if (!f)
1329 return bret;
1330
1331 while (getline(&line, &n, f) != -1) {
1332 char *numstr;
1333
1334 if (strncmp(line, "SigBlk:", 7))
1335 continue;
1336
1337 numstr = lxc_trim_whitespace_in_place(line + 7);
1338 ret = lxc_safe_uint64(numstr, &sigblk, 16);
1339 if (ret < 0)
1340 goto out;
1341
1342 break;
1343 }
1344
1345 if (sigblk & (one << (signal - 1)))
1346 bret = true;
1347
1348 out:
1349 free(line);
1350 fclose(f);
1351 return bret;
1352 }
1353
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC and return the
 * descriptor.
 *
 * This way we can use this function to also check whether namespaces
 * are supported by the kernel by passing in the NULL or the empty
 * string: in that case /proc/<pid>/ns itself is opened.
 *
 * Returns the fd on success. Returns -1 with errno set to EFBIG if the path
 * does not fit into the internal buffer, or -1 with the errno of open().
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	const char *sep = "", *name = "";
	int len;

	/* Only append "/<ns>" when a namespace name was actually given. */
	if (ns && strcmp(ns, "") != 0) {
		sep = "/";
		name = ns;
	}

	len = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, sep, name);
	if (len < 0 || (size_t)len >= __NS_PATH_LEN) {
		errno = EFBIG;
		return -1;
	}

	return open(path, O_RDONLY | O_CLOEXEC);
}
1375
1376 bool lxc_switch_uid_gid(uid_t uid, gid_t gid)
1377 {
1378 int ret = 0;
1379
1380 if (gid != LXC_INVALID_GID) {
1381 ret = setgid(gid);
1382 if (ret < 0) {
1383 SYSERROR("Failed to switch to gid %d", gid);
1384 return false;
1385 }
1386 NOTICE("Switched to gid %d", gid);
1387 }
1388
1389 if (uid != LXC_INVALID_UID) {
1390 ret = setuid(uid);
1391 if (ret < 0) {
1392 SYSERROR("Failed to switch to uid %d", uid);
1393 return false;
1394 }
1395 NOTICE("Switched to uid %d", uid);
1396 }
1397
1398 return true;
1399 }
1400
/* Thin wrapper around setgroups() that logs the outcome in a uniform way.
 *
 * Returns true if setgroups(@size, @list) succeeded, false otherwise.
 */
bool lxc_setgroups(int size, gid_t list[])
{
	int rc;

	rc = setgroups(size, list);
	if (rc >= 0) {
		NOTICE("Dropped additional groups");
		return true;
	}

	SYSERROR("Failed to setgroups()");
	return false;
}
1412
1413 static int lxc_get_unused_loop_dev_legacy(char *loop_name)
1414 {
1415 struct dirent *dp;
1416 struct loop_info64 lo64;
1417 DIR *dir;
1418 int dfd = -1, fd = -1, ret = -1;
1419
1420 dir = opendir("/dev");
1421 if (!dir) {
1422 SYSERROR("Failed to open \"/dev\"");
1423 return -1;
1424 }
1425
1426 while ((dp = readdir(dir))) {
1427 if (strncmp(dp->d_name, "loop", 4) != 0)
1428 continue;
1429
1430 dfd = dirfd(dir);
1431 if (dfd < 0)
1432 continue;
1433
1434 fd = openat(dfd, dp->d_name, O_RDWR);
1435 if (fd < 0)
1436 continue;
1437
1438 ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
1439 if (ret < 0) {
1440 if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 ||
1441 errno != ENXIO) {
1442 close(fd);
1443 fd = -1;
1444 continue;
1445 }
1446 }
1447
1448 ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
1449 if (ret < 0 || ret >= LO_NAME_SIZE) {
1450 close(fd);
1451 fd = -1;
1452 continue;
1453 }
1454
1455 break;
1456 }
1457
1458 closedir(dir);
1459
1460 if (fd < 0)
1461 return -1;
1462
1463 return fd;
1464 }
1465
1466 static int lxc_get_unused_loop_dev(char *name_loop)
1467 {
1468 int loop_nr, ret;
1469 int fd_ctl = -1, fd_tmp = -1;
1470
1471 fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
1472 if (fd_ctl < 0) {
1473 SYSERROR("Failed to open loop control");
1474 return -ENODEV;
1475 }
1476
1477 loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE);
1478 if (loop_nr < 0) {
1479 SYSERROR("Failed to get loop control");
1480 goto on_error;
1481 }
1482
1483 ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr);
1484 if (ret < 0 || ret >= LO_NAME_SIZE)
1485 goto on_error;
1486
1487 fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC);
1488 if (fd_tmp < 0)
1489 SYSERROR("Failed to open loop \"%s\"", name_loop);
1490
1491 on_error:
1492 close(fd_ctl);
1493 return fd_tmp;
1494 }
1495
1496 int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
1497 {
1498 int ret;
1499 struct loop_info64 lo64;
1500 int fd_img = -1, fret = -1, fd_loop = -1;
1501
1502 fd_loop = lxc_get_unused_loop_dev(loop_dev);
1503 if (fd_loop < 0) {
1504 if (fd_loop != -ENODEV)
1505 goto on_error;
1506
1507 fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
1508 if (fd_loop < 0)
1509 goto on_error;
1510 }
1511
1512 fd_img = open(source, O_RDWR | O_CLOEXEC);
1513 if (fd_img < 0) {
1514 SYSERROR("Failed to open source \"%s\"", source);
1515 goto on_error;
1516 }
1517
1518 ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
1519 if (ret < 0) {
1520 SYSERROR("Failed to set loop fd");
1521 goto on_error;
1522 }
1523
1524 memset(&lo64, 0, sizeof(lo64));
1525 lo64.lo_flags = flags;
1526
1527 ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
1528 if (ret < 0) {
1529 SYSERROR("Failed to set loop status64");
1530 goto on_error;
1531 }
1532
1533 fret = 0;
1534
1535 on_error:
1536 if (fd_img >= 0)
1537 close(fd_img);
1538
1539 if (fret < 0 && fd_loop >= 0) {
1540 close(fd_loop);
1541 fd_loop = -1;
1542 }
1543
1544 return fd_loop;
1545 }
1546
/* Unmount everything that is stacked on @path.
 *
 * umount2() is called repeatedly (with MNT_DETACH when @lazy is set) until it
 * fails. A failure with EINVAL ends the loop normally. Any other error is
 * treated as fatal so we cannot spin forever; the alternative would be to
 * keep parsing /proc/self/mountinfo.
 *
 * Returns the number of successful unmounts (saturating at INT_MAX) or
 * -errno on a fatal error.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int umount_flags = lazy ? MNT_DETACH : 0;
	int count = 0;

	for (;;) {
		if (umount2(path, umount_flags) < 0) {
			if (errno != EINVAL)
				return -errno;

			break;
		}

		/* Stop counting instead of overflowing. */
		if (count != INT_MAX)
			count++;
	}

	return count;
}
1578
/* Run @child_fn(@args) in a child process and capture its output.
 *
 * The child's stdout and stderr are redirected into a pipe. When @buf is
 * non-NULL and @buf_size is non-zero, the parent performs a single read of up
 * to @buf_size - 1 bytes from that pipe into @buf. If anything was read, the
 * last byte read is overwritten with the terminating '\0' (presumably to drop
 * a trailing newline); otherwise @buf stays the empty string.
 *
 * Returns -1 if the pipe or the child could not be created, otherwise the
 * value of wait_for_pid() for the child.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool capture = buf && buf_size > 0;
	int fds[2];
	pid_t pid;
	int status;

	/* Never hand uninitialized memory back to the caller. */
	if (capture)
		buf[0] = '\0';

	if (pipe(fds) < 0) {
		SYSERROR("Failed to create pipe");
		return -1;
	}

	pid = lxc_raw_clone(0);
	if (pid < 0) {
		close(fds[0]);
		close(fds[1]);
		SYSERROR("Failed to create new process");
		return -1;
	}

	if (pid == 0) {
		int dup_ret;

		/* The child only writes. */
		close(fds[0]);

		/* Point stdout and then stderr at the write end. */
		dup_ret = dup2(fds[1], STDOUT_FILENO);
		if (dup_ret >= 0)
			dup_ret = dup2(fds[1], STDERR_FILENO);

		/* The original write end is not needed anymore. */
		close(fds[1]);

		if (dup_ret < 0) {
			SYSERROR("Failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Not expected to return. */
		child_fn(args);
		ERROR("Failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* The parent only reads. */
	close(fds[1]);

	if (capture) {
		ssize_t nread;

		nread = lxc_read_nointr(fds[0], buf, buf_size - 1);
		if (nread > 0)
			buf[nread - 1] = '\0';
	}

	status = wait_for_pid(pid);
	close(fds[0]);

	return status;
}
1642
/* Check whether the network interface @nic has an entry below /sys/class/net.
 *
 * The special name "none" is always reported as existing. Returns false when
 * the resulting path does not fit into the local buffer or when stat() on it
 * fails.
 */
bool lxc_nic_exists(char *nic)
{
#define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
	char path[__LXC_SYS_CLASS_NET_LEN];
	struct stat sb;
	int len;

	if (strcmp(nic, "none") == 0)
		return true;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s", nic);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	return stat(path, &sb) >= 0;
}
1663
/* Round @n up to the nearest power of two.
 *
 * A value that already is a power of two is returned unchanged. 0 is not
 * valid input and yields 0, since 0 is not a power of two. Values above 2^63
 * that are not a power of two also yield 0 because the result does not fit
 * into 64 bits.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	uint64_t v;

	if (n == 0)
		return 0;

	/* Set every bit below the highest set bit of (n - 1), then add one. */
	v = n - 1;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v |= v >> 32;

	return v + 1;
}
1681
/* Request that @signal is delivered to the caller when its parent dies.
 *
 * After setting PR_SET_PDEATHSIG the current parent pid is compared against
 * @parent. If they differ the caller has already been orphaned and SIGKILL is
 * raised on itself.
 *
 * Returns 0 on success and -1 if the last call made (prctl(), or raise() when
 * it was needed) failed.
 */
int lxc_set_death_signal(int signal, pid_t parent)
{
	int rc;

	rc = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
		   prctl_arg(0), prctl_arg(0));

	/* Check whether we have been orphaned. */
	if ((pid_t)syscall(SYS_getppid) != parent)
		rc = raise(SIGKILL);

	return rc < 0 ? -1 : 0;
}
1703
/* Set (@cloexec true) or clear (@cloexec false) FD_CLOEXEC on @fd.
 *
 * The descriptor flags are only written back when they actually change.
 *
 * Returns 0 on success and -errno when reading or writing the flags fails.
 */
int fd_cloexec(int fd, bool cloexec)
{
	int current, wanted;

	current = fcntl(fd, F_GETFD, 0);
	if (current < 0)
		return -errno;

	wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);

	if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
		return -errno;

	return 0;
}
1725
/* Remove the directory tree rooted at @dirname.
 *
 * Every subdirectory is removed recursively before rmdir() is called on
 * @dirname itself. Entries that are not directories are skipped; they are
 * not unlinked here.
 *
 * The walk continues after a failure so that as much as possible is removed.
 * Only the first failure in a given directory is logged.
 *
 * Returns 0 if everything succeeded and -1 if any step failed.
 */
int recursive_destroy(char *dirname)
{
	int ret;
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir) {
		SYSERROR("Failed to open dir \"%s\"", dirname);
		return -1;
	}

	while ((direntp = readdir(dir))) {
		char *pathname;
		struct stat mystat;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		/* lstat() so that symlinks are never followed. */
		ret = lstat(pathname, &mystat);
		if (ret < 0) {
			if (!r)
				SYSWARN("Failed to stat \"%s\"", pathname);

			r = -1;
			goto next;
		}

		if (!S_ISDIR(mystat.st_mode))
			goto next;

		ret = recursive_destroy(pathname);
		if (ret < 0)
			r = -1;

	next:
		free(pathname);
	}

	ret = rmdir(dirname);
	if (ret < 0) {
		if (!r)
			SYSWARN("Failed to delete \"%s\"", dirname);

		r = -1;
	}

	ret = closedir(dir);
	if (ret < 0) {
		/* Report the close failure as such, not as a failed delete. */
		if (!r)
			SYSWARN("Failed to close directory \"%s\"", dirname);

		r = -1;
	}

	return r;
}
1787
1788 int lxc_setup_keyring(void)
1789 {
1790 key_serial_t keyring;
1791 int ret = 0;
1792
1793 /* Try to allocate a new session keyring for the container to prevent
1794 * information leaks.
1795 */
1796 keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, prctl_arg(0),
1797 prctl_arg(0), prctl_arg(0), prctl_arg(0));
1798 if (keyring < 0) {
1799 switch (errno) {
1800 case ENOSYS:
1801 DEBUG("The keyctl() syscall is not supported or blocked");
1802 break;
1803 case EACCES:
1804 __fallthrough;
1805 case EPERM:
1806 DEBUG("Failed to access kernel keyring. Continuing...");
1807 break;
1808 default:
1809 SYSERROR("Failed to create kernel keyring");
1810 break;
1811 }
1812 }
1813
1814 return ret;
1815 }