2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/mount.h>
40 #include <sys/types.h>
43 #include <sys/prctl.h>
48 #include "namespace.h"
54 #ifndef PR_SET_MM_ARG_START
55 #define PR_SET_MM_ARG_START 8
58 #ifndef PR_SET_MM_ARG_END
59 #define PR_SET_MM_ARG_END 9
62 #ifndef PR_SET_MM_ENV_START
63 #define PR_SET_MM_ENV_START 10
66 #ifndef PR_SET_MM_ENV_END
67 #define PR_SET_MM_ENV_END 11
70 lxc_log_define(lxc_utils
, lxc
);
73 * if path is btrfs, tries to remove it and any subvolumes beneath it
75 extern bool btrfs_try_remove_subvol(const char *path
);
77 static int _recursive_rmdir(char *dirname
, dev_t pdev
,
78 const char *exclude
, int level
, bool onedev
)
80 struct dirent dirent
, *direntp
;
83 char pathname
[MAXPATHLEN
];
84 bool hadexclude
= false;
86 dir
= opendir(dirname
);
88 ERROR("%s: failed to open %s", __func__
, dirname
);
92 while (!readdir_r(dir
, &dirent
, &direntp
)) {
99 if (!strcmp(direntp
->d_name
, ".") ||
100 !strcmp(direntp
->d_name
, ".."))
103 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
104 if (rc
< 0 || rc
>= MAXPATHLEN
) {
105 ERROR("pathname too long");
110 if (!level
&& exclude
&& !strcmp(direntp
->d_name
, exclude
)) {
111 ret
= rmdir(pathname
);
115 INFO("Not deleting snapshot %s", pathname
);
119 ret
= unlink(pathname
);
121 INFO("%s: failed to remove %s", __func__
, pathname
);
124 SYSERROR("%s: failed to rmdir %s", __func__
, pathname
);
132 ret
= lstat(pathname
, &mystat
);
134 ERROR("%s: failed to stat %s", __func__
, pathname
);
138 if (onedev
&& mystat
.st_dev
!= pdev
) {
139 /* TODO should we be checking /proc/self/mountinfo for
140 * pathname and not doing this if found? */
141 if (btrfs_try_remove_subvol(pathname
))
142 INFO("Removed btrfs subvolume at %s\n", pathname
);
145 if (S_ISDIR(mystat
.st_mode
)) {
146 if (_recursive_rmdir(pathname
, pdev
, exclude
, level
+1, onedev
) < 0)
149 if (unlink(pathname
) < 0) {
150 SYSERROR("%s: failed to delete %s", __func__
, pathname
);
156 if (rmdir(dirname
) < 0 && !btrfs_try_remove_subvol(dirname
) && !hadexclude
) {
157 ERROR("%s: failed to delete %s", __func__
, dirname
);
163 ERROR("%s: failed to close directory %s", __func__
, dirname
);
167 return failed
? -1 : 0;
/* we have two different magic values for overlayfs, yay */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f
#define OVERLAY_SUPER_MAGIC 0x794c7630

/*
 * In overlayfs, st_dev is unreliable. so on overlayfs we don't do
 * the lxc_rmdir_onedev()
 *
 * @path : path whose containing filesystem is inspected with statfs()
 *
 * Returns true when the filesystem type reported for @path matches either
 * overlayfs magic value above, false otherwise. A failing statfs() is
 * reported as false as well.
 */
static bool is_native_overlayfs(const char *path)
{
	struct statfs sb;

	if (statfs(path, &sb) < 0)
		return false;

	return sb.f_type == OVERLAYFS_SUPER_MAGIC ||
	       sb.f_type == OVERLAY_SUPER_MAGIC;
}
/*
 * Recursively delete @path through _recursive_rmdir().
 *
 * The walk is restricted to the device @path lives on, except when @path is
 * on overlayfs, where st_dev cannot be trusted and the restriction is turned
 * off.
 *
 * @path    : directory tree to remove
 * @exclude : passed through unchanged to _recursive_rmdir()
 *
 * returns 0 on success, -1 if there were any failures. A @path that does
 * not exist counts as success: there is nothing left to delete.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat mystat;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &mystat) < 0) {
		if (errno == ENOENT)
			return 0;
		ERROR("%s: failed to stat %s", __func__, path);
		return -1;
	}

	return _recursive_rmdir(path, mystat.st_dev, exclude, 0, onedev);
}
/*
 * borrowed from iproute2
 *
 * Parse @arg as an unsigned 16-bit integer in the given @base (same meaning
 * as the base argument of strtoul(), so 0 auto-detects 0x/0 prefixes).
 *
 * @val  : receives the parsed value; left untouched on failure
 * @arg  : NUL-terminated text to parse
 * @base : numeric base handed to strtoul()
 *
 * Returns 0 on success, -1 when @arg is NULL or empty, contains trailing
 * characters, does not fit in 16 bits, or strtoul() reports an error.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	unsigned long res;
	char *ptr = NULL;

	if (!arg || !*arg)
		return -1;

	/* strtoul() only sets errno on failure; clear any stale value so the
	 * check below reflects this call alone. */
	errno = 0;
	res = strtoul(arg, &ptr, base);
	if (!ptr || ptr == arg || *ptr || res > 0xFFFF || errno != 0)
		return -1;

	*val = (unsigned short)res;

	return 0;
}
/*
 * Create @dir and every missing parent directory, like "mkdir -p".
 *
 * The path is walked one component at a time; on each step the prefix up to
 * the start of the next component is duplicated and passed to mkdir().
 * Components that already exist (EEXIST) are not treated as errors.
 *
 * @dir  : path to create
 * @mode : mode passed to every mkdir() call
 *
 * Returns 0 on success, -1 if a prefix could not be duplicated or a
 * mkdir() call failed for a reason other than EEXIST.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* skip the run of slashes, then the component that follows */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme)
			return -1;

		/* the very first prefix of a relative path is empty */
		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
256 if (geteuid() == 0) {
257 rundir
= strdup(RUNTIME_PATH
);
261 rundir
= getenv("XDG_RUNTIME_DIR");
263 rundir
= strdup(rundir
);
267 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
268 homedir
= getenv("HOME");
270 ERROR("HOME isn't set in the environment.");
274 rundir
= malloc(sizeof(char) * (17 + strlen(homedir
)));
275 sprintf(rundir
, "%s/.cache/lxc/run/", homedir
);
/*
 * Wait for child @pid to terminate and reduce its fate to pass/fail.
 *
 * waitpid() is restarted when it is interrupted by a signal (EINTR) or
 * returns a pid other than the one asked for.
 *
 * Returns 0 only if the child exited normally with status 0; -1 if
 * waitpid() failed, the child was killed by a signal, or it exited with a
 * non-zero status.
 */
int wait_for_pid(pid_t pid)
{
	int status;
	pid_t ret;

	for (;;) {
		ret = waitpid(pid, &status, 0);
		if (ret == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (ret == pid)
			break;
	}

	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return -1;

	return 0;
}
/*
 * Wait for child @pid to terminate and hand back its raw wait status.
 *
 * waitpid() is restarted when it is interrupted by a signal (EINTR) or
 * returns a pid other than the one asked for.
 *
 * Returns the status word filled in by waitpid() (to be decoded with
 * WIFEXITED()/WEXITSTATUS() and friends), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;
	pid_t ret;

	for (;;) {
		ret = waitpid(pid, &status, 0);
		if (ret == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (ret == pid)
			break;
	}

	return status;
}
/*
 * write() wrapper that transparently restarts the call when it is
 * interrupted by a signal before any data was written (EINTR).
 *
 * Returns whatever write() returns: the number of bytes written (which may
 * be less than @count) or -1 with errno set.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t ret;

	do {
		ret = write(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}
/*
 * read() wrapper that transparently restarts the call when it is
 * interrupted by a signal before any data was read (EINTR).
 *
 * Returns whatever read() returns: the number of bytes read (0 at end of
 * file, possibly less than @count) or -1 with errno set.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t ret;

	do {
		ret = read(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}
/*
 * Read exactly @count bytes from @fd with lxc_read_nointr() and optionally
 * verify them against a reference buffer.
 *
 * @fd           : descriptor to read from
 * @buf          : destination buffer of at least @count bytes
 * @count        : number of bytes that must arrive in a single read
 * @expected_buf : if non-NULL, @count bytes the data must be equal to
 *
 * Returns @count on success. A read error or end of file is passed through
 * unchanged (<= 0). A short read returns -1, and a content mismatch returns
 * -1 with errno set to EINVAL.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t ret;

	ret = lxc_read_nointr(fd, buf, count);
	if (ret <= 0)
		return ret;

	if ((size_t)ret != count)
		return -1;

	if (expected_buf && memcmp(buf, expected_buf, count) != 0) {
		errno = EINVAL;
		return -1;
	}

	return ret;
}
350 #include <gnutls/gnutls.h>
351 #include <gnutls/crypto.h>
353 __attribute__((constructor
))
354 static void gnutls_lxc_init(void)
356 gnutls_global_init();
359 int sha1sum_file(char *fnam
, unsigned char *digest
)
368 f
= fopen_cloexec(fnam
, "r");
370 SYSERROR("Error opening template");
373 if (fseek(f
, 0, SEEK_END
) < 0) {
374 SYSERROR("Error seeking to end of template");
378 if ((flen
= ftell(f
)) < 0) {
379 SYSERROR("Error telling size of template");
383 if (fseek(f
, 0, SEEK_SET
) < 0) {
384 SYSERROR("Error seeking to start of template");
388 if ((buf
= malloc(flen
+1)) == NULL
) {
389 SYSERROR("Out of memory");
393 if (fread(buf
, 1, flen
, f
) != flen
) {
394 SYSERROR("Failure reading template");
400 SYSERROR("Failre closing template");
405 ret
= gnutls_hash_fast(GNUTLS_DIG_SHA1
, buf
, flen
, (void *)digest
);
411 char** lxc_va_arg_list_to_argv(va_list ap
, size_t skip
, int do_strdup
)
414 size_t count
= 1 + skip
;
417 /* first determine size of argument list, we don't want to reallocate
422 char* arg
= va_arg(ap2
, char*);
429 result
= calloc(count
, sizeof(char*));
434 char* arg
= va_arg(ap
, char*);
437 arg
= do_strdup
? strdup(arg
) : arg
;
440 result
[count
++] = arg
;
443 /* calloc has already set last element to NULL*/
/*
 * Convenience wrapper around lxc_va_arg_list_to_argv() that requests no
 * string duplication (do_strdup == 0) and exposes the result as an array
 * of const strings.
 *
 * @ap   : argument list forwarded unchanged
 * @skip : forwarded unchanged
 *
 * Returns whatever lxc_va_arg_list_to_argv() returns, cast to const char**.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	return (const char**)lxc_va_arg_list_to_argv(ap, skip, 0);
}
456 extern struct lxc_popen_FILE
*lxc_popen(const char *command
)
458 struct lxc_popen_FILE
*fp
= NULL
;
459 int parent_end
= -1, child_end
= -1;
463 int r
= pipe2(pipe_fds
, O_CLOEXEC
);
466 ERROR("pipe2 failure");
470 parent_end
= pipe_fds
[0];
471 child_end
= pipe_fds
[1];
475 if (child_pid
== 0) {
477 int child_std_end
= STDOUT_FILENO
;
479 if (child_end
!= child_std_end
) {
480 /* dup2() doesn't dup close-on-exec flag */
481 dup2(child_end
, child_std_end
);
483 /* it's safe not to close child_end here
484 * as it's marked close-on-exec anyway
488 * The descriptor is already the one we will use.
489 * But it must not be marked close-on-exec.
492 if (fcntl(child_end
, F_SETFD
, 0) != 0) {
493 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
500 * This is the main/only reason
501 * why we do our lousy popen() emulation.
506 sigprocmask(SIG_UNBLOCK
, &mask
, NULL
);
509 execl("/bin/sh", "sh", "-c", command
, (char *) NULL
);
519 ERROR("fork failure");
523 fp
= calloc(1, sizeof(*fp
));
525 ERROR("failed to allocate memory");
529 fp
->f
= fdopen(parent_end
, "r");
531 ERROR("fdopen failure");
535 fp
->child_pid
= child_pid
;
544 parent_end
= -1; /* so we do not close it second time */
550 if (parent_end
!= -1)
556 extern int lxc_pclose(struct lxc_popen_FILE
*fp
)
565 child_pid
= fp
->child_pid
;
566 /* free memory (we still need to close file stream) */
571 if (!f
|| fclose(f
)) {
572 ERROR("fclose failure");
577 wait_pid
= waitpid(child_pid
, &wstatus
, 0);
578 } while (wait_pid
== -1 && errno
== EINTR
);
580 if (wait_pid
== -1) {
581 ERROR("waitpid failure");
/*
 * Return a newly allocated copy of @haystack in which every
 * non-overlapping occurrence of @needle (searched left to right) has been
 * replaced by @replacement.
 *
 * An empty @needle matches nothing, so the result is a plain copy of
 * @haystack (searching for "" would otherwise never advance).
 *
 * @needle      : text to search for
 * @replacement : text substituted for each match; may be empty
 * @haystack    : text to search in
 *
 * Returns the new string, which the caller must free(), or NULL if the
 * result size overflows or memory cannot be allocated.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t result_len = strlen(haystack);
	size_t matches = 0;
	size_t tail_len;
	const char *cur, *hit;
	char *result, *out;

	/* pass 1: count matches so the result can be sized exactly */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			matches++;
	}

	if (replacement_len >= needle_len) {
		size_t growth = replacement_len - needle_len;

		/* refuse a result whose length does not fit in size_t */
		if (growth > 0 && matches > ((size_t)-1 - 1 - result_len) / growth)
			return NULL;
		result_len += matches * growth;
	} else {
		result_len -= matches * (needle_len - replacement_len);
	}

	/* calloc leaves the terminating NUL in place */
	result = calloc(1, result_len + 1);
	if (!result)
		return NULL;

	/* pass 2: copy the text between matches and the replacements */
	out = result;
	cur = haystack;
	if (needle_len > 0) {
		while ((hit = strstr(cur, needle))) {
			size_t part_len = (size_t)(hit - cur);

			memcpy(out, cur, part_len);
			out += part_len;
			memcpy(out, replacement, replacement_len);
			out += replacement_len;
			cur = hit + needle_len;
		}
	}

	tail_len = strlen(cur);
	memcpy(out, cur, tail_len);

	return result;
}
/*
 * Check whether @needle is equal to one of the strings in @haystack.
 *
 * @needle   : string to look for
 * @haystack : NULL-terminated array of strings; may itself be NULL
 *
 * Returns true on an exact match, false otherwise (including when either
 * argument is NULL).
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	if (!needle)
		return false;

	for (; haystack && *haystack; haystack++)
		if (!strcmp(needle, *haystack))
			return true;

	return false;
}
/*
 * Concatenate the strings in @parts, inserting @sep between neighbours.
 *
 * @sep           : separator text
 * @parts         : NULL-terminated array of strings
 * @use_as_prefix : when true, @sep is also emitted once before the first
 *                  part (e.g. to build an absolute path)
 *
 * Returns a newly allocated string the caller must free(), or NULL if
 * memory cannot be allocated.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **p;
	char *result, *out;
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;

	/* calculate new string length */
	for (p = parts; *p; p++)
		result_len += (p > parts ? sep_len : 0) + strlen(*p);

	result = calloc(result_len + 1, 1);
	if (!result)
		return NULL;

	/* append through a cursor so each piece is copied exactly once */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (p = parts; *p; p++) {
		size_t part_len = strlen(*p);

		if (p > parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, *p, part_len);
		out += part_len;
	}

	return result;
}
671 char **lxc_normalize_path(const char *path
)
675 size_t components_len
= 0;
678 components
= lxc_string_split(path
, '/');
681 for (p
= components
; *p
; p
++)
684 /* resolve '.' and '..' */
685 for (pos
= 0; pos
< components_len
; ) {
686 if (!strcmp(components
[pos
], ".") || (!strcmp(components
[pos
], "..") && pos
== 0)) {
687 /* eat this element */
688 free(components
[pos
]);
689 memmove(&components
[pos
], &components
[pos
+1], sizeof(char *) * (components_len
- pos
));
691 } else if (!strcmp(components
[pos
], "..")) {
692 /* eat this and the previous element */
693 free(components
[pos
- 1]);
694 free(components
[pos
]);
695 memmove(&components
[pos
-1], &components
[pos
+1], sizeof(char *) * (components_len
- pos
));
/*
 * Join two path fragments into a newly allocated string.
 *
 * A '/' is inserted between @first and @second unless @second already
 * starts with one. Nothing else is normalised: slashes at the end of
 * @first are kept as they are.
 *
 * Returns the joined path, which the caller must free(), or NULL if memory
 * cannot be allocated.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	bool need_sep = second[0] != '/';
	size_t len = strlen(first) + strlen(second) + 1 + (need_sep ? 1 : 0);
	char *result;

	result = calloc(1, len);
	if (!result)
		return NULL;

	snprintf(result, len, "%s%s%s", first, need_sep ? "/" : "", second);

	return result;
}
/*
 * Check whether @needle is one of the tokens of the @_sep-separated list
 * @haystack.
 *
 * Tokens are the maximal runs of characters other than @_sep, so empty
 * fields (leading, trailing or doubled separators) are skipped and an
 * empty @needle never matches. The list is scanned in place; no copy of
 * @haystack is made.
 *
 * Returns true if a token equals @needle, false otherwise (including when
 * either string is NULL).
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	const char *cur;
	size_t needle_len;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);

	for (cur = haystack; *cur; ) {
		const char *end;

		/* skip separators in front of the next token */
		if (*cur == _sep) {
			cur++;
			continue;
		}

		/* find the end of the current token */
		for (end = cur; *end && *end != _sep; end++)
			;

		if ((size_t)(end - cur) == needle_len &&
		    memcmp(cur, needle, needle_len) == 0)
			return true;

		cur = end;
	}

	return false;
}
743 char **lxc_string_split(const char *string
, char _sep
)
745 char *token
, *str
, *saveptr
= NULL
;
746 char sep
[2] = { _sep
, '\0' };
747 char **result
= NULL
;
748 size_t result_capacity
= 0;
749 size_t result_count
= 0;
753 return calloc(1, sizeof(char *));
755 str
= alloca(strlen(string
)+1);
757 for (; (token
= strtok_r(str
, sep
, &saveptr
)); str
= NULL
) {
758 r
= lxc_grow_array((void ***)&result
, &result_capacity
, result_count
+ 1, 16);
761 result
[result_count
] = strdup(token
);
762 if (!result
[result_count
])
767 /* if we allocated too much, reduce it */
768 return realloc(result
, (result_count
+ 1) * sizeof(char *));
771 lxc_free_array((void **)result
, free
);
776 char **lxc_string_split_and_trim(const char *string
, char _sep
)
778 char *token
, *str
, *saveptr
= NULL
;
779 char sep
[2] = { _sep
, '\0' };
780 char **result
= NULL
;
781 size_t result_capacity
= 0;
782 size_t result_count
= 0;
787 return calloc(1, sizeof(char *));
789 str
= alloca(strlen(string
)+1);
791 for (; (token
= strtok_r(str
, sep
, &saveptr
)); str
= NULL
) {
792 while (token
[0] == ' ' || token
[0] == '\t')
795 while (i
> 0 && (token
[i
- 1] == ' ' || token
[i
- 1] == '\t')) {
799 r
= lxc_grow_array((void ***)&result
, &result_capacity
, result_count
+ 1, 16);
802 result
[result_count
] = strdup(token
);
803 if (!result
[result_count
])
808 /* if we allocated too much, reduce it */
809 return realloc(result
, (result_count
+ 1) * sizeof(char *));
812 lxc_free_array((void **)result
, free
);
817 void lxc_free_array(void **array
, lxc_free_fn element_free_fn
)
820 for (p
= array
; p
&& *p
; p
++)
/*
 * Make sure a NULL-terminated pointer array has room for @new_size
 * elements plus the terminating NULL slot.
 *
 * @array              : in/out, the array; *array may be NULL
 * @capacity           : in/out, number of slots currently allocated
 * @new_size           : number of elements the caller wants to store
 * @capacity_increment : capacity grows in multiples of this many slots
 *
 * Newly added slots are zeroed. Returns 0 when the array is large enough
 * (possibly after reallocating), -1 if growth is needed but
 * @capacity_increment is 0, the requested size overflows, or realloc()
 * fails. On failure *array and *capacity still describe the old, valid
 * allocation.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t needed, shortfall, steps, new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	if (new_size == (size_t)-1)
		return -1;
	needed = new_size + 1;

	/* array has sufficient elements */
	if (needed <= *capacity)
		return 0;

	/* a zero step could never reach the requested size */
	if (capacity_increment == 0)
		return -1;

	/* smallest multiple of the increment that covers the shortfall */
	shortfall = needed - *capacity;
	steps = shortfall / capacity_increment +
		(shortfall % capacity_increment != 0);
	if (steps > ((size_t)-1 - *capacity) / capacity_increment)
		return -1;
	new_capacity = *capacity + steps * capacity_increment;
	if (new_capacity > (size_t)-1 / sizeof(void *))
		return -1;

	/* we have to reallocate */
	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;

	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
/*
 * Count the elements of a NULL-terminated pointer array.
 *
 * @array : the array; may be NULL
 *
 * Returns the number of entries before the first NULL, 0 for a NULL array.
 */
size_t lxc_array_len(void **array)
{
	void **p;
	size_t result = 0;

	for (p = array; p && *p; p++)
		result++;

	return result;
}
865 int lxc_write_to_file(const char *filename
, const void* buf
, size_t count
, bool add_newline
)
870 fd
= open(filename
, O_WRONLY
| O_TRUNC
| O_CREAT
| O_CLOEXEC
, 0666);
873 ret
= lxc_write_nointr(fd
, buf
, count
);
876 if ((size_t)ret
!= count
)
879 ret
= lxc_write_nointr(fd
, "\n", 1);
893 int lxc_read_from_file(const char *filename
, void* buf
, size_t count
)
895 int fd
= -1, saved_errno
;
898 fd
= open(filename
, O_RDONLY
| O_CLOEXEC
);
902 if (!buf
|| !count
) {
905 while ((ret
= read(fd
, buf2
, 100)) > 0)
910 memset(buf
, 0, count
);
911 ret
= read(fd
, buf
, count
);
915 ERROR("read %s: %s", filename
, strerror(errno
));
/*
 * Turn an array of @count heap-allocated elements into a NULL-terminated
 * array by growing it by one slot.
 *
 * @array : array holding @count elements
 * @count : number of elements; with 0 the array is returned untouched
 *
 * Returns the (possibly moved) array with array[count] == NULL. If the
 * array cannot be grown, every element and the array itself are released
 * with free() and NULL is returned, so the caller must not touch @array
 * again.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **temp = NULL;
	size_t i;

	if (!count)
		return array;

	/* Append NULL to the array */
	if (count < (size_t)-1 / sizeof(*array))
		temp = realloc(array, (count + 1) * sizeof(*array));

	if (!temp) {
		for (i = 0; i < count; i++)
			free(array[i]);
		free(array);
		return NULL;
	}

	temp[count] = NULL;

	return temp;
}
943 int randseed(bool srand_it
)
946 srand pre-seed function based on /dev/urandom
948 unsigned int seed
=time(NULL
)+getpid();
951 f
= fopen("/dev/urandom", "r");
953 int ret
= fread(&seed
, sizeof(seed
), 1, f
);
955 DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno
));
965 uid_t
get_ns_uid(uid_t orig
)
969 uid_t nsid
, hostid
, range
;
970 FILE *f
= fopen("/proc/self/uid_map", "r");
974 while (getline(&line
, &sz
, f
) != -1) {
975 if (sscanf(line
, "%u %u %u", &nsid
, &hostid
, &range
) != 3)
977 if (hostid
<= orig
&& hostid
+ range
> orig
) {
978 nsid
+= orig
- hostid
;
/*
 * Check whether @path names a directory (symlinks are followed, since
 * stat() is used).
 *
 * Returns true only if stat() succeeds and reports a directory.
 */
bool dir_exists(const char *path)
{
	struct stat sb;
	int ret;

	ret = stat(path, &sb);
	if (ret < 0)
		/* the failure could be something other than "does not
		 * exist"; just say no */
		return false;

	return S_ISDIR(sb.st_mode);
}
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * 64-bit FNV-1a over a buffer.
 *
 * @buf  : bytes to hash (not modified)
 * @len  : number of bytes in @buf
 * @hval : starting hash value; pass the previous result to continue
 *         hashing across several buffers
 *
 * Returns the updated hash; @hval is returned unchanged when @len is 0.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bp = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bp[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (2^40 + 2^8 + 0xb3) */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
1027 * Detect whether / is mounted MS_SHARED. The only way I know of to
1028 * check that is through /proc/self/mountinfo.
1029 * I'm only checking for /. If the container rootfs or mount location
1030 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1031 * out would be too much work to be worth it.
1033 #define LINELEN 4096
1034 int detect_shared_rootfs(void)
1036 char buf
[LINELEN
], *p
;
1041 f
= fopen("/proc/self/mountinfo", "r");
1044 while (fgets(buf
, LINELEN
, f
)) {
1045 for (p
= buf
, i
=0; p
&& i
< 4; i
++)
1046 p
= strchr(p
+1, ' ');
1049 p2
= strchr(p
+1, ' ');
1053 if (strcmp(p
+1, "/") == 0) {
1054 // this is '/'. is it shared?
1055 p
= strchr(p2
+1, ' ');
1056 if (p
&& strstr(p
, "shared:")) {
1066 bool switch_to_ns(pid_t pid
, const char *ns
) {
1068 char nspath
[MAXPATHLEN
];
1070 /* Switch to new ns */
1071 ret
= snprintf(nspath
, MAXPATHLEN
, "/proc/%d/ns/%s", pid
, ns
);
1072 if (ret
< 0 || ret
>= MAXPATHLEN
)
1075 fd
= open(nspath
, O_RDONLY
);
1077 SYSERROR("failed to open %s", nspath
);
1083 SYSERROR("failed to set process %d to %s of %d.", pid
, ns
, fd
);
1092 * looking at fs/proc_namespace.c, it appears we can
1093 * actually expect the rootfs entry to very specifically contain
1094 * " - rootfs rootfs "
1095 * IIUC, so long as we've chrooted so that rootfs is not our root,
1096 * the rootfs entry should always be skipped in mountinfo contents.
1098 int detect_ramfs_rootfs(void)
1100 char buf
[LINELEN
], *p
;
1105 f
= fopen("/proc/self/mountinfo", "r");
1108 while (fgets(buf
, LINELEN
, f
)) {
1109 for (p
= buf
, i
=0; p
&& i
< 4; i
++)
1110 p
= strchr(p
+1, ' ');
1113 p2
= strchr(p
+1, ' ');
1117 if (strcmp(p
+1, "/") == 0) {
1118 // this is '/'. is it the ramfs?
1119 p
= strchr(p2
+1, '-');
1120 if (p
&& strncmp(p
, "- rootfs rootfs ", 16) == 0) {
1130 char *on_path(char *cmd
, const char *rootfs
) {
1133 char *saveptr
= NULL
;
1134 char cmdpath
[MAXPATHLEN
];
1137 path
= getenv("PATH");
1141 path
= strdup(path
);
1145 entry
= strtok_r(path
, ":", &saveptr
);
1148 ret
= snprintf(cmdpath
, MAXPATHLEN
, "%s/%s/%s", rootfs
, entry
, cmd
);
1150 ret
= snprintf(cmdpath
, MAXPATHLEN
, "%s/%s", entry
, cmd
);
1152 if (ret
< 0 || ret
>= MAXPATHLEN
)
1155 if (access(cmdpath
, X_OK
) == 0) {
1157 return strdup(cmdpath
);
1161 entry
= strtok_r(NULL
, ":", &saveptr
);
/*
 * Check whether @f can be stat()ed, i.e. names an existing filesystem
 * object of any type (symlinks are followed).
 *
 * Returns true if stat() succeeds, false otherwise.
 */
bool file_exists(const char *f)
{
	struct stat statbuf;

	return stat(f, &statbuf) == 0;
}
1175 /* historically lxc-init has been under /usr/lib/lxc and under
1176 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1178 char *choose_init(const char *rootfs
)
1181 const char *empty
= "",
1183 int ret
, env_set
= 0;
1186 if (!getenv("PATH")) {
1187 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1188 SYSERROR("Failed to setenv");
1192 retv
= on_path("init.lxc", rootfs
);
1195 if (unsetenv("PATH"))
1196 SYSERROR("Failed to unsetenv");
1202 retv
= malloc(PATH_MAX
);
1211 ret
= snprintf(retv
, PATH_MAX
, "%s/%s/%s", tmp
, SBINDIR
, "/init.lxc");
1212 if (ret
< 0 || ret
>= PATH_MAX
) {
1213 ERROR("pathname too long");
1217 ret
= stat(retv
, &mystat
);
1221 ret
= snprintf(retv
, PATH_MAX
, "%s/%s/%s", tmp
, LXCINITDIR
, "/lxc/lxc-init");
1222 if (ret
< 0 || ret
>= PATH_MAX
) {
1223 ERROR("pathname too long");
1227 ret
= stat(retv
, &mystat
);
1231 ret
= snprintf(retv
, PATH_MAX
, "%s/usr/lib/lxc/lxc-init", tmp
);
1232 if (ret
< 0 || ret
>= PATH_MAX
) {
1233 ERROR("pathname too long");
1236 ret
= stat(retv
, &mystat
);
1240 ret
= snprintf(retv
, PATH_MAX
, "%s/sbin/lxc-init", tmp
);
1241 if (ret
< 0 || ret
>= PATH_MAX
) {
1242 ERROR("pathname too long");
1245 ret
= stat(retv
, &mystat
);
1250 * Last resort, look for the statically compiled init.lxc which we
1251 * hopefully bind-mounted in.
1252 * If we are called during container setup, and we get to this point,
1253 * then the init.lxc.static from the host will need to be bind-mounted
1254 * in. So we return NULL here to indicate that.
1259 ret
= snprintf(retv
, PATH_MAX
, "/init.lxc.static");
1260 if (ret
< 0 || ret
>= PATH_MAX
) {
1261 WARN("Nonsense - name /lxc.init.static too long");
1264 ret
= stat(retv
, &mystat
);
/*
 * Replace the contents of @file with the string @content (the file is
 * created if needed and truncated otherwise).
 *
 * Returns 0 once everything has been written and the stream closed
 * cleanly, -1 if an argument is NULL, the file cannot be opened, or the
 * write or the final flush in fclose() fails.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;

	if (!file || !content)
		return -1;

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (fputs(content, f) == EOF)
		ret = -1;

	/* buffered data reaches the file here, so a failing close means
	 * the content may be incomplete */
	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
/*
 * Check whether @path names a directory (symlinks are followed, since
 * stat() is used).
 *
 * Returns 1 if stat() succeeds and reports a directory, 0 otherwise.
 */
int is_dir(const char *path)
{
	struct stat statbuf;
	int ret = stat(path, &statbuf);

	if (ret == 0 && S_ISDIR(statbuf.st_mode))
		return 1;

	return 0;
}
1297 * Given the '-t' template option to lxc-create, figure out what to
1298 * do. If the template is a full executable path, use that. If it
1299 * is something like 'sshd', then return $templatepath/lxc-sshd.
1300 * On success return the template, on error return NULL.
1302 char *get_template_path(const char *t
)
1307 if (t
[0] == '/' && access(t
, X_OK
) == 0) {
1312 len
= strlen(LXCTEMPLATEDIR
) + strlen(t
) + strlen("/lxc-") + 1;
1313 tpath
= malloc(len
);
1316 ret
= snprintf(tpath
, len
, "%s/lxc-%s", LXCTEMPLATEDIR
, t
);
1317 if (ret
< 0 || ret
>= len
) {
1321 if (access(tpath
, X_OK
) < 0) {
1322 SYSERROR("bad template: %s", t
);
1331 * Sets the process title to the specified title. Note:
1332 * 1. this function requires root to succeed
1333 * 2. it clears /proc/self/environ
1334 * 3. it may not succeed (e.g. if title is longer than /proc/self/environ +
1335 * the original title)
1337 int setproctitle(char *title
)
1339 char buf
[2048], *tmp
;
1341 int i
, len
, ret
= 0;
1342 unsigned long arg_start
, arg_end
, env_start
, env_end
;
1344 f
= fopen_cloexec("/proc/self/stat", "r");
1349 tmp
= fgets(buf
, sizeof(buf
), f
);
1355 /* Skip the first 47 fields, column 48-51 are ARG_START and
1357 tmp
= strchr(buf
, ' ');
1358 for (i
= 0; i
< 46; i
++) {
1361 tmp
= strchr(tmp
+1, ' ');
1367 i
= sscanf(tmp
, "%lu %lu %lu %lu", &arg_start
, &arg_end
, &env_start
, &env_end
);
1372 /* Include the null byte here, because in the calculations below we
1373 * want to have room for it. */
1374 len
= strlen(title
) + 1;
1376 /* We're truncating the environment, so we should use at most the
1377 * length of the argument + environment for the title. */
1378 if (len
> env_end
- arg_start
) {
1380 len
= env_end
- arg_start
;
1382 /* Only truncate the environment if we're actually going to
1383 * overwrite part of it. */
1384 if (len
>= arg_end
- arg_start
) {
1385 env_start
= env_end
;
1388 arg_end
= arg_start
+ len
;
1390 /* check overflow */
1391 if (arg_end
< len
|| arg_end
< arg_start
) {
1397 strcpy((char*)arg_start
, title
);
1399 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ARG_START
, arg_start
, 0, 0);
1400 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ARG_END
, arg_end
, 0, 0);
1401 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ENV_START
, env_start
, 0, 0);
1402 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ENV_END
, env_end
, 0, 0);
1408 * Mount a proc under @rootfs if proc self points to a pid other than
1409 * my own. This is needed to have a known-good proc mount for setting
1410 * up LSMs both at container startup and attach.
1412 * @rootfs : the rootfs where proc should be mounted
1414 * Returns < 0 on failure, 0 if the correct proc was already mounted
1415 * and 1 if a new proc was mounted.
1417 int mount_proc_if_needed(const char *rootfs
)
1419 char path
[MAXPATHLEN
];
1424 ret
= snprintf(path
, MAXPATHLEN
, "%s/proc/self", rootfs
);
1425 if (ret
< 0 || ret
>= MAXPATHLEN
) {
1426 SYSERROR("proc path name too long");
1429 memset(link
, 0, 20);
1430 linklen
= readlink(path
, link
, 20);
1431 mypid
= (int)getpid();
1432 INFO("I am %d, /proc/self points to '%s'", mypid
, link
);
1433 ret
= snprintf(path
, MAXPATHLEN
, "%s/proc", rootfs
);
1434 if (linklen
< 0) /* /proc not mounted */
1436 if (atoi(link
) != mypid
) {
1437 /* wrong /procs mounted */
1438 umount2(path
, MNT_DETACH
); /* ignore failure */
1441 /* the right proc is already mounted */
1445 if (mount("proc", path
, "proc", 0, NULL
))
1447 INFO("Mounted /proc in container for security transition");
1451 int null_stdfds(void)
1455 fd
= open("/dev/null", O_RDWR
);
1459 if (dup2(fd
, 0) < 0)
1461 if (dup2(fd
, 1) < 0)
1463 if (dup2(fd
, 2) < 0)