2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
31 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/mount.h>
40 #include <sys/types.h>
43 #include <sys/prctl.h>
48 #include "namespace.h"
54 #ifndef PR_SET_MM_ARG_START
55 #define PR_SET_MM_ARG_START 8
58 #ifndef PR_SET_MM_ARG_END
59 #define PR_SET_MM_ARG_END 9
62 #ifndef PR_SET_MM_ENV_START
63 #define PR_SET_MM_ENV_START 10
66 #ifndef PR_SET_MM_ENV_END
67 #define PR_SET_MM_ENV_END 11
70 lxc_log_define(lxc_utils
, lxc
);
72 static int _recursive_rmdir(char *dirname
, dev_t pdev
,
73 const char *exclude
, int level
, bool onedev
)
75 struct dirent dirent
, *direntp
;
78 char pathname
[MAXPATHLEN
];
79 bool hadexclude
= false;
81 dir
= opendir(dirname
);
83 ERROR("%s: failed to open %s", __func__
, dirname
);
87 while (!readdir_r(dir
, &dirent
, &direntp
)) {
94 if (!strcmp(direntp
->d_name
, ".") ||
95 !strcmp(direntp
->d_name
, ".."))
98 rc
= snprintf(pathname
, MAXPATHLEN
, "%s/%s", dirname
, direntp
->d_name
);
99 if (rc
< 0 || rc
>= MAXPATHLEN
) {
100 ERROR("pathname too long");
105 if (!level
&& exclude
&& !strcmp(direntp
->d_name
, exclude
)) {
106 ret
= rmdir(pathname
);
110 INFO("Not deleting snapshot %s", pathname
);
114 ret
= unlink(pathname
);
116 INFO("%s: failed to remove %s", __func__
, pathname
);
119 SYSERROR("%s: failed to rmdir %s", __func__
, pathname
);
127 ret
= lstat(pathname
, &mystat
);
129 ERROR("%s: failed to stat %s", __func__
, pathname
);
133 if (onedev
&& mystat
.st_dev
!= pdev
)
135 if (S_ISDIR(mystat
.st_mode
)) {
136 if (_recursive_rmdir(pathname
, pdev
, exclude
, level
+1, onedev
) < 0)
139 if (unlink(pathname
) < 0) {
140 SYSERROR("%s: failed to delete %s", __func__
, pathname
);
146 if (rmdir(dirname
) < 0) {
148 ERROR("%s: failed to delete %s", __func__
, dirname
);
155 ERROR("%s: failed to close directory %s", __func__
, dirname
);
159 return failed
? -1 : 0;
162 /* we have two different magic values for overlayfs, yay */
163 #define OVERLAYFS_SUPER_MAGIC 0x794c764f
164 #define OVERLAY_SUPER_MAGIC 0x794c7630
166 * In overlayfs, st_dev is unreliable, so on overlayfs we don't do
167 * the lxc_rmdir_onedev()
169 static bool is_native_overlayfs(const char *path
)
173 if (statfs(path
, &sb
) < 0)
175 if (sb
.f_type
== OVERLAYFS_SUPER_MAGIC
||
176 sb
.f_type
== OVERLAY_SUPER_MAGIC
)
181 /* returns 0 on success, -1 if there were any failures */
182 extern int lxc_rmdir_onedev(char *path
, const char *exclude
)
187 if (is_native_overlayfs(path
)) {
191 if (lstat(path
, &mystat
) < 0) {
192 ERROR("%s: failed to stat %s", __func__
, path
);
196 return _recursive_rmdir(path
, mystat
.st_dev
, exclude
, 0, onedev
);
199 static int mount_fs(const char *source
, const char *target
, const char *type
)
201 /* the umount may fail */
203 WARN("failed to unmount %s : %s", target
, strerror(errno
));
205 if (mount(source
, target
, type
, 0, NULL
)) {
206 ERROR("failed to mount %s : %s", target
, strerror(errno
));
210 DEBUG("'%s' mounted on '%s'", source
, target
);
215 extern void lxc_setup_fs(void)
217 if (mount_fs("proc", "/proc", "proc"))
218 INFO("failed to remount proc");
220 /* if we can't mount /dev/shm, continue anyway */
221 if (mount_fs("shmfs", "/dev/shm", "tmpfs"))
222 INFO("failed to mount /dev/shm");
224 /* If we were able to mount /dev/shm, then /dev exists */
225 /* Sure, but it's read-only per config :) */
226 if (access("/dev/mqueue", F_OK
) && mkdir("/dev/mqueue", 0666)) {
227 DEBUG("failed to create '/dev/mqueue'");
231 /* continue even without posix message queue support */
232 if (mount_fs("mqueue", "/dev/mqueue", "mqueue"))
233 INFO("failed to mount /dev/mqueue");
236 /* borrowed from iproute2 */
237 extern int get_u16(unsigned short *val
, const char *arg
, int base
)
246 res
= strtoul(arg
, &ptr
, base
);
247 if (!ptr
|| ptr
== arg
|| *ptr
|| res
> 0xFFFF || errno
!= 0)
255 extern int mkdir_p(const char *dir
, mode_t mode
)
257 const char *tmp
= dir
;
258 const char *orig
= dir
;
262 dir
= tmp
+ strspn(tmp
, "/");
263 tmp
= dir
+ strcspn(dir
, "/");
264 makeme
= strndup(orig
, dir
- orig
);
266 if (mkdir(makeme
, mode
) && errno
!= EEXIST
) {
267 SYSERROR("failed to create directory '%s'", makeme
);
278 extern void remove_trailing_slashes(char *p
)
281 while (--l
>= 0 && (p
[l
] == '/' || p
[l
] == '\n'))
285 static char *copy_global_config_value(char *p
)
292 if (p
[len
-1] == '\n') {
296 retbuf
= malloc(len
+1);
303 #define DEFAULT_VG "lxc"
304 #define DEFAULT_THIN_POOL "lxc"
305 #define DEFAULT_ZFSROOT "lxc"
307 const char *lxc_global_config_value(const char *option_name
)
309 static const char * const options
[][2] = {
310 { "lxc.bdev.lvm.vg", DEFAULT_VG
},
311 { "lxc.bdev.lvm.thin_pool", DEFAULT_THIN_POOL
},
312 { "lxc.bdev.zfs.root", DEFAULT_ZFSROOT
},
313 { "lxc.lxcpath", NULL
},
314 { "lxc.default_config", NULL
},
315 { "lxc.cgroup.pattern", NULL
},
316 { "lxc.cgroup.use", NULL
},
320 /* placed in the thread local storage pool for non-bionic targets */
322 static __thread
const char *values
[sizeof(options
) / sizeof(options
[0])] = { 0 };
324 static const char *values
[sizeof(options
) / sizeof(options
[0])] = { 0 };
327 /* user_config_path is freed as soon as it is used */
328 char *user_config_path
= NULL
;
331 * The following variables are freed at bottom unconditionally.
332 * So NULL the value if it is to be returned to the caller
334 char *user_default_config_path
= NULL
;
335 char *user_lxc_path
= NULL
;
336 char *user_cgroup_pattern
= NULL
;
339 const char *user_home
= getenv("HOME");
343 user_config_path
= malloc(sizeof(char) * (22 + strlen(user_home
)));
344 user_default_config_path
= malloc(sizeof(char) * (26 + strlen(user_home
)));
345 user_lxc_path
= malloc(sizeof(char) * (19 + strlen(user_home
)));
347 sprintf(user_config_path
, "%s/.config/lxc/lxc.conf", user_home
);
348 sprintf(user_default_config_path
, "%s/.config/lxc/default.conf", user_home
);
349 sprintf(user_lxc_path
, "%s/.local/share/lxc/", user_home
);
350 user_cgroup_pattern
= strdup("lxc/%n");
353 user_config_path
= strdup(LXC_GLOBAL_CONF
);
354 user_default_config_path
= strdup(LXC_DEFAULT_CONFIG
);
355 user_lxc_path
= strdup(LXCPATH
);
356 user_cgroup_pattern
= strdup(DEFAULT_CGROUP_PATTERN
);
359 const char * const (*ptr
)[2];
361 char buf
[1024], *p
, *p2
;
364 for (i
= 0, ptr
= options
; (*ptr
)[0]; ptr
++, i
++) {
365 if (!strcmp(option_name
, (*ptr
)[0]))
369 free(user_config_path
);
370 free(user_default_config_path
);
372 free(user_cgroup_pattern
);
378 free(user_config_path
);
379 free(user_default_config_path
);
381 free(user_cgroup_pattern
);
385 fin
= fopen_cloexec(user_config_path
, "r");
386 free(user_config_path
);
388 while (fgets(buf
, 1024, fin
)) {
391 p
= strstr(buf
, option_name
);
394 /* see if there was just white space in front
397 for (p2
= buf
; p2
< p
; p2
++) {
398 if (*p2
!= ' ' && *p2
!= '\t')
406 /* see if there was just white space after
409 for (p2
+= strlen(option_name
); p2
< p
; p2
++) {
410 if (*p2
!= ' ' && *p2
!= '\t')
416 while (*p
&& (*p
== ' ' || *p
== '\t')) p
++;
420 if (strcmp(option_name
, "lxc.lxcpath") == 0) {
422 user_lxc_path
= copy_global_config_value(p
);
423 remove_trailing_slashes(user_lxc_path
);
424 values
[i
] = user_lxc_path
;
425 user_lxc_path
= NULL
;
429 values
[i
] = copy_global_config_value(p
);
433 /* could not find value, use default */
434 if (strcmp(option_name
, "lxc.lxcpath") == 0) {
435 remove_trailing_slashes(user_lxc_path
);
436 values
[i
] = user_lxc_path
;
437 user_lxc_path
= NULL
;
439 else if (strcmp(option_name
, "lxc.default_config") == 0) {
440 values
[i
] = user_default_config_path
;
441 user_default_config_path
= NULL
;
443 else if (strcmp(option_name
, "lxc.cgroup.pattern") == 0) {
444 values
[i
] = user_cgroup_pattern
;
445 user_cgroup_pattern
= NULL
;
448 values
[i
] = (*ptr
)[1];
450 /* special case: if default value is NULL,
451 * and there is no config, don't view that
460 free(user_cgroup_pattern
);
461 free(user_default_config_path
);
472 if (geteuid() == 0) {
473 rundir
= strdup(RUNTIME_PATH
);
477 rundir
= getenv("XDG_RUNTIME_DIR");
479 rundir
= strdup(rundir
);
483 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
484 homedir
= getenv("HOME");
486 ERROR("HOME isn't set in the environment.");
490 rundir
= malloc(sizeof(char) * (17 + strlen(homedir
)));
491 sprintf(rundir
, "%s/.cache/lxc/run/", homedir
);
496 int wait_for_pid(pid_t pid
)
501 ret
= waitpid(pid
, &status
, 0);
509 if (!WIFEXITED(status
) || WEXITSTATUS(status
) != 0)
514 int lxc_wait_for_pid_status(pid_t pid
)
519 ret
= waitpid(pid
, &status
, 0);
530 ssize_t
lxc_write_nointr(int fd
, const void* buf
, size_t count
)
534 ret
= write(fd
, buf
, count
);
535 if (ret
< 0 && errno
== EINTR
)
540 ssize_t
lxc_read_nointr(int fd
, void* buf
, size_t count
)
544 ret
= read(fd
, buf
, count
);
545 if (ret
< 0 && errno
== EINTR
)
550 ssize_t
lxc_read_nointr_expect(int fd
, void* buf
, size_t count
, const void* expected_buf
)
553 ret
= lxc_read_nointr(fd
, buf
, count
);
556 if ((size_t)ret
!= count
)
558 if (expected_buf
&& memcmp(buf
, expected_buf
, count
) != 0) {
566 #include <gnutls/gnutls.h>
567 #include <gnutls/crypto.h>
569 __attribute__((constructor
))
570 static void gnutls_lxc_init(void)
572 gnutls_global_init();
575 int sha1sum_file(char *fnam
, unsigned char *digest
)
584 f
= fopen_cloexec(fnam
, "r");
586 SYSERROR("Error opening template");
589 if (fseek(f
, 0, SEEK_END
) < 0) {
590 SYSERROR("Error seeking to end of template");
594 if ((flen
= ftell(f
)) < 0) {
595 SYSERROR("Error telling size of template");
599 if (fseek(f
, 0, SEEK_SET
) < 0) {
600 SYSERROR("Error seeking to start of template");
604 if ((buf
= malloc(flen
+1)) == NULL
) {
605 SYSERROR("Out of memory");
609 if (fread(buf
, 1, flen
, f
) != flen
) {
610 SYSERROR("Failure reading template");
616 SYSERROR("Failre closing template");
621 ret
= gnutls_hash_fast(GNUTLS_DIG_SHA1
, buf
, flen
, (void *)digest
);
627 char** lxc_va_arg_list_to_argv(va_list ap
, size_t skip
, int do_strdup
)
630 size_t count
= 1 + skip
;
633 /* first determine size of argument list, we don't want to reallocate
638 char* arg
= va_arg(ap2
, char*);
645 result
= calloc(count
, sizeof(char*));
650 char* arg
= va_arg(ap
, char*);
653 arg
= do_strdup
? strdup(arg
) : arg
;
656 result
[count
++] = arg
;
659 /* calloc has already set last element to NULL*/
667 const char** lxc_va_arg_list_to_argv_const(va_list ap
, size_t skip
)
669 return (const char**)lxc_va_arg_list_to_argv(ap
, skip
, 0);
672 FILE *fopen_cloexec(const char *path
, const char *mode
)
680 if (!strncmp(mode
, "r+", 2)) {
683 } else if (!strncmp(mode
, "r", 1)) {
684 open_mode
= O_RDONLY
;
686 } else if (!strncmp(mode
, "w+", 2)) {
687 open_mode
= O_RDWR
| O_TRUNC
| O_CREAT
;
689 } else if (!strncmp(mode
, "w", 1)) {
690 open_mode
= O_WRONLY
| O_TRUNC
| O_CREAT
;
692 } else if (!strncmp(mode
, "a+", 2)) {
693 open_mode
= O_RDWR
| O_CREAT
| O_APPEND
;
695 } else if (!strncmp(mode
, "a", 1)) {
696 open_mode
= O_WRONLY
| O_CREAT
| O_APPEND
;
699 for (; mode
[step
]; step
++)
700 if (mode
[step
] == 'x')
702 open_mode
|= O_CLOEXEC
;
704 fd
= open(path
, open_mode
, 0666);
708 ret
= fdopen(fd
, mode
);
716 extern struct lxc_popen_FILE
*lxc_popen(const char *command
)
718 struct lxc_popen_FILE
*fp
= NULL
;
719 int parent_end
= -1, child_end
= -1;
723 int r
= pipe2(pipe_fds
, O_CLOEXEC
);
726 ERROR("pipe2 failure");
730 parent_end
= pipe_fds
[0];
731 child_end
= pipe_fds
[1];
735 if (child_pid
== 0) {
737 int child_std_end
= STDOUT_FILENO
;
739 if (child_end
!= child_std_end
) {
740 /* dup2() doesn't dup close-on-exec flag */
741 dup2(child_end
, child_std_end
);
743 /* it's safe not to close child_end here
744 * as it's marked close-on-exec anyway
748 * The descriptor is already the one we will use.
749 * But it must not be marked close-on-exec.
752 if (fcntl(child_end
, F_SETFD
, 0) != 0) {
753 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
760 * This is the main/only reason
761 * why we do our lousy popen() emulation.
766 sigprocmask(SIG_UNBLOCK
, &mask
, NULL
);
769 execl("/bin/sh", "sh", "-c", command
, (char *) NULL
);
779 ERROR("fork failure");
783 fp
= calloc(1, sizeof(*fp
));
785 ERROR("failed to allocate memory");
789 fp
->f
= fdopen(parent_end
, "r");
791 ERROR("fdopen failure");
795 fp
->child_pid
= child_pid
;
804 parent_end
= -1; /* so we do not close it second time */
810 if (parent_end
!= -1)
816 extern int lxc_pclose(struct lxc_popen_FILE
*fp
)
825 child_pid
= fp
->child_pid
;
826 /* free memory (we still need to close file stream) */
831 if (!f
|| fclose(f
)) {
832 ERROR("fclose failure");
837 wait_pid
= waitpid(child_pid
, &wstatus
, 0);
838 } while (wait_pid
== -1 && errno
== EINTR
);
840 if (wait_pid
== -1) {
841 ERROR("waitpid failure");
848 char *lxc_string_replace(const char *needle
, const char *replacement
, const char *haystack
)
850 ssize_t len
= -1, saved_len
= -1;
852 size_t replacement_len
= strlen(replacement
);
853 size_t needle_len
= strlen(needle
);
855 /* should be executed exactly twice */
856 while (len
== -1 || result
== NULL
) {
862 result
= calloc(1, len
+ 1);
870 for (last_p
= (char *)haystack
, p
= strstr(last_p
, needle
); p
; last_p
= p
, p
= strstr(last_p
, needle
)) {
871 part_len
= (ssize_t
)(p
- last_p
);
872 if (result
&& part_len
> 0)
873 memcpy(&result
[len
], last_p
, part_len
);
875 if (result
&& replacement_len
> 0)
876 memcpy(&result
[len
], replacement
, replacement_len
);
877 len
+= replacement_len
;
880 part_len
= strlen(last_p
);
881 if (result
&& part_len
> 0)
882 memcpy(&result
[len
], last_p
, part_len
);
886 /* make sure we did the same thing twice,
887 * once for calculating length, the other
888 * time for copying data */
889 assert(saved_len
== len
);
890 /* make sure we didn't overwrite any buffer,
891 * due to calloc the string should be 0-terminated */
892 assert(result
[len
] == '\0');
897 bool lxc_string_in_array(const char *needle
, const char **haystack
)
899 for (; haystack
&& *haystack
; haystack
++)
900 if (!strcmp(needle
, *haystack
))
905 char *lxc_string_join(const char *sep
, const char **parts
, bool use_as_prefix
)
909 size_t sep_len
= strlen(sep
);
910 size_t result_len
= use_as_prefix
* sep_len
;
912 /* calculate new string length */
913 for (p
= (char **)parts
; *p
; p
++)
914 result_len
+= (p
> (char **)parts
) * sep_len
+ strlen(*p
);
916 result
= calloc(result_len
+ 1, 1);
922 for (p
= (char **)parts
; *p
; p
++) {
923 if (p
> (char **)parts
)
931 char **lxc_normalize_path(const char *path
)
935 size_t components_len
= 0;
938 components
= lxc_string_split(path
, '/');
941 for (p
= components
; *p
; p
++)
944 /* resolve '.' and '..' */
945 for (pos
= 0; pos
< components_len
; ) {
946 if (!strcmp(components
[pos
], ".") || (!strcmp(components
[pos
], "..") && pos
== 0)) {
947 /* eat this element */
948 free(components
[pos
]);
949 memmove(&components
[pos
], &components
[pos
+1], sizeof(char *) * (components_len
- pos
));
951 } else if (!strcmp(components
[pos
], "..")) {
952 /* eat this and the previous element */
953 free(components
[pos
- 1]);
954 free(components
[pos
]);
955 memmove(&components
[pos
-1], &components
[pos
+1], sizeof(char *) * (components_len
- pos
));
966 char *lxc_append_paths(const char *first
, const char *second
)
968 size_t len
= strlen(first
) + strlen(second
) + 1;
969 const char *pattern
= "%s%s";
972 if (second
[0] != '/') {
977 result
= calloc(1, len
);
981 snprintf(result
, len
, pattern
, first
, second
);
985 bool lxc_string_in_list(const char *needle
, const char *haystack
, char _sep
)
987 char *token
, *str
, *saveptr
= NULL
;
988 char sep
[2] = { _sep
, '\0' };
990 if (!haystack
|| !needle
)
993 str
= alloca(strlen(haystack
)+1);
994 strcpy(str
, haystack
);
995 for (; (token
= strtok_r(str
, sep
, &saveptr
)); str
= NULL
) {
996 if (strcmp(needle
, token
) == 0)
1003 char **lxc_string_split(const char *string
, char _sep
)
1005 char *token
, *str
, *saveptr
= NULL
;
1006 char sep
[2] = { _sep
, '\0' };
1007 char **result
= NULL
;
1008 size_t result_capacity
= 0;
1009 size_t result_count
= 0;
1013 return calloc(1, sizeof(char *));
1015 str
= alloca(strlen(string
)+1);
1016 strcpy(str
, string
);
1017 for (; (token
= strtok_r(str
, sep
, &saveptr
)); str
= NULL
) {
1018 r
= lxc_grow_array((void ***)&result
, &result_capacity
, result_count
+ 1, 16);
1021 result
[result_count
] = strdup(token
);
1022 if (!result
[result_count
])
1027 /* if we allocated too much, reduce it */
1028 return realloc(result
, (result_count
+ 1) * sizeof(char *));
1030 saved_errno
= errno
;
1031 lxc_free_array((void **)result
, free
);
1032 errno
= saved_errno
;
1036 char **lxc_string_split_and_trim(const char *string
, char _sep
)
1038 char *token
, *str
, *saveptr
= NULL
;
1039 char sep
[2] = { _sep
, '\0' };
1040 char **result
= NULL
;
1041 size_t result_capacity
= 0;
1042 size_t result_count
= 0;
1047 return calloc(1, sizeof(char *));
1049 str
= alloca(strlen(string
)+1);
1050 strcpy(str
, string
);
1051 for (; (token
= strtok_r(str
, sep
, &saveptr
)); str
= NULL
) {
1052 while (token
[0] == ' ' || token
[0] == '\t')
1055 while (i
> 0 && (token
[i
- 1] == ' ' || token
[i
- 1] == '\t')) {
1056 token
[i
- 1] = '\0';
1059 r
= lxc_grow_array((void ***)&result
, &result_capacity
, result_count
+ 1, 16);
1062 result
[result_count
] = strdup(token
);
1063 if (!result
[result_count
])
1068 /* if we allocated too much, reduce it */
1069 return realloc(result
, (result_count
+ 1) * sizeof(char *));
1071 saved_errno
= errno
;
1072 lxc_free_array((void **)result
, free
);
1073 errno
= saved_errno
;
1077 void lxc_free_array(void **array
, lxc_free_fn element_free_fn
)
1080 for (p
= array
; p
&& *p
; p
++)
1081 element_free_fn(*p
);
1085 int lxc_grow_array(void ***array
, size_t* capacity
, size_t new_size
, size_t capacity_increment
)
1087 size_t new_capacity
;
1090 /* first time around, catch some trivial mistakes of the user
1091 * only initializing one of these */
1092 if (!*array
|| !*capacity
) {
1097 new_capacity
= *capacity
;
1098 while (new_size
+ 1 > new_capacity
)
1099 new_capacity
+= capacity_increment
;
1100 if (new_capacity
!= *capacity
) {
1101 /* we have to reallocate */
1102 new_array
= realloc(*array
, new_capacity
* sizeof(void *));
1105 memset(&new_array
[*capacity
], 0, (new_capacity
- (*capacity
)) * sizeof(void *));
1107 *capacity
= new_capacity
;
1110 /* array has sufficient elements */
1114 size_t lxc_array_len(void **array
)
1119 for (p
= array
; p
&& *p
; p
++)
1125 int lxc_write_to_file(const char *filename
, const void* buf
, size_t count
, bool add_newline
)
1127 int fd
, saved_errno
;
1130 fd
= open(filename
, O_WRONLY
| O_TRUNC
| O_CREAT
| O_CLOEXEC
, 0666);
1133 ret
= lxc_write_nointr(fd
, buf
, count
);
1136 if ((size_t)ret
!= count
)
1139 ret
= lxc_write_nointr(fd
, "\n", 1);
1147 saved_errno
= errno
;
1149 errno
= saved_errno
;
1153 int lxc_read_from_file(const char *filename
, void* buf
, size_t count
)
1155 int fd
= -1, saved_errno
;
1158 fd
= open(filename
, O_RDONLY
| O_CLOEXEC
);
1162 if (!buf
|| !count
) {
1165 while ((ret
= read(fd
, buf2
, 100)) > 0)
1170 memset(buf
, 0, count
);
1171 ret
= read(fd
, buf
, count
);
1175 ERROR("read %s: %s", filename
, strerror(errno
));
1177 saved_errno
= errno
;
1179 errno
= saved_errno
;
1183 void **lxc_append_null_to_array(void **array
, size_t count
)
1187 /* Append NULL to the array */
1189 temp
= realloc(array
, (count
+ 1) * sizeof(*array
));
1192 for (i
= 0; i
< count
; i
++)
1198 array
[count
] = NULL
;
1203 int randseed(bool srand_it
)
1206 srand pre-seed function based on /dev/urandom
1208 unsigned int seed
=time(NULL
)+getpid();
1211 f
= fopen("/dev/urandom", "r");
1213 int ret
= fread(&seed
, sizeof(seed
), 1, f
);
1215 DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno
));
1225 uid_t
get_ns_uid(uid_t orig
)
1229 uid_t nsid
, hostid
, range
;
1230 FILE *f
= fopen("/proc/self/uid_map", "r");
1234 while (getline(&line
, &sz
, f
) != -1) {
1235 if (sscanf(line
, "%u %u %u", &nsid
, &hostid
, &range
) != 3)
1237 if (hostid
<= orig
&& hostid
+ range
> orig
) {
1238 nsid
+= orig
- hostid
;
1250 bool dir_exists(const char *path
)
1255 ret
= stat(path
, &sb
);
1257 // could be something other than eexist, just say no
1259 return S_ISDIR(sb
.st_mode
);
1262 /* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
1263 * FNV has good anti collision properties and we're not worried
1264 * about pre-image resistance or one-way-ness, we're just trying to make
1265 * the name unique in the 108 bytes of space we have.
1267 uint64_t fnv_64a_buf(void *buf
, size_t len
, uint64_t hval
)
1271 for(bp
= buf
; bp
< (unsigned char *)buf
+ len
; bp
++)
1273 /* xor the bottom with the current octet */
1274 hval
^= (uint64_t)*bp
;
1277 * multiply by the 64 bit FNV magic prime mod 2^64
1279 hval
+= (hval
<< 1) + (hval
<< 4) + (hval
<< 5) +
1280 (hval
<< 7) + (hval
<< 8) + (hval
<< 40);
1287 * Detect whether / is mounted MS_SHARED. The only way I know of to
1288 * check that is through /proc/self/mountinfo.
1289 * I'm only checking for /. If the container rootfs or mount location
1290 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1291 * out would be too much work to be worth it.
1293 #define LINELEN 4096
1294 int detect_shared_rootfs(void)
1296 char buf
[LINELEN
], *p
;
1301 f
= fopen("/proc/self/mountinfo", "r");
1304 while (fgets(buf
, LINELEN
, f
)) {
1305 for (p
= buf
, i
=0; p
&& i
< 4; i
++)
1306 p
= strchr(p
+1, ' ');
1309 p2
= strchr(p
+1, ' ');
1313 if (strcmp(p
+1, "/") == 0) {
1314 // this is '/'. is it shared?
1315 p
= strchr(p2
+1, ' ');
1316 if (p
&& strstr(p
, "shared:")) {
1326 bool switch_to_ns(pid_t pid
, const char *ns
) {
1328 char nspath
[MAXPATHLEN
];
1330 /* Switch to new ns */
1331 ret
= snprintf(nspath
, MAXPATHLEN
, "/proc/%d/ns/%s", pid
, ns
);
1332 if (ret
< 0 || ret
>= MAXPATHLEN
)
1335 fd
= open(nspath
, O_RDONLY
);
1337 SYSERROR("failed to open %s", nspath
);
1343 SYSERROR("failed to set process %d to %s of %d.", pid
, ns
, fd
);
1352 * looking at fs/proc_namespace.c, it appears we can
1353 * actually expect the rootfs entry to very specifically contain
1354 * " - rootfs rootfs "
1355 * IIUC, so long as we've chrooted so that rootfs is not our root,
1356 * the rootfs entry should always be skipped in mountinfo contents.
1358 int detect_ramfs_rootfs(void)
1360 char buf
[LINELEN
], *p
;
1365 f
= fopen("/proc/self/mountinfo", "r");
1368 while (fgets(buf
, LINELEN
, f
)) {
1369 for (p
= buf
, i
=0; p
&& i
< 4; i
++)
1370 p
= strchr(p
+1, ' ');
1373 p2
= strchr(p
+1, ' ');
1377 if (strcmp(p
+1, "/") == 0) {
1378 // this is '/'. is it the ramfs?
1379 p
= strchr(p2
+1, '-');
1380 if (p
&& strncmp(p
, "- rootfs rootfs ", 16) == 0) {
1390 char *on_path(char *cmd
, const char *rootfs
) {
1393 char *saveptr
= NULL
;
1394 char cmdpath
[MAXPATHLEN
];
1397 path
= getenv("PATH");
1401 path
= strdup(path
);
1405 entry
= strtok_r(path
, ":", &saveptr
);
1408 ret
= snprintf(cmdpath
, MAXPATHLEN
, "%s/%s/%s", rootfs
, entry
, cmd
);
1410 ret
= snprintf(cmdpath
, MAXPATHLEN
, "%s/%s", entry
, cmd
);
1412 if (ret
< 0 || ret
>= MAXPATHLEN
)
1415 if (access(cmdpath
, X_OK
) == 0) {
1417 return strdup(cmdpath
);
1421 entry
= strtok_r(NULL
, ":", &saveptr
);
1428 bool file_exists(const char *f
)
1430 struct stat statbuf
;
1432 return stat(f
, &statbuf
) == 0;
1435 /* historically lxc-init has been under /usr/lib/lxc and under
1436 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1438 char *choose_init(const char *rootfs
)
1441 const char *empty
= "",
1443 int ret
, env_set
= 0;
1446 if (!getenv("PATH")) {
1447 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1448 SYSERROR("Failed to setenv");
1452 retv
= on_path("init.lxc", rootfs
);
1455 if (unsetenv("PATH"))
1456 SYSERROR("Failed to unsetenv");
1462 retv
= malloc(PATH_MAX
);
1471 ret
= snprintf(retv
, PATH_MAX
, "%s/%s/%s", tmp
, SBINDIR
, "/init.lxc");
1472 if (ret
< 0 || ret
>= PATH_MAX
) {
1473 ERROR("pathname too long");
1477 ret
= stat(retv
, &mystat
);
1481 ret
= snprintf(retv
, PATH_MAX
, "%s/%s/%s", tmp
, LXCINITDIR
, "/lxc/lxc-init");
1482 if (ret
< 0 || ret
>= PATH_MAX
) {
1483 ERROR("pathname too long");
1487 ret
= stat(retv
, &mystat
);
1491 ret
= snprintf(retv
, PATH_MAX
, "%s/usr/lib/lxc/lxc-init", tmp
);
1492 if (ret
< 0 || ret
>= PATH_MAX
) {
1493 ERROR("pathname too long");
1496 ret
= stat(retv
, &mystat
);
1500 ret
= snprintf(retv
, PATH_MAX
, "%s/sbin/lxc-init", tmp
);
1501 if (ret
< 0 || ret
>= PATH_MAX
) {
1502 ERROR("pathname too long");
1505 ret
= stat(retv
, &mystat
);
1510 * Last resort, look for the statically compiled init.lxc which we
1511 * hopefully bind-mounted in.
1512 * If we are called during container setup, and we get to this point,
1513 * then the init.lxc.static from the host will need to be bind-mounted
1514 * in. So we return NULL here to indicate that.
1519 ret
= snprintf(retv
, PATH_MAX
, "/init.lxc.static");
1520 if (ret
< 0 || ret
>= PATH_MAX
) {
1521 WARN("Nonsense - name /lxc.init.static too long");
1524 ret
= stat(retv
, &mystat
);
1533 int print_to_file(const char *file
, const char *content
)
1538 f
= fopen(file
, "w");
1541 if (fprintf(f
, "%s", content
) != strlen(content
))
1547 int is_dir(const char *path
)
1549 struct stat statbuf
;
1550 int ret
= stat(path
, &statbuf
);
1551 if (ret
== 0 && S_ISDIR(statbuf
.st_mode
))
1557 * Given the '-t' template option to lxc-create, figure out what to
1558 * do. If the template is a full executable path, use that. If it
1559 * is something like 'sshd', then return $templatepath/lxc-sshd.
1560 * On success return the template, on error return NULL.
1562 char *get_template_path(const char *t
)
1567 if (t
[0] == '/' && access(t
, X_OK
) == 0) {
1572 len
= strlen(LXCTEMPLATEDIR
) + strlen(t
) + strlen("/lxc-") + 1;
1573 tpath
= malloc(len
);
1576 ret
= snprintf(tpath
, len
, "%s/lxc-%s", LXCTEMPLATEDIR
, t
);
1577 if (ret
< 0 || ret
>= len
) {
1581 if (access(tpath
, X_OK
) < 0) {
1582 SYSERROR("bad template: %s", t
);
1591 * Sets the process title to the specified title. Note:
1592 * 1. this function requires root to succeed
1593 * 2. it clears /proc/self/environ
1594 * 3. it may not succeed (e.g. if title is longer than /proc/self/environ +
1595 * the original title)
1597 int setproctitle(char *title
)
1599 char buf
[2048], *tmp
;
1601 int i
, len
, ret
= 0;
1602 unsigned long arg_start
, arg_end
, env_start
, env_end
;
1604 f
= fopen_cloexec("/proc/self/stat", "r");
1609 tmp
= fgets(buf
, sizeof(buf
), f
);
1615 /* Skip the first 47 fields, column 48-51 are ARG_START and
1617 tmp
= strchr(buf
, ' ');
1618 for (i
= 0; i
< 46; i
++) {
1621 tmp
= strchr(tmp
+1, ' ');
1627 i
= sscanf(tmp
, "%lu %lu %lu %lu", &arg_start
, &arg_end
, &env_start
, &env_end
);
1632 /* Include the null byte here, because in the calculations below we
1633 * want to have room for it. */
1634 len
= strlen(title
) + 1;
1636 /* We're truncating the environment, so we should use at most the
1637 * length of the argument + environment for the title. */
1638 if (len
> env_end
- arg_start
) {
1640 len
= env_end
- arg_start
;
1642 /* Only truncate the environment if we're actually going to
1643 * overwrite part of it. */
1644 if (len
>= arg_end
- arg_start
) {
1645 env_start
= env_end
;
1647 arg_end
= arg_start
+ len
;
1650 strcpy((char*)arg_start
, title
);
1652 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ARG_START
, (long)arg_start
, 0, 0);
1653 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ARG_END
, (long)arg_end
, 0, 0);
1654 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ENV_START
, (long)env_start
, 0, 0);
1655 ret
|= prctl(PR_SET_MM
, PR_SET_MM_ENV_END
, (long)env_end
, 0, 0);