2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 #include <sys/syscall.h>
38 #include <../include/openpty.h>
41 #include <linux/loop.h>
43 #include <sys/types.h>
44 #include <sys/utsname.h>
45 #include <sys/param.h>
47 #include <sys/socket.h>
48 #include <sys/mount.h>
50 #include <sys/prctl.h>
52 #include <arpa/inet.h>
54 #include <netinet/in.h>
65 #include "lxc.h" /* for lxc_cgroup_set() */
66 #include "caps.h" /* for lxc_caps_last_cap() */
73 #if HAVE_SYS_CAPABILITY_H
74 #include <sys/capability.h>
77 #if HAVE_SYS_PERSONALITY_H
78 #include <sys/personality.h>
82 #include <../include/lxcmntent.h>
87 #include "lxcseccomp.h"
89 lxc_log_define(lxc_conf
, lxc
);
92 #define MAXINDEXLEN 20
94 #define MAXLINELEN 128
96 #if HAVE_SYS_CAPABILITY_H
98 #define CAP_SETFCAP 31
101 #ifndef CAP_MAC_OVERRIDE
102 #define CAP_MAC_OVERRIDE 32
105 #ifndef CAP_MAC_ADMIN
106 #define CAP_MAC_ADMIN 33
110 #ifndef PR_CAPBSET_DROP
111 #define PR_CAPBSET_DROP 24
114 #ifndef LO_FLAGS_AUTOCLEAR
115 #define LO_FLAGS_AUTOCLEAR 4
118 /* Define pivot_root() if missing from the C library */
119 #ifndef HAVE_PIVOT_ROOT
120 static int pivot_root(const char * new_root
, const char * put_old
)
122 #ifdef __NR_pivot_root
123 return syscall(__NR_pivot_root
, new_root
, put_old
);
130 extern int pivot_root(const char * new_root
, const char * put_old
);
133 /* Define sethostname() if missing from the C library */
134 #ifndef HAVE_SETHOSTNAME
135 static int sethostname(const char * name
, size_t len
)
137 #ifdef __NR_sethostname
138 return syscall(__NR_sethostname
, name
, len
);
146 /* Define __S_ISTYPE if missing from the C library */
148 #define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
151 char *lxchook_names
[NUM_LXC_HOOKS
] = {
152 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
/*
 * Signature shared by the per-type network device callbacks: each one takes
 * the container handler and the network device description and returns an
 * int status.
 */
typedef int (*instanciate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
/*
 * Forward declarations of the per-type "instanciate" callbacks; they are
 * referenced by the netdev_conf dispatch table.
 */
static int instanciate_veth(struct lxc_handler *handler,
			    struct lxc_netdev *netdev);
static int instanciate_macvlan(struct lxc_handler *handler,
			       struct lxc_netdev *netdev);
static int instanciate_vlan(struct lxc_handler *handler,
			    struct lxc_netdev *netdev);
static int instanciate_phys(struct lxc_handler *handler,
			    struct lxc_netdev *netdev);
static int instanciate_empty(struct lxc_handler *handler,
			     struct lxc_netdev *netdev);
173 static instanciate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
174 [LXC_NET_VETH
] = instanciate_veth
,
175 [LXC_NET_MACVLAN
] = instanciate_macvlan
,
176 [LXC_NET_VLAN
] = instanciate_vlan
,
177 [LXC_NET_PHYS
] = instanciate_phys
,
178 [LXC_NET_EMPTY
] = instanciate_empty
,
/*
 * Forward declarations of the per-type "shutdown" callbacks; they are
 * referenced by the netdev_deconf dispatch table.
 */
static int shutdown_veth(struct lxc_handler *handler,
			 struct lxc_netdev *netdev);
static int shutdown_macvlan(struct lxc_handler *handler,
			    struct lxc_netdev *netdev);
static int shutdown_vlan(struct lxc_handler *handler,
			 struct lxc_netdev *netdev);
static int shutdown_phys(struct lxc_handler *handler,
			 struct lxc_netdev *netdev);
static int shutdown_empty(struct lxc_handler *handler,
			  struct lxc_netdev *netdev);
187 static instanciate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
188 [LXC_NET_VETH
] = shutdown_veth
,
189 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
190 [LXC_NET_VLAN
] = shutdown_vlan
,
191 [LXC_NET_PHYS
] = shutdown_phys
,
192 [LXC_NET_EMPTY
] = shutdown_empty
,
195 static struct mount_opt mount_opt
[] = {
196 { "defaults", 0, 0 },
197 { "ro", 0, MS_RDONLY
},
198 { "rw", 1, MS_RDONLY
},
199 { "suid", 1, MS_NOSUID
},
200 { "nosuid", 0, MS_NOSUID
},
201 { "dev", 1, MS_NODEV
},
202 { "nodev", 0, MS_NODEV
},
203 { "exec", 1, MS_NOEXEC
},
204 { "noexec", 0, MS_NOEXEC
},
205 { "sync", 0, MS_SYNCHRONOUS
},
206 { "async", 1, MS_SYNCHRONOUS
},
207 { "dirsync", 0, MS_DIRSYNC
},
208 { "remount", 0, MS_REMOUNT
},
209 { "mand", 0, MS_MANDLOCK
},
210 { "nomand", 1, MS_MANDLOCK
},
211 { "atime", 1, MS_NOATIME
},
212 { "noatime", 0, MS_NOATIME
},
213 { "diratime", 1, MS_NODIRATIME
},
214 { "nodiratime", 0, MS_NODIRATIME
},
215 { "bind", 0, MS_BIND
},
216 { "rbind", 0, MS_BIND
|MS_REC
},
217 { "relatime", 0, MS_RELATIME
},
218 { "norelatime", 1, MS_RELATIME
},
219 { "strictatime", 0, MS_STRICTATIME
},
220 { "nostrictatime", 1, MS_STRICTATIME
},
224 #if HAVE_SYS_CAPABILITY_H
225 static struct caps_opt caps_opt
[] = {
226 { "chown", CAP_CHOWN
},
227 { "dac_override", CAP_DAC_OVERRIDE
},
228 { "dac_read_search", CAP_DAC_READ_SEARCH
},
229 { "fowner", CAP_FOWNER
},
230 { "fsetid", CAP_FSETID
},
231 { "kill", CAP_KILL
},
232 { "setgid", CAP_SETGID
},
233 { "setuid", CAP_SETUID
},
234 { "setpcap", CAP_SETPCAP
},
235 { "linux_immutable", CAP_LINUX_IMMUTABLE
},
236 { "net_bind_service", CAP_NET_BIND_SERVICE
},
237 { "net_broadcast", CAP_NET_BROADCAST
},
238 { "net_admin", CAP_NET_ADMIN
},
239 { "net_raw", CAP_NET_RAW
},
240 { "ipc_lock", CAP_IPC_LOCK
},
241 { "ipc_owner", CAP_IPC_OWNER
},
242 { "sys_module", CAP_SYS_MODULE
},
243 { "sys_rawio", CAP_SYS_RAWIO
},
244 { "sys_chroot", CAP_SYS_CHROOT
},
245 { "sys_ptrace", CAP_SYS_PTRACE
},
246 { "sys_pacct", CAP_SYS_PACCT
},
247 { "sys_admin", CAP_SYS_ADMIN
},
248 { "sys_boot", CAP_SYS_BOOT
},
249 { "sys_nice", CAP_SYS_NICE
},
250 { "sys_resource", CAP_SYS_RESOURCE
},
251 { "sys_time", CAP_SYS_TIME
},
252 { "sys_tty_config", CAP_SYS_TTY_CONFIG
},
253 { "mknod", CAP_MKNOD
},
254 { "lease", CAP_LEASE
},
255 #ifdef CAP_AUDIT_WRITE
256 { "audit_write", CAP_AUDIT_WRITE
},
258 #ifdef CAP_AUDIT_CONTROL
259 { "audit_control", CAP_AUDIT_CONTROL
},
261 { "setfcap", CAP_SETFCAP
},
262 { "mac_override", CAP_MAC_OVERRIDE
},
263 { "mac_admin", CAP_MAC_ADMIN
},
265 { "syslog", CAP_SYSLOG
},
267 #ifdef CAP_WAKE_ALARM
268 { "wake_alarm", CAP_WAKE_ALARM
},
272 static struct caps_opt caps_opt
[] = {};
275 static int run_buffer(char *buffer
)
281 f
= popen(buffer
, "r");
283 SYSERROR("popen failed");
287 output
= malloc(LXC_LOG_BUFFER_SIZE
);
289 ERROR("failed to allocate memory for script output");
294 while(fgets(output
, LXC_LOG_BUFFER_SIZE
, f
))
295 DEBUG("script output: %s", output
);
301 SYSERROR("Script exited on error");
303 } else if (WIFEXITED(ret
) && WEXITSTATUS(ret
) != 0) {
304 ERROR("Script exited with status %d", WEXITSTATUS(ret
));
306 } else if (WIFSIGNALED(ret
)) {
307 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret
),
308 strsignal(WTERMSIG(ret
)));
315 static int run_script_argv(const char *name
, const char *section
,
316 const char *script
, const char *hook
, const char *lxcpath
,
323 INFO("Executing script '%s' for container '%s', config section '%s'",
324 script
, name
, section
);
326 for (i
=0; argsin
&& argsin
[i
]; i
++)
327 size
+= strlen(argsin
[i
]) + 1;
329 size
+= strlen(hook
) + 1;
331 size
+= strlen(script
);
332 size
+= strlen(name
);
333 size
+= strlen(section
);
339 buffer
= alloca(size
);
341 ERROR("failed to allocate memory");
345 ret
= snprintf(buffer
, size
, "%s %s %s %s", script
, name
, section
, hook
);
346 if (ret
< 0 || ret
>= size
) {
347 ERROR("Script name too long");
351 for (i
=0; argsin
&& argsin
[i
]; i
++) {
354 rc
= snprintf(buffer
+ ret
, len
, " %s", argsin
[i
]);
355 if (rc
< 0 || rc
>= len
) {
356 ERROR("Script args too long");
362 return run_buffer(buffer
);
365 static int run_script(const char *name
, const char *section
,
366 const char *script
, ...)
373 INFO("Executing script '%s' for container '%s', config section '%s'",
374 script
, name
, section
);
376 va_start(ap
, script
);
377 while ((p
= va_arg(ap
, char *)))
378 size
+= strlen(p
) + 1;
381 size
+= strlen(script
);
382 size
+= strlen(name
);
383 size
+= strlen(section
);
389 buffer
= alloca(size
);
391 ERROR("failed to allocate memory");
395 ret
= snprintf(buffer
, size
, "%s %s %s", script
, name
, section
);
396 if (ret
< 0 || ret
>= size
) {
397 ERROR("Script name too long");
401 va_start(ap
, script
);
402 while ((p
= va_arg(ap
, char *))) {
405 rc
= snprintf(buffer
+ ret
, len
, " %s", p
);
406 if (rc
< 0 || rc
>= len
) {
407 ERROR("Script args too long");
414 return run_buffer(buffer
);
417 static int find_fstype_cb(char* buffer
, void *data
)
427 /* we don't try 'nodev' entries */
428 if (strstr(buffer
, "nodev"))
432 fstype
+= lxc_char_left_gc(fstype
, strlen(fstype
));
433 fstype
[lxc_char_right_gc(fstype
, strlen(fstype
))] = '\0';
435 DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
436 cbarg
->rootfs
, cbarg
->target
, fstype
);
438 if (mount(cbarg
->rootfs
, cbarg
->target
, fstype
, cbarg
->mntopt
, NULL
)) {
439 DEBUG("mount failed with error: %s", strerror(errno
));
443 INFO("mounted '%s' on '%s', with fstype '%s'",
444 cbarg
->rootfs
, cbarg
->target
, fstype
);
449 static int mount_unknow_fs(const char *rootfs
, const char *target
, int mntopt
)
464 * find the filesystem type with brute force:
465 * first we check with /etc/filesystems, in case the modules
466 * are auto-loaded and fall back to the supported kernel fs
473 for (i
= 0; i
< sizeof(fsfile
)/sizeof(fsfile
[0]); i
++) {
477 if (access(fsfile
[i
], F_OK
))
480 ret
= lxc_file_for_each_line(fsfile
[i
], find_fstype_cb
, &cbarg
);
482 ERROR("failed to parse '%s'", fsfile
[i
]);
490 ERROR("failed to determine fs type for '%s'", rootfs
);
/*
 * Mount a directory rootfs onto target with a recursive bind mount.
 *
 * rootfs: source directory.
 * target: mount point.
 *
 * Returns the result of mount(2) unchanged.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
499 static int setup_lodev(const char *rootfs
, int fd
, struct loop_info64
*loinfo
)
504 rfd
= open(rootfs
, O_RDWR
);
506 SYSERROR("failed to open '%s'", rootfs
);
510 memset(loinfo
, 0, sizeof(*loinfo
));
512 loinfo
->lo_flags
= LO_FLAGS_AUTOCLEAR
;
514 if (ioctl(fd
, LOOP_SET_FD
, rfd
)) {
515 SYSERROR("failed to LOOP_SET_FD");
519 if (ioctl(fd
, LOOP_SET_STATUS64
, loinfo
)) {
520 SYSERROR("failed to LOOP_SET_STATUS64");
531 static int mount_rootfs_file(const char *rootfs
, const char *target
)
533 struct dirent dirent
, *direntp
;
534 struct loop_info64 loinfo
;
535 int ret
= -1, fd
= -1, rc
;
537 char path
[MAXPATHLEN
];
539 dir
= opendir("/dev");
541 SYSERROR("failed to open '/dev'");
545 while (!readdir_r(dir
, &dirent
, &direntp
)) {
550 if (!strcmp(direntp
->d_name
, "."))
553 if (!strcmp(direntp
->d_name
, ".."))
556 if (strncmp(direntp
->d_name
, "loop", 4))
559 rc
= snprintf(path
, MAXPATHLEN
, "/dev/%s", direntp
->d_name
);
560 if (rc
< 0 || rc
>= MAXPATHLEN
)
563 fd
= open(path
, O_RDWR
);
567 if (ioctl(fd
, LOOP_GET_STATUS64
, &loinfo
) == 0) {
572 if (errno
!= ENXIO
) {
573 WARN("unexpected error for ioctl on '%s': %m",
579 DEBUG("found '%s' free lodev", path
);
581 ret
= setup_lodev(rootfs
, fd
, &loinfo
);
583 ret
= mount_unknow_fs(path
, target
, 0);
590 WARN("failed to close directory");
/*
 * Mount a block-device rootfs onto target.
 *
 * The filesystem type is not known here, so the work is handed to
 * mount_unknow_fs() with mntopt set to 0.
 *
 * Returns whatever mount_unknow_fs() returns.
 */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int mntopt = 0;

	return mount_unknow_fs(rootfs, target, mntopt);
}
602 * if rootfs is a directory, then open ${rootfs}.hold for writing for the
603 * duration of the container run, to prevent the container from marking the
604 * underlying fs readonly on shutdown.
605 * return -1 on error.
606 * return -2 if nothing needed to be pinned.
607 * return an open fd (>=0) if we pinned it.
609 int pin_rootfs(const char *rootfs
)
611 char absrootfs
[MAXPATHLEN
];
612 char absrootfspin
[MAXPATHLEN
];
616 if (rootfs
== NULL
|| strlen(rootfs
) == 0)
619 if (!realpath(rootfs
, absrootfs
)) {
620 INFO("failed to get real path for '%s', not pinning", rootfs
);
624 if (access(absrootfs
, F_OK
)) {
625 SYSERROR("'%s' is not accessible", absrootfs
);
629 if (stat(absrootfs
, &s
)) {
630 SYSERROR("failed to stat '%s'", absrootfs
);
634 if (!S_ISDIR(s
.st_mode
))
637 ret
= snprintf(absrootfspin
, MAXPATHLEN
, "%s%s", absrootfs
, ".hold");
638 if (ret
>= MAXPATHLEN
) {
639 SYSERROR("pathname too long for rootfs hold file");
643 fd
= open(absrootfspin
, O_CREAT
| O_RDWR
, S_IWUSR
|S_IRUSR
);
644 INFO("opened %s as fd %d\n", absrootfspin
, fd
);
648 static int mount_rootfs(const char *rootfs
, const char *target
)
650 char absrootfs
[MAXPATHLEN
];
654 typedef int (*rootfs_cb
)(const char *, const char *);
660 { S_IFDIR
, mount_rootfs_dir
},
661 { S_IFBLK
, mount_rootfs_block
},
662 { S_IFREG
, mount_rootfs_file
},
665 if (!realpath(rootfs
, absrootfs
)) {
666 SYSERROR("failed to get real path for '%s'", rootfs
);
670 if (access(absrootfs
, F_OK
)) {
671 SYSERROR("'%s' is not accessible", absrootfs
);
675 if (stat(absrootfs
, &s
)) {
676 SYSERROR("failed to stat '%s'", absrootfs
);
680 for (i
= 0; i
< sizeof(rtfs_type
)/sizeof(rtfs_type
[0]); i
++) {
682 if (!__S_ISTYPE(s
.st_mode
, rtfs_type
[i
].type
))
685 return rtfs_type
[i
].cb(absrootfs
, target
);
688 ERROR("unsupported rootfs type for '%s'", absrootfs
);
692 static int setup_utsname(struct utsname
*utsname
)
697 if (sethostname(utsname
->nodename
, strlen(utsname
->nodename
))) {
698 SYSERROR("failed to set the hostname to '%s'", utsname
->nodename
);
702 INFO("'%s' hostname has been setup", utsname
->nodename
);
707 static int setup_tty(const struct lxc_rootfs
*rootfs
,
708 const struct lxc_tty_info
*tty_info
, char *ttydir
)
710 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
716 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
718 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
720 ret
= snprintf(path
, sizeof(path
), "%s/dev/tty%d",
721 rootfs
->mount
, i
+ 1);
722 if (ret
>= sizeof(path
)) {
723 ERROR("pathname too long for ttys");
727 /* create dev/lxc/tty%d" */
728 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/tty%d",
729 rootfs
->mount
, ttydir
, i
+ 1);
730 if (ret
>= sizeof(lxcpath
)) {
731 ERROR("pathname too long for ttys");
734 ret
= creat(lxcpath
, 0660);
735 if (ret
==-1 && errno
!= EEXIST
) {
736 SYSERROR("error creating %s\n", lxcpath
);
742 if (ret
&& errno
!= ENOENT
) {
743 SYSERROR("error unlinking %s\n", path
);
747 if (mount(pty_info
->name
, lxcpath
, "none", MS_BIND
, 0)) {
748 WARN("failed to mount '%s'->'%s'",
749 pty_info
->name
, path
);
753 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/tty%d", ttydir
, i
+1);
754 if (ret
>= sizeof(lxcpath
)) {
755 ERROR("tty pathname too long");
758 ret
= symlink(lxcpath
, path
);
760 SYSERROR("failed to create symlink for tty %d\n", i
+1);
764 /* If we populated /dev, then we need to create /dev/ttyN */
765 if (access(path
, F_OK
)) {
766 ret
= creat(path
, 0660);
768 SYSERROR("error creating %s\n", path
);
769 /* this isn't fatal, continue */
773 if (mount(pty_info
->name
, path
, "none", MS_BIND
, 0)) {
774 WARN("failed to mount '%s'->'%s'",
775 pty_info
->name
, path
);
781 INFO("%d tty(s) has been setup", tty_info
->nbtty
);
786 static int setup_rootfs_pivot_root_cb(char *buffer
, void *data
)
788 struct lxc_list
*mountlist
, *listentry
, *iterator
;
789 char *pivotdir
, *mountpoint
, *mountentry
, *saveptr
= NULL
;
794 cbparm
= (void **)data
;
796 mountlist
= cbparm
[0];
797 pivotdir
= cbparm
[1];
799 /* parse entry, first field is mountname, ignore */
800 mountpoint
= strtok_r(mountentry
, " ", &saveptr
);
804 /* second field is mountpoint */
805 mountpoint
= strtok_r(NULL
, " ", &saveptr
);
809 /* only consider mountpoints below old root fs */
810 if (strncmp(mountpoint
, pivotdir
, strlen(pivotdir
)))
813 /* filter duplicate mountpoints */
815 lxc_list_for_each(iterator
, mountlist
) {
816 if (!strcmp(iterator
->elem
, mountpoint
)) {
824 /* add entry to list */
825 listentry
= malloc(sizeof(*listentry
));
827 SYSERROR("malloc for mountpoint listentry failed");
831 listentry
->elem
= strdup(mountpoint
);
832 if (!listentry
->elem
) {
833 SYSERROR("strdup failed");
837 lxc_list_add_tail(mountlist
, listentry
);
842 static int umount_oldrootfs(const char *oldrootfs
)
844 char path
[MAXPATHLEN
];
846 struct lxc_list mountlist
, *iterator
, *next
;
847 int ok
, still_mounted
, last_still_mounted
;
850 /* read and parse /proc/mounts in old root fs */
851 lxc_list_init(&mountlist
);
853 /* oldrootfs is on the top tree directory now */
854 rc
= snprintf(path
, sizeof(path
), "/%s", oldrootfs
);
855 if (rc
>= sizeof(path
)) {
856 ERROR("rootfs name too long");
859 cbparm
[0] = &mountlist
;
861 cbparm
[1] = strdup(path
);
863 SYSERROR("strdup failed");
867 rc
= snprintf(path
, sizeof(path
), "%s/proc/mounts", oldrootfs
);
868 if (rc
>= sizeof(path
)) {
869 ERROR("container proc/mounts name too long");
873 ok
= lxc_file_for_each_line(path
,
874 setup_rootfs_pivot_root_cb
, &cbparm
);
876 SYSERROR("failed to read or parse mount list '%s'", path
);
880 /* umount filesystems until none left or list no longer shrinks */
883 last_still_mounted
= still_mounted
;
886 lxc_list_for_each_safe(iterator
, &mountlist
, next
) {
888 /* umount normally */
889 if (!umount(iterator
->elem
)) {
890 DEBUG("umounted '%s'", (char *)iterator
->elem
);
891 lxc_list_del(iterator
);
898 } while (still_mounted
> 0 && still_mounted
!= last_still_mounted
);
901 lxc_list_for_each(iterator
, &mountlist
) {
903 /* let's try a lazy umount */
904 if (!umount2(iterator
->elem
, MNT_DETACH
)) {
905 INFO("lazy unmount of '%s'", (char *)iterator
->elem
);
909 /* be more brutal (nfs) */
910 if (!umount2(iterator
->elem
, MNT_FORCE
)) {
911 INFO("forced unmount of '%s'", (char *)iterator
->elem
);
915 WARN("failed to unmount '%s'", (char *)iterator
->elem
);
921 static int setup_rootfs_pivot_root(const char *rootfs
, const char *pivotdir
)
923 char path
[MAXPATHLEN
];
924 int remove_pivotdir
= 0;
927 /* change into new root fs */
929 SYSERROR("can't chdir to new rootfs '%s'", rootfs
);
934 pivotdir
= "lxc_putold";
936 /* compute the full path to pivotdir under rootfs */
937 rc
= snprintf(path
, sizeof(path
), "%s/%s", rootfs
, pivotdir
);
938 if (rc
>= sizeof(path
)) {
939 ERROR("pivot dir name too long");
943 if (access(path
, F_OK
)) {
945 if (mkdir_p(path
, 0755)) {
946 SYSERROR("failed to create pivotdir '%s'", path
);
951 DEBUG("created '%s' directory", path
);
954 DEBUG("mountpoint for old rootfs is '%s'", path
);
956 /* pivot_root into our new root fs */
957 if (pivot_root(".", path
)) {
958 SYSERROR("pivot_root syscall failed");
963 SYSERROR("can't chdir to / after pivot_root");
967 DEBUG("pivot_root syscall to '%s' successful", rootfs
);
969 /* we switch from absolute path to relative path */
970 if (umount_oldrootfs(pivotdir
))
973 /* remove temporary mount point, we don't consider the removing
975 if (remove_pivotdir
&& rmdir(pivotdir
))
976 WARN("can't remove mountpoint '%s': %m", pivotdir
);
982 * Do we want to add options for max size of /dev and a file to
983 * specify which devices to create?
985 static int mount_autodev(char *root
)
988 char path
[MAXPATHLEN
];
990 INFO("Mounting /dev under %s\n", root
);
991 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev", root
);
992 if (ret
< 0 || ret
> MAXPATHLEN
)
994 ret
= mount("none", path
, "tmpfs", 0, "size=100000");
996 SYSERROR("Failed to mount /dev at %s\n", root
);
999 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev/pts", root
);
1000 if (ret
< 0 || ret
>= MAXPATHLEN
)
1002 ret
= mkdir(path
, S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
);
1004 SYSERROR("Failed to create /dev/pts in container");
1008 INFO("Mounted /dev under %s\n", root
);
1019 struct lxc_devs lxc_devs
[] = {
1020 { "null", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 3 },
1021 { "zero", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 5 },
1022 { "full", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 7 },
1023 { "urandom", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 9 },
1024 { "random", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 8 },
1025 { "tty", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 5, 0 },
1026 { "console", S_IFCHR
| S_IRUSR
| S_IWUSR
, 5, 1 },
1029 static int setup_autodev(char *root
)
1033 char path
[MAXPATHLEN
];
1037 INFO("Creating initial consoles under %s/dev\n", root
);
1039 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev", root
);
1040 if (ret
< 0 || ret
>= MAXPATHLEN
) {
1041 ERROR("Error calculating container /dev location");
1045 INFO("Populating /dev under %s\n", root
);
1046 cmask
= umask(S_IXUSR
| S_IXGRP
| S_IXOTH
);
1047 for (i
= 0; i
< sizeof(lxc_devs
) / sizeof(lxc_devs
[0]); i
++) {
1049 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev/%s", root
, d
->name
);
1050 if (ret
< 0 || ret
>= MAXPATHLEN
)
1052 ret
= mknod(path
, d
->mode
, makedev(d
->maj
, d
->min
));
1053 if (ret
&& errno
!= EEXIST
) {
1054 SYSERROR("Error creating %s\n", d
->name
);
1060 INFO("Populated /dev under %s\n", root
);
1065 * Detect whether / is mounted MS_SHARED. The only way I know of to
1066 * check that is through /proc/self/mountinfo.
1067 * I'm only checking for /. If the container rootfs or mount location
1068 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1069 * out would be too much work to be worth it.
1071 #define LINELEN 4096
1072 int detect_shared_rootfs(void)
1074 char buf
[LINELEN
], *p
;
1079 f
= fopen("/proc/self/mountinfo", "r");
1082 while ((p
= fgets(buf
, LINELEN
, f
))) {
1083 INFO("looking at .%s.", p
);
1084 for (p
= buf
, i
=0; p
&& i
< 4; i
++)
1085 p
= index(p
+1, ' ');
1088 p2
= index(p
+1, ' ');
1092 INFO("now p is .%s.", p
);
1093 if (strcmp(p
+1, "/") == 0) {
1094 // this is '/'. is it shared?
1095 p
= index(p2
+1, ' ');
1096 if (p
&& strstr(p
, "shared:")) {
1107 * I'll forgive you for asking whether all of this is needed :) The
1109 * pivot_root will fail if the new root, the put_old dir, or the parent
1110 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1111 * or may not be current->fs_root - if we assumed it always was, we could
1112 * just mount --make-rslave /). So,
1113 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1114 * 2. make that MS_SLAVE
1115 * 3. make a 'root' directory under that
1116 * 4. mount --rbind / under the $tinyroot/root.
1117 * 5. make that rslave
1118 * 6. chdir and chroot into $tinyroot/root
1119 * 7. $tinyroot will be unmounted by our parent in start.c
1121 static int chroot_into_slave(struct lxc_conf
*conf
)
1123 char path
[MAXPATHLEN
];
1124 const char *destpath
= conf
->rootfs
.mount
;
1127 if (mount(destpath
, destpath
, NULL
, MS_BIND
, 0)) {
1128 SYSERROR("failed to mount %s bind", destpath
);
1131 if (mount("", destpath
, NULL
, MS_SLAVE
, 0)) {
1132 SYSERROR("failed to make %s slave", destpath
);
1135 if (mount("none", destpath
, "tmpfs", 0, "size=10000")) {
1136 SYSERROR("Failed to mount tmpfs / at %s", destpath
);
1139 ret
= snprintf(path
, MAXPATHLEN
, "%s/root", destpath
);
1140 if (ret
< 0 || ret
>= MAXPATHLEN
) {
1141 ERROR("out of memory making root path");
1144 if (mkdir(path
, S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
)) {
1145 SYSERROR("Failed to create /dev/pts in container");
1148 if (mount("/", path
, NULL
, MS_BIND
|MS_REC
, 0)) {
1149 SYSERROR("Failed to rbind mount / to %s", path
);
1152 if (mount("", destpath
, NULL
, MS_SLAVE
|MS_REC
, 0)) {
1153 SYSERROR("Failed to make tmp-/ at %s rslave", path
);
1157 SYSERROR("Failed to chdir into tmp-/");
1161 SYSERROR("Failed to chroot into tmp-/");
1164 INFO("Chrooted into tmp-/ at %s\n", path
);
1168 static int setup_rootfs(struct lxc_conf
*conf
)
1170 const struct lxc_rootfs
*rootfs
= &conf
->rootfs
;
1172 if (!rootfs
->path
) {
1173 if (mount("", "/", NULL
, MS_SLAVE
|MS_REC
, 0)) {
1174 SYSERROR("Failed to make / rslave");
1180 if (access(rootfs
->mount
, F_OK
)) {
1181 SYSERROR("failed to access to '%s', check it is present",
1186 if (detect_shared_rootfs()) {
1187 if (chroot_into_slave(conf
)) {
1188 ERROR("Failed to chroot into slave /");
1193 // First try mounting rootfs using a bdev
1194 struct bdev
*bdev
= bdev_init(rootfs
->path
, rootfs
->mount
, NULL
);
1195 if (bdev
&& bdev
->ops
->mount(bdev
) == 0) {
1196 DEBUG("mounted '%s' on '%s'", rootfs
->path
, rootfs
->mount
);
1199 if (mount_rootfs(rootfs
->path
, rootfs
->mount
)) {
1200 ERROR("failed to mount rootfs");
1204 DEBUG("mounted '%s' on '%s'", rootfs
->path
, rootfs
->mount
);
1209 int setup_pivot_root(const struct lxc_rootfs
*rootfs
)
1214 if (setup_rootfs_pivot_root(rootfs
->mount
, rootfs
->pivot
)) {
1215 ERROR("failed to setup pivot root");
1222 static int setup_pts(int pts
)
1224 char target
[PATH_MAX
];
1229 if (!access("/dev/pts/ptmx", F_OK
) && umount("/dev/pts")) {
1230 SYSERROR("failed to umount 'dev/pts'");
1234 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL
,
1235 "newinstance,ptmxmode=0666")) {
1236 SYSERROR("failed to mount a new instance of '/dev/pts'");
1240 if (access("/dev/ptmx", F_OK
)) {
1241 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1243 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
1247 if (realpath("/dev/ptmx", target
) && !strcmp(target
, "/dev/pts/ptmx"))
1250 /* fallback here, /dev/pts/ptmx exists just mount bind */
1251 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND
, 0)) {
1252 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
1256 INFO("created new pts instance");
1262 static int setup_personality(int persona
)
1264 #if HAVE_SYS_PERSONALITY_H
1268 if (personality(persona
) < 0) {
1269 SYSERROR("failed to set personality to '0x%x'", persona
);
1273 INFO("set personality to '0x%x'", persona
);
1279 static int setup_dev_console(const struct lxc_rootfs
*rootfs
,
1280 const struct lxc_console
*console
)
1282 char path
[MAXPATHLEN
];
1286 ret
= snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1287 if (ret
>= sizeof(path
)) {
1288 ERROR("console path too long\n");
1292 if (access(path
, F_OK
)) {
1293 WARN("rootfs specified but no console found at '%s'", path
);
1297 if (console
->master
< 0) {
1302 if (stat(path
, &s
)) {
1303 SYSERROR("failed to stat '%s'", path
);
1307 if (chmod(console
->name
, s
.st_mode
)) {
1308 SYSERROR("failed to set mode '0%o' to '%s'",
1309 s
.st_mode
, console
->name
);
1313 if (mount(console
->name
, path
, "none", MS_BIND
, 0)) {
1314 ERROR("failed to mount '%s' on '%s'", console
->name
, path
);
1318 INFO("console has been setup");
1322 static int setup_ttydir_console(const struct lxc_rootfs
*rootfs
,
1323 const struct lxc_console
*console
,
1326 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
1329 /* create rootfs/dev/<ttydir> directory */
1330 ret
= snprintf(path
, sizeof(path
), "%s/dev/%s", rootfs
->mount
,
1332 if (ret
>= sizeof(path
))
1334 ret
= mkdir(path
, 0755);
1335 if (ret
&& errno
!= EEXIST
) {
1336 SYSERROR("failed with errno %d to create %s\n", errno
, path
);
1339 INFO("created %s\n", path
);
1341 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/console",
1342 rootfs
->mount
, ttydir
);
1343 if (ret
>= sizeof(lxcpath
)) {
1344 ERROR("console path too long\n");
1348 snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1350 if (ret
&& errno
!= ENOENT
) {
1351 SYSERROR("error unlinking %s\n", path
);
1355 ret
= creat(lxcpath
, 0660);
1356 if (ret
==-1 && errno
!= EEXIST
) {
1357 SYSERROR("error %d creating %s\n", errno
, lxcpath
);
1363 if (console
->master
< 0) {
1368 if (mount(console
->name
, lxcpath
, "none", MS_BIND
, 0)) {
1369 ERROR("failed to mount '%s' on '%s'", console
->name
, lxcpath
);
1373 /* create symlink from rootfs/dev/console to 'lxc/console' */
1374 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/console", ttydir
);
1375 if (ret
>= sizeof(lxcpath
)) {
1376 ERROR("lxc/console path too long");
1379 ret
= symlink(lxcpath
, path
);
1381 SYSERROR("failed to create symlink for console");
1385 INFO("console has been setup on %s", lxcpath
);
1390 static int setup_console(const struct lxc_rootfs
*rootfs
,
1391 const struct lxc_console
*console
,
1394 /* We don't have a rootfs, /dev/console will be shared */
1398 return setup_dev_console(rootfs
, console
);
1400 return setup_ttydir_console(rootfs
, console
, ttydir
);
1403 static int setup_kmsg(const struct lxc_rootfs
*rootfs
,
1404 const struct lxc_console
*console
)
1406 char kpath
[MAXPATHLEN
];
1411 ret
= snprintf(kpath
, sizeof(kpath
), "%s/dev/kmsg", rootfs
->mount
);
1412 if (ret
< 0 || ret
>= sizeof(kpath
))
1415 ret
= unlink(kpath
);
1416 if (ret
&& errno
!= ENOENT
) {
1417 SYSERROR("error unlinking %s\n", kpath
);
1421 ret
= symlink("console", kpath
);
1423 SYSERROR("failed to create symlink for kmsg");
1430 static int _setup_cgroup(const char *cgpath
, struct lxc_list
*cgroups
,
1433 struct lxc_list
*iterator
;
1434 struct lxc_cgroup
*cg
;
1437 if (lxc_list_empty(cgroups
))
1440 lxc_list_for_each(iterator
, cgroups
) {
1441 cg
= iterator
->elem
;
1443 if (devices
== !strncmp("devices", cg
->subsystem
, 7)) {
1444 if (lxc_cgroup_set_bypath(cgpath
, cg
->subsystem
,
1446 ERROR("Error setting %s to %s for %s\n",
1447 cg
->subsystem
, cg
->value
, cgpath
);
1452 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1456 INFO("cgroup has been setup");
/*
 * Apply cgroup settings from the cgroups list under cgpath by calling
 * _setup_cgroup() with its third argument set to 1.
 * NOTE(review): that flag appears to select the "devices" subsystem
 * entries only -- confirm against _setup_cgroup().
 *
 * Returns whatever _setup_cgroup() returns.
 */
int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups)
{
	const int devices = 1;

	return _setup_cgroup(cgpath, cgroups, devices);
}
/*
 * Apply cgroup settings from the cgroups list under cgpath by calling
 * _setup_cgroup() with its third argument set to 0.
 * NOTE(review): that flag appears to select every entry except the
 * "devices" subsystem ones -- confirm against _setup_cgroup().
 *
 * Returns whatever _setup_cgroup() returns.
 */
int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
{
	const int devices = 0;

	return _setup_cgroup(cgpath, cgroups, devices);
}
1471 static void parse_mntopt(char *opt
, unsigned long *flags
, char **data
)
1473 struct mount_opt
*mo
;
1475 /* If opt is found in mount_opt, set or clear flags.
1476 * Otherwise append it to data. */
1478 for (mo
= &mount_opt
[0]; mo
->name
!= NULL
; mo
++) {
1479 if (!strncmp(opt
, mo
->name
, strlen(mo
->name
))) {
1481 *flags
&= ~mo
->flag
;
1493 static int parse_mntopts(const char *mntopts
, unsigned long *mntflags
,
1497 char *p
, *saveptr
= NULL
;
1505 s
= strdup(mntopts
);
1507 SYSERROR("failed to allocate memory");
1511 data
= malloc(strlen(s
) + 1);
1513 SYSERROR("failed to allocate memory");
1519 for (p
= strtok_r(s
, ",", &saveptr
); p
!= NULL
;
1520 p
= strtok_r(NULL
, ",", &saveptr
))
1521 parse_mntopt(p
, mntflags
, &data
);
1532 static int mount_entry(const char *fsname
, const char *target
,
1533 const char *fstype
, unsigned long mountflags
,
1536 if (mount(fsname
, target
, fstype
, mountflags
& ~MS_REMOUNT
, data
)) {
1537 SYSERROR("failed to mount '%s' on '%s'", fsname
, target
);
1541 if ((mountflags
& MS_REMOUNT
) || (mountflags
& MS_BIND
)) {
1543 DEBUG("remounting %s on %s to respect bind or remount options",
1546 if (mount(fsname
, target
, fstype
,
1547 mountflags
| MS_REMOUNT
, data
)) {
1548 SYSERROR("failed to mount '%s' on '%s'",
1554 DEBUG("mounted '%s' on '%s', type '%s'", fsname
, target
, fstype
);
1559 static inline int mount_entry_on_systemfs(struct mntent
*mntent
)
1561 unsigned long mntflags
;
1565 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1566 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1570 ret
= mount_entry(mntent
->mnt_fsname
, mntent
->mnt_dir
,
1571 mntent
->mnt_type
, mntflags
, mntdata
);
1573 if (hasmntopt(mntent
, "optional") != NULL
)
/*
 * Mount an entry whose mnt_dir is an absolute path, relocating it under
 * the container's rootfs mount point.
 *
 * mnt_dir is searched (strstr) first for "<default_lxc_path()>/<lxc_name>/rootfs"
 * and then for rootfs->path; the length of the matched prefix is kept in
 * `offset`. A mount point matching neither is logged with a WARN as ignored.
 * The final target is formatted below rootfs->mount and passed to
 * mount_entry(); the entry is then checked for the "optional" option.
 */
1581 static int mount_entry_on_absolute_rootfs(struct mntent
*mntent
,
1582 const struct lxc_rootfs
*rootfs
,
1583 const char *lxc_name
)
1586 char path
[MAXPATHLEN
];
1587 unsigned long mntflags
;
1589 int r
, ret
= 0, offset
;
1590 const char *lxcpath
;
1592 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1593 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1597 lxcpath
= default_lxc_path();
1599 ERROR("Out of memory");
1603 /* if rootfs->path is a blockdev path, allow container fstab to
1604 * use $lxcpath/CN/rootfs as the target prefix */
1605 r
= snprintf(path
, MAXPATHLEN
, "%s/%s/rootfs", lxcpath
, lxc_name
);
1606 if (r
< 0 || r
>= MAXPATHLEN
)
1609 aux
= strstr(mntent
->mnt_dir
, path
);
1611 offset
= strlen(path
);
1616 aux
= strstr(mntent
->mnt_dir
, rootfs
->path
);
1618 WARN("ignoring mount point '%s'", mntent
->mnt_dir
);
1621 offset
= strlen(rootfs
->path
);
1625 r
= snprintf(path
, MAXPATHLEN
, "%s/%s", rootfs
->mount
,
1627 if (r
< 0 || r
>= MAXPATHLEN
) {
1628 WARN("pathnme too long for '%s'", mntent
->mnt_dir
);
1634 ret
= mount_entry(mntent
->mnt_fsname
, path
, mntent
->mnt_type
,
1637 if (hasmntopt(mntent
, "optional") != NULL
)
/*
 * Mount an entry whose mnt_dir is relative to the container root.
 *
 * The target path is built as "<rootfs>/<mnt_dir>"; a result that does not
 * fit in the path buffer is reported as "path name too long". The mount is
 * done by mount_entry() and the entry is then checked for the "optional"
 * mount option.
 */
1645 static int mount_entry_on_relative_rootfs(struct mntent
*mntent
,
1648 char path
[MAXPATHLEN
];
1649 unsigned long mntflags
;
1653 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1654 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1658 /* relative to root mount point */
1659 ret
= snprintf(path
, sizeof(path
), "%s/%s", rootfs
, mntent
->mnt_dir
);
1660 if (ret
>= sizeof(path
)) {
1661 ERROR("path name too long");
1665 ret
= mount_entry(mntent
->mnt_fsname
, path
, mntent
->mnt_type
,
1668 if (hasmntopt(mntent
, "optional") != NULL
)
/*
 * Mount every entry read from `file` with getmntent().
 *
 * Dispatch per entry:
 *  - rootfs->path unset:             mount_entry_on_systemfs()
 *  - mnt_dir does not start with '/': mount_entry_on_relative_rootfs()
 *  - otherwise:                       mount_entry_on_absolute_rootfs()
 */
1676 static int mount_file_entries(const struct lxc_rootfs
*rootfs
, FILE *file
,
1677 const char *lxc_name
)
1679 struct mntent
*mntent
;
1682 while ((mntent
= getmntent(file
))) {
1684 if (!rootfs
->path
) {
1685 if (mount_entry_on_systemfs(mntent
))
1690 /* We have a separate root, mounts are relative to it */
1691 if (mntent
->mnt_dir
[0] != '/') {
1692 if (mount_entry_on_relative_rootfs(mntent
,
1698 if (mount_entry_on_absolute_rootfs(mntent
, rootfs
, lxc_name
))
1704 INFO("mount points have been setup");
/*
 * Open the fstab file with setmntent() in read mode and mount its entries
 * through mount_file_entries(). A failure to open is reported via SYSERROR.
 */
1709 static int setup_mount(const struct lxc_rootfs
*rootfs
, const char *fstab
,
1710 const char *lxc_name
)
1718 file
= setmntent(fstab
, "r");
1720 SYSERROR("failed to use '%s'", fstab
);
1724 ret
= mount_file_entries(rootfs
, file
, lxc_name
);
/*
 * Mount the entries held in the `mount` list.
 *
 * Each list element is a string that is written, one per line, to `file`,
 * which is then handed to mount_file_entries().
 * NOTE(review): the error message suggests `file` comes from tmpfile();
 * the creation and rewind of the stream are not visible here -- confirm.
 */
1730 static int setup_mount_entries(const struct lxc_rootfs
*rootfs
, struct lxc_list
*mount
,
1731 const char *lxc_name
)
1734 struct lxc_list
*iterator
;
1740 ERROR("tmpfile error: %m");
1744 lxc_list_for_each(iterator
, mount
) {
1745 mount_entry
= iterator
->elem
;
1746 fprintf(file
, "%s\n", mount_entry
);
1751 ret
= mount_file_entries(rootfs
, file
, lxc_name
);
/*
 * Drop every capability listed in `caps` with prctl(PR_CAPBSET_DROP).
 *
 * Each list element is a string. It is first compared against the names in
 * the caps_opt table; the entry is also parsed with strtol() as a decimal
 * number and checked against lxc_caps_last_cap(). An entry that cannot be
 * resolved is reported as "unknown capability".
 */
1757 static int setup_caps(struct lxc_list
*caps
)
1759 struct lxc_list
*iterator
;
1764 lxc_list_for_each(iterator
, caps
) {
1766 drop_entry
= iterator
->elem
;
1770 for (i
= 0; i
< sizeof(caps_opt
)/sizeof(caps_opt
[0]); i
++) {
1772 if (strcmp(drop_entry
, caps_opt
[i
].name
))
1775 capid
= caps_opt
[i
].value
;
1780 /* try to see if it's numeric, so the user may specify
1781 * capabilities that the running kernel knows about but
1783 capid
= strtol(drop_entry
, &ptr
, 10);
1784 if (!ptr
|| *ptr
!= '\0' ||
1785 capid
== LONG_MIN
|| capid
== LONG_MAX
)
1786 /* not a valid number */
1788 else if (capid
> lxc_caps_last_cap())
1789 /* we have a number but it's not a valid
1795 ERROR("unknown capability %s", drop_entry
);
1799 DEBUG("drop capability '%s' (%d)", drop_entry
, capid
);
1801 if (prctl(PR_CAPBSET_DROP
, capid
, 0, 0, 0)) {
1802 SYSERROR("failed to remove %s capability", drop_entry
);
1808 DEBUG("capabilities has been setup");
1813 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
1815 struct sockaddr sockaddr
;
1819 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
1821 ERROR("mac address '%s' conversion failed : %s",
1822 hwaddr
, strerror(-ret
));
1826 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
1827 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
1829 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1831 ERROR("socket failure : %s", strerror(errno
));
1835 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
1838 ERROR("ioctl failure : %s", strerror(errno
));
1840 DEBUG("mac address '%s' on '%s' has been setup", hwaddr
, ifname
);
/*
 * Add every IPv4 address of the `ip` list to the interface `ifindex`.
 *
 * Each element is a struct lxc_inetdev whose addr, bcast and prefix are
 * passed to lxc_ipv4_addr_add(); a failure is logged with strerror(-err).
 */
1845 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
1847 struct lxc_list
*iterator
;
1848 struct lxc_inetdev
*inetdev
;
1851 lxc_list_for_each(iterator
, ip
) {
1853 inetdev
= iterator
->elem
;
1855 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
1856 &inetdev
->bcast
, inetdev
->prefix
);
1858 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1859 ifindex
, strerror(-err
));
/*
 * Add every IPv6 address of the `ip` list to the interface `ifindex`.
 *
 * Each element is a struct lxc_inet6dev whose addr, mcast and acast are
 * passed to lxc_ipv6_addr_add(); a failure is logged with strerror(-err).
 */
1867 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
1869 struct lxc_list
*iterator
;
1870 struct lxc_inet6dev
*inet6dev
;
1873 lxc_list_for_each(iterator
, ip
) {
1875 inet6dev
= iterator
->elem
;
1877 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
1878 &inet6dev
->mcast
, &inet6dev
->acast
,
1881 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1882 ifindex
, strerror(-err
));
/*
 * Configure one network device.
 *
 * With netdev->ifindex == 0 (empty network namespace) the only action is to
 * bring "lo" up, and only when IFF_UP is set in netdev->flags.
 *
 * Otherwise the steps are, in order:
 *  1. resolve the current interface name from ifindex (if_indextoname);
 *  2. pick a default netdev->name (netdev->link for LXC_NET_PHYS, else the
 *     "eth%d" pattern) and rename the interface to netdev->name;
 *  3. re-read the interface name, since the rename may have allocated one;
 *  4. apply the hardware address when netdev->hwaddr is set;
 *  5. add the IPv4 and IPv6 addresses;
 *  6. when IFF_UP is set, bring the interface and "lo" up;
 *  7. add the IPv4 / IPv6 gateways, which requires IFF_UP and (except for a
 *     link-local IPv6 gateway) at least one configured address.
 */
1890 static int setup_netdev(struct lxc_netdev
*netdev
)
1892 char ifname
[IFNAMSIZ
];
1893 char *current_ifname
= ifname
;
1896 /* empty network namespace */
1897 if (!netdev
->ifindex
) {
1898 if (netdev
->flags
& IFF_UP
) {
1899 err
= lxc_netdev_up("lo");
1901 ERROR("failed to set the loopback up : %s",
1909 /* retrieve the name of the interface */
1910 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
1911 ERROR("no interface corresponding to index '%d'",
1916 /* default: let the system to choose one interface name */
1918 netdev
->name
= netdev
->type
== LXC_NET_PHYS
?
1919 netdev
->link
: "eth%d";
1921 /* rename the interface name */
1922 err
= lxc_netdev_rename_by_name(ifname
, netdev
->name
);
1924 ERROR("failed to rename %s->%s : %s", ifname
, netdev
->name
,
1929 /* Re-read the name of the interface because its name has changed
1930 * and would be automatically allocated by the system
1932 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
1933 ERROR("no interface corresponding to index '%d'",
1938 /* set a mac address */
1939 if (netdev
->hwaddr
) {
1940 if (setup_hw_addr(netdev
->hwaddr
, current_ifname
)) {
1941 ERROR("failed to setup hw address for '%s'",
1947 /* setup ipv4 addresses on the interface */
1948 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
)) {
1949 ERROR("failed to setup ip addresses for '%s'",
1954 /* setup ipv6 addresses on the interface */
1955 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
)) {
1956 ERROR("failed to setup ipv6 addresses for '%s'",
1961 /* set the network device up */
1962 if (netdev
->flags
& IFF_UP
) {
1965 err
= lxc_netdev_up(current_ifname
);
1967 ERROR("failed to set '%s' up : %s", current_ifname
,
1972 /* the network is up, make the loopback up too */
1973 err
= lxc_netdev_up("lo");
1975 ERROR("failed to set the loopback up : %s",
1981 /* We can only set up the default routes after bringing
1982 * up the interface, sine bringing up the interface adds
1983 * the link-local routes and we can't add a default
1984 * route if the gateway is not reachable. */
1986 /* setup ipv4 gateway on the interface */
1987 if (netdev
->ipv4_gateway
) {
1988 if (!(netdev
->flags
& IFF_UP
)) {
1989 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname
);
1993 if (lxc_list_empty(&netdev
->ipv4
)) {
1994 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname
);
1998 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
2000 ERROR("failed to setup ipv4 gateway for '%s': %s",
2001 ifname
, strerror(-err
));
2002 if (netdev
->ipv4_gateway_auto
) {
2003 char buf
[INET_ADDRSTRLEN
];
2004 inet_ntop(AF_INET
, netdev
->ipv4_gateway
, buf
, sizeof(buf
));
2005 ERROR("tried to set autodetected ipv4 gateway '%s'", buf
);
2011 /* setup ipv6 gateway on the interface */
2012 if (netdev
->ipv6_gateway
) {
2013 if (!(netdev
->flags
& IFF_UP
)) {
2014 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname
);
2018 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
)) {
2019 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname
);
2023 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
2025 ERROR("failed to setup ipv6 gateway for '%s': %s",
2026 ifname
, strerror(-err
));
2027 if (netdev
->ipv6_gateway_auto
) {
2028 char buf
[INET6_ADDRSTRLEN
];
2029 inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, buf
, sizeof(buf
));
2030 ERROR("tried to set autodetected ipv6 gateway '%s'", buf
);
2036 DEBUG("'%s' has been setup", current_ifname
);
/*
 * Run setup_netdev() on every struct lxc_netdev of the `network` list.
 * A failing device is reported with "failed to setup netdev"; an INFO line
 * is emitted when the list is not empty.
 */
2041 static int setup_network(struct lxc_list
*network
)
2043 struct lxc_list
*iterator
;
2044 struct lxc_netdev
*netdev
;
2046 lxc_list_for_each(iterator
, network
) {
2048 netdev
= iterator
->elem
;
2050 if (setup_netdev(netdev
)) {
2051 ERROR("failed to setup netdev");
2056 if (!lxc_list_empty(network
))
2057 INFO("network has been setup");
/*
 * Give every saved physical nic its original name back.
 *
 * For each of the conf->num_savednics entries in conf->saved_nics the
 * interface identified by ifindex is renamed to orig_name with
 * lxc_netdev_rename_by_index() (the result is not checked). Afterwards
 * num_savednics is reset to 0 and the saved_nics array is freed.
 * NOTE(review): conf->saved_nics is not visibly reset to NULL after the
 * free -- confirm nothing dereferences it later.
 */
2062 void lxc_rename_phys_nics_on_shutdown(struct lxc_conf
*conf
)
2066 INFO("running to reset %d nic names", conf
->num_savednics
);
2067 for (i
=0; i
<conf
->num_savednics
; i
++) {
2068 struct saved_nic
*s
= &conf
->saved_nics
[i
];
2069 INFO("resetting nic %d to %s\n", s
->ifindex
, s
->orig_name
);
2070 lxc_netdev_rename_by_index(s
->ifindex
, s
->orig_name
);
2073 conf
->num_savednics
= 0;
2074 free(conf
->saved_nics
);
/*
 * Rewrite the MAC address of the host-side interface `veth1` so that its
 * first byte is 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR on an AF_INET datagram
 * socket, byte 0 of sa_data is replaced, and the result is written back
 * with SIOCSIFHWADDR. The new address is logged at DEBUG level.
 */
2077 static int setup_private_host_hw_addr(char *veth1
)
2083 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
2087 snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
2088 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
2094 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
2095 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
2100 DEBUG("mac address of host interface '%s' changed to private "
2101 "%02x:%02x:%02x:%02x:%02x:%02x", veth1
,
2102 ifr
.ifr_hwaddr
.sa_data
[0] & 0xff,
2103 ifr
.ifr_hwaddr
.sa_data
[1] & 0xff,
2104 ifr
.ifr_hwaddr
.sa_data
[2] & 0xff,
2105 ifr
.ifr_hwaddr
.sa_data
[3] & 0xff,
2106 ifr
.ifr_hwaddr
.sa_data
[4] & 0xff,
2107 ifr
.ifr_hwaddr
.sa_data
[5] & 0xff);
/*
 * default_rootfs_mount: default value (LXCROOTFSMOUNT) copied into
 * rootfs.mount of every new configuration.
 *
 * lxc_conf_init(): allocate a struct lxc_conf, zero it, and set the
 * non-zero defaults:
 *  - personality, maincmd_fd and the console / peerpty descriptors to -1;
 *  - console.log_path and console.path to NULL, console.name to "";
 *  - rootfs.mount to a strdup() of default_rootfs_mount;
 *  - the cgroup, network, mount_list, caps, id_map and per-hook lists to
 *    empty lists.
 * Allocation failures are logged with "lxc_conf_init : %m".
 */
2112 static char *default_rootfs_mount
= LXCROOTFSMOUNT
;
2114 struct lxc_conf
*lxc_conf_init(void)
2116 struct lxc_conf
*new;
2119 new = malloc(sizeof(*new));
2121 ERROR("lxc_conf_init : %m");
2124 memset(new, 0, sizeof(*new));
2126 new->personality
= -1;
2127 new->console
.log_path
= NULL
;
2128 new->console
.log_fd
= -1;
2129 new->console
.path
= NULL
;
2130 new->console
.peer
= -1;
2131 new->console
.peerpty
.busy
= -1;
2132 new->console
.peerpty
.master
= -1;
2133 new->console
.peerpty
.slave
= -1;
2134 new->console
.master
= -1;
2135 new->console
.slave
= -1;
2136 new->console
.name
[0] = '\0';
2137 new->maincmd_fd
= -1;
2138 new->rootfs
.mount
= strdup(default_rootfs_mount
);
2139 if (!new->rootfs
.mount
) {
2140 ERROR("lxc_conf_init : %m");
2145 lxc_list_init(&new->cgroup
);
2146 lxc_list_init(&new->network
);
2147 lxc_list_init(&new->mount_list
);
2148 lxc_list_init(&new->caps
);
2149 lxc_list_init(&new->id_map
);
2150 for (i
=0; i
<NUM_LXC_HOOKS
; i
++)
2151 lxc_list_init(&new->hooks
[i
]);
2153 new->aa_profile
= NULL
;
2155 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2156 new->lsm_umount_proc
= 0;
/*
 * Create a veth pair for `netdev`.
 *
 * The host-side name (veth1) is netdev->priv.veth_attr.pair when configured,
 * otherwise a name generated with mktemp() from "vethXXXXXX" and stored in
 * veth_attr.veth1 for later teardown. The container-side name (veth2) is
 * always generated the same way.
 *
 * After lxc_veth_create() the function: forces a private MAC on veth1,
 * applies netdev->mtu to both ends, attaches veth1 to the bridge
 * netdev->link, records the ifindex of veth2 in netdev->ifindex, brings
 * veth1 up and runs netdev->upscript when set. The visible cleanup path
 * deletes veth1 with lxc_netdev_delete_by_name().
 * NOTE(review): the conditions guarding the mtu and bridge steps are not
 * visible here -- confirm they are skipped when mtu/link are unset.
 */
2162 static int instanciate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2164 char veth1buf
[IFNAMSIZ
], *veth1
;
2165 char veth2buf
[IFNAMSIZ
], *veth2
;
2168 if (netdev
->priv
.veth_attr
.pair
)
2169 veth1
= netdev
->priv
.veth_attr
.pair
;
2171 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
2172 if (err
>= sizeof(veth1buf
)) { /* can't *really* happen, but... */
2173 ERROR("veth1 name too long");
2176 veth1
= mktemp(veth1buf
);
2177 /* store away for deconf */
2178 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
2181 snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
2182 veth2
= mktemp(veth2buf
);
2184 if (!strlen(veth1
) || !strlen(veth2
)) {
2185 ERROR("failed to allocate a temporary name");
2189 err
= lxc_veth_create(veth1
, veth2
);
2191 ERROR("failed to create %s-%s : %s", veth1
, veth2
,
2196 /* changing the high byte of the mac address to 0xfe, the bridge interface
2197 * will always keep the host's mac address and not take the mac address
2199 err
= setup_private_host_hw_addr(veth1
);
2201 ERROR("failed to change mac address of host interface '%s' : %s",
2202 veth1
, strerror(-err
));
2207 err
= lxc_netdev_set_mtu(veth1
, atoi(netdev
->mtu
));
2209 err
= lxc_netdev_set_mtu(veth2
, atoi(netdev
->mtu
));
2211 ERROR("failed to set mtu '%s' for %s-%s : %s",
2212 netdev
->mtu
, veth1
, veth2
, strerror(-err
));
2218 err
= lxc_bridge_attach(netdev
->link
, veth1
);
2220 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2221 veth1
, netdev
->link
, strerror(-err
));
2226 netdev
->ifindex
= if_nametoindex(veth2
);
2227 if (!netdev
->ifindex
) {
2228 ERROR("failed to retrieve the index for %s", veth2
);
2232 err
= lxc_netdev_up(veth1
);
2234 ERROR("failed to set %s up : %s", veth1
, strerror(-err
));
2238 if (netdev
->upscript
) {
2239 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
2240 "veth", veth1
, (char*) NULL
);
2245 DEBUG("instanciated veth '%s/%s', index is '%d'",
2246 veth1
, veth2
, netdev
->ifindex
);
2251 lxc_netdev_delete_by_name(veth1
);
/*
 * Teardown hook for a veth device: when netdev->downscript is set, run it
 * through run_script() with the arguments "down", "veth" and the host-side
 * interface name (veth_attr.pair when configured, otherwise the generated
 * name saved in veth_attr.veth1).
 */
2255 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2260 if (netdev
->priv
.veth_attr
.pair
)
2261 veth1
= netdev
->priv
.veth_attr
.pair
;
2263 veth1
= netdev
->priv
.veth_attr
.veth1
;
2265 if (netdev
->downscript
) {
2266 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2267 "down", "veth", veth1
, (char*) NULL
);
/*
 * Create a macvlan interface on top of netdev->link.
 *
 * netdev->link is required. The interface name is generated with mktemp()
 * from "mcXXXXXX", the device is created with lxc_macvlan_create() using
 * macvlan_attr.mode, and its index is stored in netdev->ifindex (the
 * interface is deleted again when the index cannot be resolved).
 * netdev->upscript, when set, is run with "up", "macvlan" and the link name.
 */
2274 static int instanciate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2276 char peerbuf
[IFNAMSIZ
], *peer
;
2279 if (!netdev
->link
) {
2280 ERROR("no link specified for macvlan netdev");
2284 err
= snprintf(peerbuf
, sizeof(peerbuf
), "mcXXXXXX");
2285 if (err
>= sizeof(peerbuf
))
2288 peer
= mktemp(peerbuf
);
2289 if (!strlen(peer
)) {
2290 ERROR("failed to make a temporary name");
2294 err
= lxc_macvlan_create(netdev
->link
, peer
,
2295 netdev
->priv
.macvlan_attr
.mode
);
2297 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2298 peer
, netdev
->link
, strerror(-err
));
2302 netdev
->ifindex
= if_nametoindex(peer
);
2303 if (!netdev
->ifindex
) {
2304 ERROR("failed to retrieve the index for %s", peer
);
2305 lxc_netdev_delete_by_name(peer
);
2309 if (netdev
->upscript
) {
2310 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
2311 "macvlan", netdev
->link
, (char*) NULL
);
2316 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2317 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
/*
 * Teardown hook for a macvlan device: when netdev->downscript is set, run
 * it through run_script() with "down", "macvlan" and netdev->link.
 */
2322 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2326 if (netdev
->downscript
) {
2327 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2328 "down", "macvlan", netdev
->link
,
2336 /* XXX: merge with instanciate_macvlan */
2337 static int instanciate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2339 char peer
[IFNAMSIZ
];
2342 if (!netdev
->link
) {
2343 ERROR("no link specified for vlan netdev");
2347 err
= snprintf(peer
, sizeof(peer
), "vlan%d", netdev
->priv
.vlan_attr
.vid
);
2348 if (err
>= sizeof(peer
)) {
2349 ERROR("peer name too long");
2353 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
2355 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2356 peer
, netdev
->link
, strerror(-err
));
2360 netdev
->ifindex
= if_nametoindex(peer
);
2361 if (!netdev
->ifindex
) {
2362 ERROR("failed to retrieve the ifindex for %s", peer
);
2363 lxc_netdev_delete_by_name(peer
);
2367 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
/*
 * Teardown hook for a vlan device; takes the same (handler, netdev)
 * arguments as the other shutdown_* callbacks.
 * NOTE(review): the body is not visible in this extract -- presumably a
 * no-op; confirm.
 */
2373 static int shutdown_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
/*
 * Prepare a physical interface for the container.
 *
 * netdev->link is required; its index is resolved with if_nametoindex()
 * and stored in netdev->ifindex. netdev->upscript, when set, is run with
 * "up", "phys" and the link name.
 */
2378 static int instanciate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2380 if (!netdev
->link
) {
2381 ERROR("no link specified for the physical interface");
2385 netdev
->ifindex
= if_nametoindex(netdev
->link
);
2386 if (!netdev
->ifindex
) {
2387 ERROR("failed to retrieve the index for %s", netdev
->link
);
2391 if (netdev
->upscript
) {
2393 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2394 "up", "phys", netdev
->link
, (char*) NULL
);
/*
 * Teardown hook for a physical interface: when netdev->downscript is set,
 * run it through run_script() with "down", "phys" and netdev->link.
 */
2402 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2406 if (netdev
->downscript
) {
2407 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2408 "down", "phys", netdev
->link
, (char*) NULL
);
/*
 * Set up an "empty" network entry: netdev->ifindex is set to 0 and
 * netdev->upscript, when set, is run with "up" and "empty".
 */
2415 static int instanciate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2417 netdev
->ifindex
= 0;
2418 if (netdev
->upscript
) {
2420 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2421 "up", "empty", (char*) NULL
);
/*
 * Teardown hook for an "empty" network entry: when netdev->downscript is
 * set, run it through run_script() with "down" and "empty".
 */
2428 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2432 if (netdev
->downscript
) {
2433 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2434 "down", "empty", (char*) NULL
);
/*
 * Create every network device configured in handler->conf->network.
 *
 * Each device's type is range-checked against [0, LXC_NET_MAXCONFTYPE] and
 * then used as an index into the netdev_conf callback table; the selected
 * callback is called with (handler, netdev). Invalid types and failing
 * callbacks are logged as errors.
 */
2441 int lxc_create_network(struct lxc_handler
*handler
)
2443 struct lxc_list
*network
= &handler
->conf
->network
;
2444 struct lxc_list
*iterator
;
2445 struct lxc_netdev
*netdev
;
2447 lxc_list_for_each(iterator
, network
) {
2449 netdev
= iterator
->elem
;
2451 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
) {
2452 ERROR("invalid network configuration type '%d'",
2457 if (netdev_conf
[netdev
->type
](handler
, netdev
)) {
2458 ERROR("failed to create netdev");
/*
 * Tear down every network device configured in handler->conf->network.
 *
 * A physical device with a non-zero ifindex is renamed back to netdev->link.
 * The per-type netdev_deconf callback is called with (handler, netdev), and
 * an interface with a non-zero ifindex is removed with
 * lxc_netdev_delete_by_index(). Failures are only logged as warnings.
 * NOTE(review): whether physical devices skip the callback/delete steps is
 * not visible in this extract -- confirm.
 */
2467 void lxc_delete_network(struct lxc_handler
*handler
)
2469 struct lxc_list
*network
= &handler
->conf
->network
;
2470 struct lxc_list
*iterator
;
2471 struct lxc_netdev
*netdev
;
2473 lxc_list_for_each(iterator
, network
) {
2474 netdev
= iterator
->elem
;
2476 if (netdev
->ifindex
!= 0 && netdev
->type
== LXC_NET_PHYS
) {
2477 if (lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
))
2478 WARN("failed to rename to the initial name the " \
2479 "netdev '%s'", netdev
->link
);
2483 if (netdev_deconf
[netdev
->type
](handler
, netdev
)) {
2484 WARN("failed to destroy netdev");
2487 /* Recent kernel remove the virtual interfaces when the network
2488 * namespace is destroyed but in case we did not moved the
2489 * interface to the network namespace, we have to destroy it
2491 if (netdev
->ifindex
!= 0 &&
2492 lxc_netdev_delete_by_index(netdev
->ifindex
))
2493 WARN("failed to remove interface '%s'", netdev
->name
);
/*
 * Move every configured network device into the network namespace of
 * process `pid` with lxc_netdev_move_by_index(). Devices whose ifindex is 0
 * (empty network namespace) are not moved. A failed move is logged with
 * strerror(-err).
 */
2497 int lxc_assign_network(struct lxc_list
*network
, pid_t pid
)
2499 struct lxc_list
*iterator
;
2500 struct lxc_netdev
*netdev
;
2503 lxc_list_for_each(iterator
, network
) {
2505 netdev
= iterator
->elem
;
2507 /* empty network namespace, nothing to move */
2508 if (!netdev
->ifindex
)
2511 err
= lxc_netdev_move_by_index(netdev
->ifindex
, pid
);
2513 ERROR("failed to move '%s' to the container : %s",
2514 netdev
->link
, strerror(-err
));
2518 DEBUG("move '%s' to '%d'", netdev
->name
, pid
);
2524 static int write_id_mapping(enum idtype idtype
, pid_t pid
, const char *buf
,
2527 char path
[PATH_MAX
];
2531 ret
= snprintf(path
, PATH_MAX
, "/proc/%d/%cid_map", pid
, idtype
== ID_TYPE_UID
? 'u' : 'g');
2532 if (ret
< 0 || ret
>= PATH_MAX
) {
2533 fprintf(stderr
, "%s: path name too long", __func__
);
2536 f
= fopen(path
, "w");
2541 ret
= fwrite(buf
, buf_size
, 1, f
);
2543 SYSERROR("writing id mapping");
2544 closeret
= fclose(f
);
2546 SYSERROR("writing id mapping");
2547 return ret
< 0 ? ret
: closeret
;
/*
 * Write the uid and gid mappings of `idmap` for process `pid`.
 *
 * For each id type (ID_TYPE_UID, then ID_TYPE_GID) the matching entries are
 * formatted as "<nsid> <hostid> <range>\n" into a 4096-byte buffer and the
 * accumulated text is written with write_id_mapping(). A type without any
 * mapping (pos == buf) is not written.
 * NOTE(review): when snprintf() reports that a line does not fit, the
 * visible code only logs an error -- confirm the loop does not continue
 * with an out-of-range position.
 */
2550 int lxc_map_ids(struct lxc_list
*idmap
, pid_t pid
)
2552 struct lxc_list
*iterator
;
2556 char *buf
= NULL
, *pos
;
2558 for(type
= ID_TYPE_UID
; type
<= ID_TYPE_GID
; type
++) {
2562 lxc_list_for_each(iterator
, idmap
) {
2563 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
2565 buf
= pos
= malloc(4096);
2569 map
= iterator
->elem
;
2570 if (map
->idtype
== type
) {
2571 left
= 4096 - (pos
- buf
);
2572 fill
= snprintf(pos
, left
, "%lu %lu %lu\n",
2573 map
->nsid
, map
->hostid
, map
->range
);
2574 if (fill
<= 0 || fill
>= left
)
2575 SYSERROR("snprintf failed, too many mappings");
2579 if (pos
== buf
) // no mappings were found
2581 ret
= write_id_mapping(type
, pid
, buf
, pos
-buf
);
/*
 * Resolve "auto" gateways for the devices of handler->conf->network.
 *
 * Devices with neither ipv4_gateway_auto nor ipv6_gateway_auto set are not
 * processed. Auto gateways are only supported for veth and macvlan devices
 * and require netdev->link; the link's interface index is looked up with
 * if_nametoindex() and its address is fetched into netdev->ipv4_gateway /
 * netdev->ipv6_gateway with lxc_ipv4_addr_get() / lxc_ipv6_addr_get().
 */
2591 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2593 struct lxc_list
*network
= &handler
->conf
->network
;
2594 struct lxc_list
*iterator
;
2595 struct lxc_netdev
*netdev
;
2598 lxc_list_for_each(iterator
, network
) {
2599 netdev
= iterator
->elem
;
2601 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2604 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
) {
2605 ERROR("gateway = auto only supported for "
2606 "veth and macvlan");
2610 if (!netdev
->link
) {
2611 ERROR("gateway = auto needs a link interface");
2615 link_index
= if_nametoindex(netdev
->link
);
2619 if (netdev
->ipv4_gateway_auto
) {
2620 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
)) {
2621 ERROR("failed to automatically find ipv4 gateway "
2622 "address from link interface '%s'", netdev
->link
);
2627 if (netdev
->ipv6_gateway_auto
) {
2628 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
)) {
2629 ERROR("failed to automatically find ipv6 gateway "
2630 "address from link interface '%s'", netdev
->link
);
/*
 * Allocate the conf->tty pseudo terminals of the container.
 *
 * An array of conf->tty lxc_pty_info entries is allocated in
 * conf->tty_info.pty_info and each entry is filled by openpty(). Both
 * descriptors are marked FD_CLOEXEC so they do not leak into the container.
 * If openpty() fails, nbtty is set to the number of ptys already opened and
 * lxc_delete_tty() releases them. On completion nbtty equals conf->tty.
 */
2639 int lxc_create_tty(const char *name
, struct lxc_conf
*conf
)
2641 struct lxc_tty_info
*tty_info
= &conf
->tty_info
;
2644 /* no tty in the configuration */
2648 tty_info
->pty_info
=
2649 malloc(sizeof(*tty_info
->pty_info
)*conf
->tty
);
2650 if (!tty_info
->pty_info
) {
2651 SYSERROR("failed to allocate pty_info");
2655 for (i
= 0; i
< conf
->tty
; i
++) {
2657 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2659 if (openpty(&pty_info
->master
, &pty_info
->slave
,
2660 pty_info
->name
, NULL
, NULL
)) {
2661 SYSERROR("failed to create pty #%d", i
);
2662 tty_info
->nbtty
= i
;
2663 lxc_delete_tty(tty_info
);
2667 DEBUG("allocated pty '%s' (%d/%d)",
2668 pty_info
->name
, pty_info
->master
, pty_info
->slave
);
2670 /* Prevent leaking the file descriptors to the container */
2671 fcntl(pty_info
->master
, F_SETFD
, FD_CLOEXEC
);
2672 fcntl(pty_info
->slave
, F_SETFD
, FD_CLOEXEC
);
2677 tty_info
->nbtty
= conf
->tty
;
2679 INFO("tty's configured");
/*
 * Release the ptys held in tty_info: close the master and slave descriptor
 * of each of the nbtty entries, free the pty_info array and reset nbtty
 * to 0.
 */
2684 void lxc_delete_tty(struct lxc_tty_info
*tty_info
)
2688 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
2689 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2691 close(pty_info
->master
);
2692 close(pty_info
->slave
);
2695 free(tty_info
->pty_info
);
2696 tty_info
->nbtty
= 0;
/*
 * Translate `uid` through the id_map entries of type `w` in `c`.
 *
 * For the first matching entry whose range contains `uid` (upper bound
 * map->nsid + map->range, exclusive) the result is
 * uid - low + map->hostid.
 * NOTE(review): the upper bound is computed from map->nsid and the result
 * adds map->hostid, which reads as a namespace-id to host-id translation --
 * the opposite of what the original description below states. Confirm
 * which direction callers expect.
 */
2700 * given a host uid, return the ns uid if it is mapped.
2701 * if it is not mapped, return the original host id.
2703 static int shiftid(struct lxc_conf
*c
, int uid
, enum idtype w
)
2705 struct lxc_list
*iterator
;
2709 lxc_list_for_each(iterator
, &c
->id_map
) {
2710 map
= iterator
->elem
;
2711 if (map
->idtype
!= w
)
2715 high
= map
->nsid
+ map
->range
;
2716 if (uid
< low
|| uid
>= high
)
2719 return uid
- low
+ map
->hostid
;
/*
 * chown() `path` to the ids obtained by passing its current owner and
 * group (from stat()) through shiftid(). The chown() is only attempted when
 * at least one of the two ids changes; stat() and chown() failures are
 * reported via SYSERROR.
 */
2726 * Take a pathname for a file created on the host, and map the uid and gid
2727 * into the container if needed. (Used for ttys)
2729 static int uid_shift_file(char *path
, struct lxc_conf
*c
)
2731 struct stat statbuf
;
2734 if (stat(path
, &statbuf
)) {
2735 SYSERROR("stat(%s)", path
);
2739 newuid
= shiftid(c
, statbuf
.st_uid
, ID_TYPE_UID
);
2740 newgid
= shiftid(c
, statbuf
.st_gid
, ID_TYPE_GID
);
2741 if (newuid
!= statbuf
.st_uid
|| newgid
!= statbuf
.st_gid
) {
2742 DEBUG("chowning %s from %d:%d to %d:%d\n", path
, (int)statbuf
.st_uid
, (int)statbuf
.st_gid
, newuid
, newgid
);
2743 if (chown(path
, newuid
, newgid
)) {
2744 SYSERROR("chown(%s)", path
);
/*
 * Shift the ownership of the container's console and tty device nodes,
 * addressed from the host through /proc/<pid>/root/dev/.
 *
 * conf->rootfs.path is tested first (NOTE(review): presumably nothing is
 * done without a rootfs -- confirm). The console node and then tty1..ttyN
 * (N = tty_info.nbtty), optionally below conf->ttydir, are passed to
 * uid_shift_file(). Over-long paths are reported as errors.
 */
2751 int uid_shift_ttys(int pid
, struct lxc_conf
*conf
)
2754 struct lxc_tty_info
*tty_info
= &conf
->tty_info
;
2755 char path
[MAXPATHLEN
];
2756 char *ttydir
= conf
->ttydir
;
2758 if (!conf
->rootfs
.path
)
2760 /* first the console */
2761 ret
= snprintf(path
, sizeof(path
), "/proc/%d/root/dev/%s/console", pid
, ttydir
? ttydir
: "");
2762 if (ret
< 0 || ret
>= sizeof(path
)) {
2763 ERROR("console path too long\n");
2766 if (uid_shift_file(path
, conf
)) {
2767 DEBUG("Failed to chown the console %s.\n", path
);
2770 for (i
=0; i
< tty_info
->nbtty
; i
++) {
2771 ret
= snprintf(path
, sizeof(path
), "/proc/%d/root/dev/%s/tty%d",
2772 pid
, ttydir
? ttydir
: "", i
+ 1);
2773 if (ret
< 0 || ret
>= sizeof(path
)) {
2774 ERROR("pathname too long for ttys");
2777 if (uid_shift_file(path
, conf
)) {
2778 DEBUG("Failed to chown pty %s.\n", path
);
/*
 * Apply the configuration `lxc_conf` to the container `name`.
 *
 * Steps, in the order they appear:
 *   utsname, network, "pre-mount" hooks, rootfs, optional /dev mount
 *   (autodev), fstab mounts, mount list entries, "mount" hooks, "autodev"
 *   hooks and /dev population (autodev), console and ttys (skipped when
 *   is_execute is set), kmsg (a failure is only logged), /proc mount for
 *   the LSM when built with HAVE_APPARMOR, pivot_root, pts, personality,
 *   and finally capability dropping -- the latter only when no id_map is
 *   configured.
 * Each failing step is logged with an ERROR naming the step.
 */
2786 int lxc_setup(const char *name
, struct lxc_conf
*lxc_conf
, const char *lxcpath
)
2788 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2792 if (setup_utsname(lxc_conf
->utsname
)) {
2793 ERROR("failed to setup the utsname for '%s'", name
);
2797 if (setup_network(&lxc_conf
->network
)) {
2798 ERROR("failed to setup the network for '%s'", name
);
2802 if (run_lxc_hooks(name
, "pre-mount", lxc_conf
, lxcpath
, NULL
)) {
2803 ERROR("failed to run pre-mount hooks for container '%s'.", name
);
2807 if (setup_rootfs(lxc_conf
)) {
2808 ERROR("failed to setup rootfs for '%s'", name
);
2812 if (lxc_conf
->autodev
) {
2813 if (mount_autodev(lxc_conf
->rootfs
.mount
)) {
2814 ERROR("failed to mount /dev in the container");
2819 if (setup_mount(&lxc_conf
->rootfs
, lxc_conf
->fstab
, name
)) {
2820 ERROR("failed to setup the mounts for '%s'", name
);
2824 if (!lxc_list_empty(&lxc_conf
->mount_list
) && setup_mount_entries(&lxc_conf
->rootfs
, &lxc_conf
->mount_list
, name
)) {
2825 ERROR("failed to setup the mount entries for '%s'", name
);
2829 if (run_lxc_hooks(name
, "mount", lxc_conf
, lxcpath
, NULL
)) {
2830 ERROR("failed to run mount hooks for container '%s'.", name
);
2834 if (lxc_conf
->autodev
) {
2835 if (run_lxc_hooks(name
, "autodev", lxc_conf
, lxcpath
, NULL
)) {
2836 ERROR("failed to run autodev hooks for container '%s'.", name
);
2839 if (setup_autodev(lxc_conf
->rootfs
.mount
)) {
2840 ERROR("failed to populate /dev in the container");
2845 if (!lxc_conf
->is_execute
&& setup_console(&lxc_conf
->rootfs
, &lxc_conf
->console
, lxc_conf
->ttydir
)) {
2846 ERROR("failed to setup the console for '%s'", name
);
2850 if (lxc_conf
->kmsg
) {
2851 if (setup_kmsg(&lxc_conf
->rootfs
, &lxc_conf
->console
)) // don't fail
2852 ERROR("failed to setup kmsg for '%s'", name
);
2855 if (!lxc_conf
->is_execute
&& setup_tty(&lxc_conf
->rootfs
, &lxc_conf
->tty_info
, lxc_conf
->ttydir
)) {
2856 ERROR("failed to setup the ttys for '%s'", name
);
2860 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2861 INFO("rootfs path is .%s., mount is .%s.", lxc_conf
->rootfs
.path
,
2862 lxc_conf
->rootfs
.mount
);
2863 if (lxc_conf
->rootfs
.path
== NULL
|| strlen(lxc_conf
->rootfs
.path
) == 0) {
2864 if (mount("proc", "/proc", "proc", 0, NULL
)) {
2865 SYSERROR("Failed mounting /proc, proceeding");
2870 mounted
= lsm_mount_proc_if_needed(lxc_conf
->rootfs
.path
, lxc_conf
->rootfs
.mount
);
2871 if (mounted
== -1) {
2872 SYSERROR("failed to mount /proc in the container.");
2874 } else if (mounted
== 1) {
2875 lxc_conf
->lsm_umount_proc
= 1;
2879 if (setup_pivot_root(&lxc_conf
->rootfs
)) {
2880 ERROR("failed to set rootfs for '%s'", name
);
2884 if (setup_pts(lxc_conf
->pts
)) {
2885 ERROR("failed to setup the new pts instance");
2889 if (setup_personality(lxc_conf
->personality
)) {
2890 ERROR("failed to setup personality");
2894 if (lxc_list_empty(&lxc_conf
->id_map
)) {
2895 if (setup_caps(&lxc_conf
->caps
)) {
2896 ERROR("failed to drop capabilities");
2901 NOTICE("'%s' is setup.", name
);
/*
 * Run all scripts registered for the hook point named `hook`.
 *
 * The name is mapped to its LXCHOOK_* index ("pre-start", "pre-mount",
 * "mount", "autodev", "start", "post-stop", "clone") and every script in
 * conf->hooks[index] is run through run_script_argv() with the container
 * name, the section "lxc", the hook name, lxcpath and argv.
 */
2906 int run_lxc_hooks(const char *name
, char *hook
, struct lxc_conf
*conf
,
2907 const char *lxcpath
, char *argv
[])
2910 struct lxc_list
*it
;
2912 if (strcmp(hook
, "pre-start") == 0)
2913 which
= LXCHOOK_PRESTART
;
2914 else if (strcmp(hook
, "pre-mount") == 0)
2915 which
= LXCHOOK_PREMOUNT
;
2916 else if (strcmp(hook
, "mount") == 0)
2917 which
= LXCHOOK_MOUNT
;
2918 else if (strcmp(hook
, "autodev") == 0)
2919 which
= LXCHOOK_AUTODEV
;
2920 else if (strcmp(hook
, "start") == 0)
2921 which
= LXCHOOK_START
;
2922 else if (strcmp(hook
, "post-stop") == 0)
2923 which
= LXCHOOK_POSTSTOP
;
2924 else if (strcmp(hook
, "clone") == 0)
2925 which
= LXCHOOK_CLONE
;
2928 lxc_list_for_each(it
, &conf
->hooks
[which
]) {
2930 char *hookname
= it
->elem
;
2931 ret
= run_script_argv(name
, "lxc", hookname
, hook
, lxcpath
, argv
);
/*
 * Release the struct lxc_netdev stored in list element `it`: its upscript,
 * hwaddr, ipv4_gateway and ipv6_gateway strings are freed and its ipv4 and
 * ipv6 address lists are walked with the deletion-safe iterator.
 * NOTE(review): removal of `it` from its list and freeing of the remaining
 * fields are not visible in this extract -- confirm.
 */
2938 static void lxc_remove_nic(struct lxc_list
*it
)
2940 struct lxc_netdev
*netdev
= it
->elem
;
2941 struct lxc_list
*it2
,*next
;
2949 if (netdev
->upscript
)
2950 free(netdev
->upscript
);
2952 free(netdev
->hwaddr
);
2955 if (netdev
->ipv4_gateway
)
2956 free(netdev
->ipv4_gateway
);
2957 if (netdev
->ipv6_gateway
)
2958 free(netdev
->ipv6_gateway
);
2959 lxc_list_for_each_safe(it2
, &netdev
->ipv4
, next
) {
2964 lxc_list_for_each_safe(it2
, &netdev
->ipv6
, next
) {
/*
 * Clear a whole nic or one of its properties.
 *
 * `key` starts with the decimal nic index, optionally followed by '.' and a
 * property name; the nic is located by walking c->network. Recognised
 * properties in the visible code: "ipv4", "ipv6" (address lists), "link",
 * "name", "script.up", "hwaddr", "mtu", "ipv4_gateway", "ipv6_gateway".
 * String properties are freed and reset to NULL.
 */
2973 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
2974 int lxc_clear_nic(struct lxc_conf
*c
, const char *key
)
2978 struct lxc_list
*it
;
2979 struct lxc_netdev
*netdev
;
2981 p1
= index(key
, '.');
2982 if (!p1
|| *(p1
+1) == '\0')
2985 ret
= sscanf(key
, "%d", &idx
);
2986 if (ret
!= 1) return -1;
2991 lxc_list_for_each(it
, &c
->network
) {
2996 if (i
< idx
) // we don't have that many nics defined
2999 if (!it
|| !it
->elem
)
3006 } else if (strcmp(p1
, "ipv4") == 0) {
3007 struct lxc_list
*it2
,*next
;
3008 lxc_list_for_each_safe(it2
, &netdev
->ipv4
, next
) {
3013 } else if (strcmp(p1
, "ipv6") == 0) {
3014 struct lxc_list
*it2
,*next
;
3015 lxc_list_for_each_safe(it2
, &netdev
->ipv6
, next
) {
3020 } else if (strcmp(p1
, "link") == 0) {
3023 netdev
->link
= NULL
;
3025 } else if (strcmp(p1
, "name") == 0) {
3028 netdev
->name
= NULL
;
3030 } else if (strcmp(p1
, "script.up") == 0) {
3031 if (netdev
->upscript
) {
3032 free(netdev
->upscript
);
3033 netdev
->upscript
= NULL
;
3035 } else if (strcmp(p1
, "hwaddr") == 0) {
3036 if (netdev
->hwaddr
) {
3037 free(netdev
->hwaddr
);
3038 netdev
->hwaddr
= NULL
;
3040 } else if (strcmp(p1
, "mtu") == 0) {
3045 } else if (strcmp(p1
, "ipv4_gateway") == 0) {
3046 if (netdev
->ipv4_gateway
) {
3047 free(netdev
->ipv4_gateway
);
3048 netdev
->ipv4_gateway
= NULL
;
3050 } else if (strcmp(p1
, "ipv6_gateway") == 0) {
3051 if (netdev
->ipv6_gateway
) {
3052 free(netdev
->ipv6_gateway
);
3053 netdev
->ipv6_gateway
= NULL
;
/*
 * Walk c->network with the deletion-safe iterator.
 * NOTE(review): the loop body is not visible in this extract; presumably
 * each element is released (e.g. via lxc_remove_nic) -- confirm.
 */
3061 int lxc_clear_config_network(struct lxc_conf
*c
)
3063 struct lxc_list
*it
,*next
;
3064 lxc_list_for_each_safe(it
, &c
->network
, next
) {
/*
 * Walk c->caps with the deletion-safe iterator.
 * NOTE(review): the loop body is not visible in this extract; presumably
 * each capability entry is unlinked and freed -- confirm.
 */
3070 int lxc_clear_config_caps(struct lxc_conf
*c
)
3072 struct lxc_list
*it
,*next
;
3074 lxc_list_for_each_safe(it
, &c
->caps
, next
) {
/*
 * Remove cgroup settings from c->cgroup.
 *
 * `key` is compared with "lxc.cgroup"; otherwise the text starting 11
 * characters into `key` (i.e. after "lxc.cgroup.") is the subsystem name,
 * and only entries whose cg->subsystem equals it are processed. Matching
 * entries have their subsystem string freed.
 * NOTE(review): `all` is presumably set when key is exactly "lxc.cgroup";
 * callers must pass a key of at least that form, since key + 11 is
 * computed unconditionally -- confirm.
 */
3082 int lxc_clear_cgroups(struct lxc_conf
*c
, const char *key
)
3084 struct lxc_list
*it
,*next
;
3086 const char *k
= key
+ 11;
3088 if (strcmp(key
, "lxc.cgroup") == 0)
3091 lxc_list_for_each_safe(it
, &c
->cgroup
, next
) {
3092 struct lxc_cgroup
*cg
= it
->elem
;
3093 if (!all
&& strcmp(cg
->subsystem
, k
) != 0)
3096 free(cg
->subsystem
);
/*
 * Walk c->mount_list with the deletion-safe iterator.
 * NOTE(review): the loop body is not visible in this extract; presumably
 * each mount entry is unlinked and freed -- confirm.
 */
3104 int lxc_clear_mount_entries(struct lxc_conf
*c
)
3106 struct lxc_list
*it
,*next
;
3108 lxc_list_for_each_safe(it
, &c
->mount_list
, next
) {
/*
 * Remove hook scripts from c->hooks.
 *
 * `key` is compared with "lxc.hook"; otherwise the text starting 9
 * characters into `key` (i.e. after "lxc.hook.") is matched against
 * lxchook_names[] and only that hook list is walked. A key matching no
 * hook name is reported as "Invalid hook key".
 * NOTE(review): `all` is presumably set when key is exactly "lxc.hook";
 * key + 9 is computed unconditionally, so callers must pass a key of at
 * least that form -- confirm.
 */
3116 int lxc_clear_hooks(struct lxc_conf
*c
, const char *key
)
3118 struct lxc_list
*it
,*next
;
3119 bool all
= false, done
= false;
3120 const char *k
= key
+ 9;
3123 if (strcmp(key
, "lxc.hook") == 0)
3126 for (i
=0; i
<NUM_LXC_HOOKS
; i
++) {
3127 if (all
|| strcmp(k
, lxchook_names
[i
]) == 0) {
3128 lxc_list_for_each_safe(it
, &c
->hooks
[i
], next
) {
3138 ERROR("Invalid hook key: %s", key
);
3144 void lxc_clear_saved_nics(struct lxc_conf
*conf
)
3148 if (!conf
->num_savednics
)
3150 for (i
=0; i
< conf
->num_savednics
; i
++)
3151 free(conf
->saved_nics
[i
].orig_name
);
3152 conf
->saved_nics
= 0;
3153 free(conf
->saved_nics
);
3156 void lxc_conf_free(struct lxc_conf
*conf
)
3160 if (conf
->console
.path
)
3161 free(conf
->console
.path
);
3162 if (conf
->rootfs
.mount
)
3163 free(conf
->rootfs
.mount
);
3164 if (conf
->rootfs
.path
)
3165 free(conf
->rootfs
.path
);
3167 free(conf
->utsname
);
3174 lxc_clear_config_network(conf
);
3176 if (conf
->aa_profile
)
3177 free(conf
->aa_profile
);
3179 lxc_seccomp_free(conf
);
3180 lxc_clear_config_caps(conf
);
3181 lxc_clear_cgroups(conf
, "lxc.cgroup");
3182 lxc_clear_hooks(conf
, "lxc.hook");
3183 lxc_clear_mount_entries(conf
);
3184 lxc_clear_saved_nics(conf
);