2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
36 #include <linux/loop.h>
38 #include <sys/types.h>
39 #include <sys/utsname.h>
40 #include <sys/param.h>
42 #include <sys/socket.h>
43 #include <sys/mount.h>
45 #include <sys/prctl.h>
46 #include <sys/capability.h>
47 #include <sys/personality.h>
49 #include <arpa/inet.h>
51 #include <netinet/in.h>
62 #include "lxc.h" /* for lxc_cgroup_set() */
63 #include "caps.h" /* for lxc_caps_last_cap() */
69 lxc_log_define(lxc_conf
, lxc
);
72 #define MAXINDEXLEN 20
74 #define MAXLINELEN 128
77 #define MS_DIRSYNC 128
89 #define MS_RELATIME (1 << 21)
92 #ifndef MS_STRICTATIME
93 #define MS_STRICTATIME (1 << 24)
97 #define CAP_SETFCAP 31
100 #ifndef CAP_MAC_OVERRIDE
101 #define CAP_MAC_OVERRIDE 32
104 #ifndef CAP_MAC_ADMIN
105 #define CAP_MAC_ADMIN 33
108 #ifndef PR_CAPBSET_DROP
109 #define PR_CAPBSET_DROP 24
112 char *lxchook_names
[NUM_LXC_HOOKS
] = {
113 "pre-start", "pre-mount", "mount", "start", "post-stop" };
115 extern int pivot_root(const char * new_root
, const char * put_old
);
117 typedef int (*instanciate_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
130 static int instanciate_veth(struct lxc_handler
*, struct lxc_netdev
*);
131 static int instanciate_macvlan(struct lxc_handler
*, struct lxc_netdev
*);
132 static int instanciate_vlan(struct lxc_handler
*, struct lxc_netdev
*);
133 static int instanciate_phys(struct lxc_handler
*, struct lxc_netdev
*);
134 static int instanciate_empty(struct lxc_handler
*, struct lxc_netdev
*);
136 static instanciate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
137 [LXC_NET_VETH
] = instanciate_veth
,
138 [LXC_NET_MACVLAN
] = instanciate_macvlan
,
139 [LXC_NET_VLAN
] = instanciate_vlan
,
140 [LXC_NET_PHYS
] = instanciate_phys
,
141 [LXC_NET_EMPTY
] = instanciate_empty
,
144 static int shutdown_veth(struct lxc_handler
*, struct lxc_netdev
*);
145 static int shutdown_macvlan(struct lxc_handler
*, struct lxc_netdev
*);
146 static int shutdown_vlan(struct lxc_handler
*, struct lxc_netdev
*);
147 static int shutdown_phys(struct lxc_handler
*, struct lxc_netdev
*);
148 static int shutdown_empty(struct lxc_handler
*, struct lxc_netdev
*);
150 static instanciate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
151 [LXC_NET_VETH
] = shutdown_veth
,
152 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
153 [LXC_NET_VLAN
] = shutdown_vlan
,
154 [LXC_NET_PHYS
] = shutdown_phys
,
155 [LXC_NET_EMPTY
] = shutdown_empty
,
158 static struct mount_opt mount_opt
[] = {
159 { "defaults", 0, 0 },
160 { "ro", 0, MS_RDONLY
},
161 { "rw", 1, MS_RDONLY
},
162 { "suid", 1, MS_NOSUID
},
163 { "nosuid", 0, MS_NOSUID
},
164 { "dev", 1, MS_NODEV
},
165 { "nodev", 0, MS_NODEV
},
166 { "exec", 1, MS_NOEXEC
},
167 { "noexec", 0, MS_NOEXEC
},
168 { "sync", 0, MS_SYNCHRONOUS
},
169 { "async", 1, MS_SYNCHRONOUS
},
170 { "dirsync", 0, MS_DIRSYNC
},
171 { "remount", 0, MS_REMOUNT
},
172 { "mand", 0, MS_MANDLOCK
},
173 { "nomand", 1, MS_MANDLOCK
},
174 { "atime", 1, MS_NOATIME
},
175 { "noatime", 0, MS_NOATIME
},
176 { "diratime", 1, MS_NODIRATIME
},
177 { "nodiratime", 0, MS_NODIRATIME
},
178 { "bind", 0, MS_BIND
},
179 { "rbind", 0, MS_BIND
|MS_REC
},
180 { "relatime", 0, MS_RELATIME
},
181 { "norelatime", 1, MS_RELATIME
},
182 { "strictatime", 0, MS_STRICTATIME
},
183 { "nostrictatime", 1, MS_STRICTATIME
},
187 static struct caps_opt caps_opt
[] = {
188 { "chown", CAP_CHOWN
},
189 { "dac_override", CAP_DAC_OVERRIDE
},
190 { "dac_read_search", CAP_DAC_READ_SEARCH
},
191 { "fowner", CAP_FOWNER
},
192 { "fsetid", CAP_FSETID
},
193 { "kill", CAP_KILL
},
194 { "setgid", CAP_SETGID
},
195 { "setuid", CAP_SETUID
},
196 { "setpcap", CAP_SETPCAP
},
197 { "linux_immutable", CAP_LINUX_IMMUTABLE
},
198 { "net_bind_service", CAP_NET_BIND_SERVICE
},
199 { "net_broadcast", CAP_NET_BROADCAST
},
200 { "net_admin", CAP_NET_ADMIN
},
201 { "net_raw", CAP_NET_RAW
},
202 { "ipc_lock", CAP_IPC_LOCK
},
203 { "ipc_owner", CAP_IPC_OWNER
},
204 { "sys_module", CAP_SYS_MODULE
},
205 { "sys_rawio", CAP_SYS_RAWIO
},
206 { "sys_chroot", CAP_SYS_CHROOT
},
207 { "sys_ptrace", CAP_SYS_PTRACE
},
208 { "sys_pacct", CAP_SYS_PACCT
},
209 { "sys_admin", CAP_SYS_ADMIN
},
210 { "sys_boot", CAP_SYS_BOOT
},
211 { "sys_nice", CAP_SYS_NICE
},
212 { "sys_resource", CAP_SYS_RESOURCE
},
213 { "sys_time", CAP_SYS_TIME
},
214 { "sys_tty_config", CAP_SYS_TTY_CONFIG
},
215 { "mknod", CAP_MKNOD
},
216 { "lease", CAP_LEASE
},
217 #ifdef CAP_AUDIT_WRITE
218 { "audit_write", CAP_AUDIT_WRITE
},
220 #ifdef CAP_AUDIT_CONTROL
221 { "audit_control", CAP_AUDIT_CONTROL
},
223 { "setfcap", CAP_SETFCAP
},
224 { "mac_override", CAP_MAC_OVERRIDE
},
225 { "mac_admin", CAP_MAC_ADMIN
},
227 { "syslog", CAP_SYSLOG
},
229 #ifdef CAP_WAKE_ALARM
230 { "wake_alarm", CAP_WAKE_ALARM
},
234 static int run_buffer(char *buffer
)
239 f
= popen(buffer
, "r");
241 SYSERROR("popen failed");
245 output
= malloc(LXC_LOG_BUFFER_SIZE
);
247 ERROR("failed to allocate memory for script output");
251 while(fgets(output
, LXC_LOG_BUFFER_SIZE
, f
))
252 DEBUG("script output: %s", output
);
256 if (pclose(f
) == -1) {
257 SYSERROR("Script exited on error");
264 static int run_script(const char *name
, const char *section
,
265 const char *script
, ...)
272 INFO("Executing script '%s' for container '%s', config section '%s'",
273 script
, name
, section
);
275 va_start(ap
, script
);
276 while ((p
= va_arg(ap
, char *)))
277 size
+= strlen(p
) + 1;
280 size
+= strlen(script
);
281 size
+= strlen(name
);
282 size
+= strlen(section
);
288 buffer
= alloca(size
);
290 ERROR("failed to allocate memory");
294 ret
= snprintf(buffer
, size
, "%s %s %s", script
, name
, section
);
295 if (ret
< 0 || ret
>= size
) {
296 ERROR("Script name too long");
301 va_start(ap
, script
);
302 while ((p
= va_arg(ap
, char *))) {
305 rc
= snprintf(buffer
+ ret
, len
, " %s", p
);
306 if (rc
< 0 || rc
>= len
) {
308 ERROR("Script args too long");
315 return run_buffer(buffer
);
318 static int find_fstype_cb(char* buffer
, void *data
)
328 /* we don't try 'nodev' entries */
329 if (strstr(buffer
, "nodev"))
333 fstype
+= lxc_char_left_gc(fstype
, strlen(fstype
));
334 fstype
[lxc_char_right_gc(fstype
, strlen(fstype
))] = '\0';
336 DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
337 cbarg
->rootfs
, cbarg
->target
, fstype
);
339 if (mount(cbarg
->rootfs
, cbarg
->target
, fstype
, cbarg
->mntopt
, NULL
)) {
340 DEBUG("mount failed with error: %s", strerror(errno
));
344 INFO("mounted '%s' on '%s', with fstype '%s'",
345 cbarg
->rootfs
, cbarg
->target
, fstype
);
350 static int mount_unknow_fs(const char *rootfs
, const char *target
, int mntopt
)
365 * find the filesystem type with brute force:
366 * first we check with /etc/filesystems, in case the modules
367 * are auto-loaded and fall back to the supported kernel fs
374 for (i
= 0; i
< sizeof(fsfile
)/sizeof(fsfile
[0]); i
++) {
378 if (access(fsfile
[i
], F_OK
))
381 ret
= lxc_file_for_each_line(fsfile
[i
], find_fstype_cb
, &cbarg
);
383 ERROR("failed to parse '%s'", fsfile
[i
]);
391 ERROR("failed to determine fs type for '%s'", rootfs
);
/*
 * Directory-backed rootfs: recursively bind mount it onto the target.
 * The result of mount(2) is handed back unchanged.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
/*
 * Attach the file 'rootfs' to the loop device open on 'fd'.
 *
 * 'loinfo' is zeroed, flagged LO_FLAGS_AUTOCLEAR and pushed to the
 * device with LOOP_SET_STATUS64 once LOOP_SET_FD has succeeded.
 *
 * Returns 0 on success, -1 otherwise.
 * NOTE(review): the open() failure test and the final close of the
 * backing descriptor are reconstructed - confirm against upstream.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int status = -1;
	int backing_fd;

	backing_fd = open(rootfs, O_RDWR);
	if (backing_fd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));
	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, backing_fd)) {
		SYSERROR("failed to LOOP_SET_FD");
	} else if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
	} else {
		status = 0;
	}

	close(backing_fd);

	return status;
}
432 static int mount_rootfs_file(const char *rootfs
, const char *target
)
434 struct dirent dirent
, *direntp
;
435 struct loop_info64 loinfo
;
436 int ret
= -1, fd
= -1, rc
;
438 char path
[MAXPATHLEN
];
440 dir
= opendir("/dev");
442 SYSERROR("failed to open '/dev'");
446 while (!readdir_r(dir
, &dirent
, &direntp
)) {
451 if (!strcmp(direntp
->d_name
, "."))
454 if (!strcmp(direntp
->d_name
, ".."))
457 if (strncmp(direntp
->d_name
, "loop", 4))
460 rc
= snprintf(path
, MAXPATHLEN
, "/dev/%s", direntp
->d_name
);
461 if (rc
< 0 || rc
>= MAXPATHLEN
)
464 fd
= open(path
, O_RDWR
);
468 if (ioctl(fd
, LOOP_GET_STATUS64
, &loinfo
) == 0) {
473 if (errno
!= ENXIO
) {
474 WARN("unexpected error for ioctl on '%s': %m",
479 DEBUG("found '%s' free lodev", path
);
481 ret
= setup_lodev(rootfs
, fd
, &loinfo
);
483 ret
= mount_unknow_fs(path
, target
, 0);
490 WARN("failed to close directory");
/*
 * Block-device rootfs: the filesystem type is not known in advance, so
 * defer to mount_unknow_fs() with no mount flags.
 */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int no_flags = 0;

	return mount_unknow_fs(rootfs, target, no_flags);
}
502 * if rootfs is a directory, then open ${rootfs}.hold for writing for the
503 * duration of the container run, to prevent the container from marking the
504 * underlying fs readonly on shutdown.
505 * return -1 on error.
506 * return -2 if nothing needed to be pinned.
507 * return an open fd (>=0) if we pinned it.
509 int pin_rootfs(const char *rootfs
)
511 char absrootfs
[MAXPATHLEN
];
512 char absrootfspin
[MAXPATHLEN
];
516 if (rootfs
== NULL
|| strlen(rootfs
) == 0)
519 if (!realpath(rootfs
, absrootfs
)) {
520 SYSERROR("failed to get real path for '%s'", rootfs
);
524 if (access(absrootfs
, F_OK
)) {
525 SYSERROR("'%s' is not accessible", absrootfs
);
529 if (stat(absrootfs
, &s
)) {
530 SYSERROR("failed to stat '%s'", absrootfs
);
534 if (!__S_ISTYPE(s
.st_mode
, S_IFDIR
))
537 ret
= snprintf(absrootfspin
, MAXPATHLEN
, "%s%s", absrootfs
, ".hold");
538 if (ret
>= MAXPATHLEN
) {
539 SYSERROR("pathname too long for rootfs hold file");
543 fd
= open(absrootfspin
, O_CREAT
| O_RDWR
, S_IWUSR
|S_IRUSR
);
544 INFO("opened %s as fd %d\n", absrootfspin
, fd
);
548 static int mount_rootfs(const char *rootfs
, const char *target
)
550 char absrootfs
[MAXPATHLEN
];
554 typedef int (*rootfs_cb
)(const char *, const char *);
560 { S_IFDIR
, mount_rootfs_dir
},
561 { S_IFBLK
, mount_rootfs_block
},
562 { S_IFREG
, mount_rootfs_file
},
565 if (!realpath(rootfs
, absrootfs
)) {
566 SYSERROR("failed to get real path for '%s'", rootfs
);
570 if (access(absrootfs
, F_OK
)) {
571 SYSERROR("'%s' is not accessible", absrootfs
);
575 if (stat(absrootfs
, &s
)) {
576 SYSERROR("failed to stat '%s'", absrootfs
);
580 for (i
= 0; i
< sizeof(rtfs_type
)/sizeof(rtfs_type
[0]); i
++) {
582 if (!__S_ISTYPE(s
.st_mode
, rtfs_type
[i
].type
))
585 return rtfs_type
[i
].cb(absrootfs
, target
);
588 ERROR("unsupported rootfs type for '%s'", absrootfs
);
/*
 * Set the hostname from utsname->nodename.
 *
 * Returns 0 on success, -1 if sethostname() fails.
 * NOTE(review): the early return for a NULL utsname is reconstructed -
 * confirm against upstream.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
607 static int setup_tty(const struct lxc_rootfs
*rootfs
,
608 const struct lxc_tty_info
*tty_info
, char *ttydir
)
610 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
616 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
618 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
620 ret
= snprintf(path
, sizeof(path
), "%s/dev/tty%d",
621 rootfs
->mount
, i
+ 1);
622 if (ret
>= sizeof(path
)) {
623 ERROR("pathname too long for ttys");
627 /* create dev/lxc/tty%d" */
628 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/tty%d",
629 rootfs
->mount
, ttydir
, i
+ 1);
630 if (ret
>= sizeof(lxcpath
)) {
631 ERROR("pathname too long for ttys");
634 ret
= creat(lxcpath
, 0660);
635 if (ret
==-1 && errno
!= EEXIST
) {
636 SYSERROR("error creating %s\n", lxcpath
);
641 if (ret
&& errno
!= ENOENT
) {
642 SYSERROR("error unlinking %s\n", path
);
646 if (mount(pty_info
->name
, lxcpath
, "none", MS_BIND
, 0)) {
647 WARN("failed to mount '%s'->'%s'",
648 pty_info
->name
, path
);
652 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/tty%d", ttydir
, i
+1);
653 if (ret
>= sizeof(lxcpath
)) {
654 ERROR("tty pathname too long");
657 ret
= symlink(lxcpath
, path
);
659 SYSERROR("failed to create symlink for tty %d\n", i
+1);
663 /* If we populated /dev, then we need to create /dev/ttyN */
664 if (access(path
, F_OK
)) {
665 ret
= creat(path
, 0660);
667 SYSERROR("error creating %s\n", path
);
668 /* this isn't fatal, continue */
672 if (mount(pty_info
->name
, path
, "none", MS_BIND
, 0)) {
673 WARN("failed to mount '%s'->'%s'",
674 pty_info
->name
, path
);
680 INFO("%d tty(s) has been setup", tty_info
->nbtty
);
685 static int setup_rootfs_pivot_root_cb(char *buffer
, void *data
)
687 struct lxc_list
*mountlist
, *listentry
, *iterator
;
688 char *pivotdir
, *mountpoint
, *mountentry
;
693 cbparm
= (void **)data
;
695 mountlist
= cbparm
[0];
696 pivotdir
= cbparm
[1];
698 /* parse entry, first field is mountname, ignore */
699 mountpoint
= strtok(mountentry
, " ");
703 /* second field is mountpoint */
704 mountpoint
= strtok(NULL
, " ");
708 /* only consider mountpoints below old root fs */
709 if (strncmp(mountpoint
, pivotdir
, strlen(pivotdir
)))
712 /* filter duplicate mountpoints */
714 lxc_list_for_each(iterator
, mountlist
) {
715 if (!strcmp(iterator
->elem
, mountpoint
)) {
723 /* add entry to list */
724 listentry
= malloc(sizeof(*listentry
));
726 SYSERROR("malloc for mountpoint listentry failed");
730 listentry
->elem
= strdup(mountpoint
);
731 if (!listentry
->elem
) {
732 SYSERROR("strdup failed");
735 lxc_list_add_tail(mountlist
, listentry
);
740 static int umount_oldrootfs(const char *oldrootfs
)
742 char path
[MAXPATHLEN
];
744 struct lxc_list mountlist
, *iterator
, *next
;
745 int ok
, still_mounted
, last_still_mounted
;
748 /* read and parse /proc/mounts in old root fs */
749 lxc_list_init(&mountlist
);
751 /* oldrootfs is on the top tree directory now */
752 rc
= snprintf(path
, sizeof(path
), "/%s", oldrootfs
);
753 if (rc
>= sizeof(path
)) {
754 ERROR("rootfs name too long");
757 cbparm
[0] = &mountlist
;
759 cbparm
[1] = strdup(path
);
761 SYSERROR("strdup failed");
765 rc
= snprintf(path
, sizeof(path
), "%s/proc/mounts", oldrootfs
);
766 if (rc
>= sizeof(path
)) {
767 ERROR("container proc/mounts name too long");
771 ok
= lxc_file_for_each_line(path
,
772 setup_rootfs_pivot_root_cb
, &cbparm
);
774 SYSERROR("failed to read or parse mount list '%s'", path
);
778 /* umount filesystems until none left or list no longer shrinks */
781 last_still_mounted
= still_mounted
;
784 lxc_list_for_each_safe(iterator
, &mountlist
, next
) {
786 /* umount normally */
787 if (!umount(iterator
->elem
)) {
788 DEBUG("umounted '%s'", (char *)iterator
->elem
);
789 lxc_list_del(iterator
);
796 } while (still_mounted
> 0 && still_mounted
!= last_still_mounted
);
799 lxc_list_for_each(iterator
, &mountlist
) {
801 /* let's try a lazy umount */
802 if (!umount2(iterator
->elem
, MNT_DETACH
)) {
803 INFO("lazy unmount of '%s'", (char *)iterator
->elem
);
807 /* be more brutal (nfs) */
808 if (!umount2(iterator
->elem
, MNT_FORCE
)) {
809 INFO("forced unmount of '%s'", (char *)iterator
->elem
);
813 WARN("failed to unmount '%s'", (char *)iterator
->elem
);
819 static int setup_rootfs_pivot_root(const char *rootfs
, const char *pivotdir
)
821 char path
[MAXPATHLEN
];
822 int remove_pivotdir
= 0;
825 /* change into new root fs */
827 SYSERROR("can't chdir to new rootfs '%s'", rootfs
);
832 pivotdir
= "lxc_putold";
834 /* compute the full path to pivotdir under rootfs */
835 rc
= snprintf(path
, sizeof(path
), "%s/%s", rootfs
, pivotdir
);
836 if (rc
>= sizeof(path
)) {
837 ERROR("pivot dir name too long");
841 if (access(path
, F_OK
)) {
843 if (mkdir_p(path
, 0755)) {
844 SYSERROR("failed to create pivotdir '%s'", path
);
849 DEBUG("created '%s' directory", path
);
852 DEBUG("mountpoint for old rootfs is '%s'", path
);
854 /* pivot_root into our new root fs */
855 if (pivot_root(".", path
)) {
856 SYSERROR("pivot_root syscall failed");
861 SYSERROR("can't chdir to / after pivot_root");
865 DEBUG("pivot_root syscall to '%s' successful", rootfs
);
867 /* we switch from absolute path to relative path */
868 if (umount_oldrootfs(pivotdir
))
871 /* remove temporary mount point, we don't consider the removing
873 if (remove_pivotdir
&& rmdir(pivotdir
))
874 WARN("can't remove mountpoint '%s': %m", pivotdir
);
880 * Do we want to add options for max size of /dev and a file to
881 * specify which devices to create?
/*
 * Mount a tmpfs on <root>/dev and create the <root>/dev/pts directory
 * inside it.
 *
 * @root : path of the container root
 *
 * Returns 0 on success, -1 when a path does not fit in MAXPATHLEN or
 * when mount(2) / mkdir(2) fails.
 */
static int mount_autodev(char *root)
{
	int ret;
	char path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	/* a return value equal to the buffer size already means the
	 * path was truncated, so the bound must be inclusive */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = mount("none", path, "tmpfs", 0, "size=100000");
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	if (ret) {
		SYSERROR("Failed to create /dev/pts in container");
		return -1;
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
911 * Try to run MAKEDEV console in the container. If something fails,
912 * continue anyway as it should not be detrimental to the container.
913 * This makes sure that things like /dev/vcs* exist.
914 * (Pass devpath in to reduce stack usage)
916 static void run_makedev(char *devpath
)
921 curd
= open(".", O_RDONLY
);
924 ret
= chdir(devpath
);
929 if (run_buffer("/sbin/MAKEDEV console"))
930 INFO("Error running MAKEDEV console in %s", devpath
);
933 INFO("Error returning to original directory: expect breakage");
944 struct lxc_devs lxc_devs
[] = {
945 { "null", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 3 },
946 { "zero", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 5 },
947 { "full", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 7 },
948 { "urandom", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 9 },
949 { "random", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 8 },
950 { "tty", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 5, 0 },
951 { "console", S_IFCHR
| S_IRUSR
| S_IWUSR
, 5, 1 },
954 static int setup_autodev(char *root
)
958 char path
[MAXPATHLEN
];
961 INFO("Creating initial consoles under %s/dev\n", root
);
963 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev", root
);
964 if (ret
< 0 || ret
>= MAXPATHLEN
) {
965 ERROR("Error calculating container /dev location");
970 INFO("Populating /dev under %s\n", root
);
971 for (i
= 0; i
< sizeof(lxc_devs
) / sizeof(lxc_devs
[0]); i
++) {
973 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev/%s", root
, d
->name
);
974 if (ret
< 0 || ret
>= MAXPATHLEN
)
976 ret
= mknod(path
, d
->mode
, makedev(d
->maj
, d
->min
));
977 if (ret
&& errno
!= EEXIST
) {
978 SYSERROR("Error creating %s\n", d
->name
);
983 INFO("Populated /dev under %s\n", root
);
987 static int setup_rootfs(const struct lxc_rootfs
*rootfs
)
992 if (access(rootfs
->mount
, F_OK
)) {
993 SYSERROR("failed to access to '%s', check it is present",
998 if (mount_rootfs(rootfs
->path
, rootfs
->mount
)) {
999 ERROR("failed to mount rootfs");
1003 DEBUG("mounted '%s' on '%s'", rootfs
->path
, rootfs
->mount
);
1008 int setup_pivot_root(const struct lxc_rootfs
*rootfs
)
1013 if (setup_rootfs_pivot_root(rootfs
->mount
, rootfs
->pivot
)) {
1014 ERROR("failed to setup pivot root");
1021 static int setup_pts(int pts
)
1023 char target
[PATH_MAX
];
1028 if (!access("/dev/pts/ptmx", F_OK
) && umount("/dev/pts")) {
1029 SYSERROR("failed to umount 'dev/pts'");
1033 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL
,
1034 "newinstance,ptmxmode=0666")) {
1035 SYSERROR("failed to mount a new instance of '/dev/pts'");
1039 if (access("/dev/ptmx", F_OK
)) {
1040 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1042 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
1046 if (realpath("/dev/ptmx", target
) && !strcmp(target
, "/dev/pts/ptmx"))
1049 /* fallback here, /dev/pts/ptmx exists just mount bind */
1050 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND
, 0)) {
1051 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
1055 INFO("created new pts instance");
/*
 * Apply the requested execution domain with personality(2).
 *
 * Returns 0 on success, -1 if personality() fails.
 * NOTE(review): the early return for persona == -1 is reconstructed -
 * confirm against upstream.
 */
static int setup_personality(int persona)
{
	if (persona == -1)
		return 0;

	if (personality(persona) >= 0) {
		INFO("set personality to '0x%x'", persona);
		return 0;
	}

	SYSERROR("failed to set personality to '0x%x'", persona);
	return -1;
}
1076 static int setup_dev_console(const struct lxc_rootfs
*rootfs
,
1077 const struct lxc_console
*console
)
1079 char path
[MAXPATHLEN
];
1083 ret
= snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1084 if (ret
>= sizeof(path
)) {
1085 ERROR("console path too long\n");
1089 if (access(path
, F_OK
)) {
1090 WARN("rootfs specified but no console found at '%s'", path
);
1094 if (console
->peer
== -1) {
1095 INFO("no console output required");
1099 if (stat(path
, &s
)) {
1100 SYSERROR("failed to stat '%s'", path
);
1104 if (chmod(console
->name
, s
.st_mode
)) {
1105 SYSERROR("failed to set mode '0%o' to '%s'",
1106 s
.st_mode
, console
->name
);
1110 if (mount(console
->name
, path
, "none", MS_BIND
, 0)) {
1111 ERROR("failed to mount '%s' on '%s'", console
->name
, path
);
1115 INFO("console has been setup");
1119 static int setup_ttydir_console(const struct lxc_rootfs
*rootfs
,
1120 const struct lxc_console
*console
,
1123 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
1126 /* create rootfs/dev/<ttydir> directory */
1127 ret
= snprintf(path
, sizeof(path
), "%s/dev/%s", rootfs
->mount
,
1129 if (ret
>= sizeof(path
))
1131 ret
= mkdir(path
, 0755);
1132 if (ret
&& errno
!= EEXIST
) {
1133 SYSERROR("failed with errno %d to create %s\n", errno
, path
);
1136 INFO("created %s\n", path
);
1138 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/console",
1139 rootfs
->mount
, ttydir
);
1140 if (ret
>= sizeof(lxcpath
)) {
1141 ERROR("console path too long\n");
1145 snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1147 if (ret
&& errno
!= ENOENT
) {
1148 SYSERROR("error unlinking %s\n", path
);
1152 ret
= creat(lxcpath
, 0660);
1153 if (ret
==-1 && errno
!= EEXIST
) {
1154 SYSERROR("error %d creating %s\n", errno
, lxcpath
);
1159 if (console
->peer
== -1) {
1160 INFO("no console output required");
1164 if (mount(console
->name
, lxcpath
, "none", MS_BIND
, 0)) {
1165 ERROR("failed to mount '%s' on '%s'", console
->name
, lxcpath
);
1169 /* create symlink from rootfs/dev/console to 'lxc/console' */
1170 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/console", ttydir
);
1171 if (ret
>= sizeof(lxcpath
)) {
1172 ERROR("lxc/console path too long");
1175 ret
= symlink(lxcpath
, path
);
1177 SYSERROR("failed to create symlink for console");
1181 INFO("console has been setup on %s", lxcpath
);
1186 static int setup_console(const struct lxc_rootfs
*rootfs
,
1187 const struct lxc_console
*console
,
1190 /* We don't have a rootfs, /dev/console will be shared */
1194 return setup_dev_console(rootfs
, console
);
1196 return setup_ttydir_console(rootfs
, console
, ttydir
);
1199 static int setup_kmsg(const struct lxc_rootfs
*rootfs
,
1200 const struct lxc_console
*console
)
1202 char kpath
[MAXPATHLEN
];
1205 ret
= snprintf(kpath
, sizeof(kpath
), "%s/dev/kmsg", rootfs
->mount
);
1206 if (ret
< 0 || ret
>= sizeof(kpath
))
1209 ret
= unlink(kpath
);
1210 if (ret
&& errno
!= ENOENT
) {
1211 SYSERROR("error unlinking %s\n", kpath
);
1215 ret
= symlink("console", kpath
);
1217 SYSERROR("failed to create symlink for kmsg");
1224 static int setup_cgroup(const char *name
, struct lxc_list
*cgroups
)
1226 struct lxc_list
*iterator
;
1227 struct lxc_cgroup
*cg
;
1230 if (lxc_list_empty(cgroups
))
1233 lxc_list_for_each(iterator
, cgroups
) {
1235 cg
= iterator
->elem
;
1237 if (lxc_cgroup_set(name
, cg
->subsystem
, cg
->value
))
1240 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1244 INFO("cgroup has been setup");
1249 static void parse_mntopt(char *opt
, unsigned long *flags
, char **data
)
1251 struct mount_opt
*mo
;
1253 /* If opt is found in mount_opt, set or clear flags.
1254 * Otherwise append it to data. */
1256 for (mo
= &mount_opt
[0]; mo
->name
!= NULL
; mo
++) {
1257 if (!strncmp(opt
, mo
->name
, strlen(mo
->name
))) {
1259 *flags
&= ~mo
->flag
;
1271 static int parse_mntopts(const char *mntopts
, unsigned long *mntflags
,
1275 char *p
, *saveptr
= NULL
;
1283 s
= strdup(mntopts
);
1285 SYSERROR("failed to allocate memory");
1289 data
= malloc(strlen(s
) + 1);
1291 SYSERROR("failed to allocate memory");
1297 for (p
= strtok_r(s
, ",", &saveptr
); p
!= NULL
;
1298 p
= strtok_r(NULL
, ",", &saveptr
))
1299 parse_mntopt(p
, mntflags
, &data
);
/*
 * Mount one fstab-style entry.
 *
 * The first mount(2) call is made without MS_REMOUNT. When the flags
 * ask for a remount or a bind mount, a second call with MS_REMOUNT set
 * follows so that the remaining options are applied.
 *
 * Returns 0 on success, -1 if either mount(2) call fails.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long first_pass = mountflags & ~MS_REMOUNT;
	const int needs_remount = (mountflags & (MS_REMOUNT | MS_BIND)) != 0;

	if (mount(fsname, target, fstype, first_pass, data)) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (needs_remount) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data)) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
/*
 * Mount an entry exactly where it says, for containers that do not have
 * a rootfs of their own.
 *
 * Returns the result of mount_entry(), or -1 when the mount options
 * cannot be parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *fsdata;
	int rc;

	rc = parse_mntopts(mntent->mnt_opts, &flags, &fsdata);
	if (rc < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, fsdata);

	free(fsdata);

	return rc;
}
1356 static int mount_entry_on_absolute_rootfs(struct mntent
*mntent
,
1357 const struct lxc_rootfs
*rootfs
,
1358 const char *lxc_name
)
1361 char path
[MAXPATHLEN
];
1362 unsigned long mntflags
;
1364 int r
, ret
= 0, offset
;
1366 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1367 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1371 /* if rootfs->path is a blockdev path, allow container fstab to
1372 * use $LXCPATH/CN/rootfs as the target prefix */
1373 r
= snprintf(path
, MAXPATHLEN
, LXCPATH
"/%s/rootfs", lxc_name
);
1374 if (r
< 0 || r
>= MAXPATHLEN
)
1377 aux
= strstr(mntent
->mnt_dir
, path
);
1379 offset
= strlen(path
);
1384 aux
= strstr(mntent
->mnt_dir
, rootfs
->path
);
1386 WARN("ignoring mount point '%s'", mntent
->mnt_dir
);
1389 offset
= strlen(rootfs
->path
);
1393 r
= snprintf(path
, MAXPATHLEN
, "%s/%s", rootfs
->mount
,
1395 if (r
< 0 || r
>= MAXPATHLEN
) {
1396 WARN("pathnme too long for '%s'", mntent
->mnt_dir
);
1402 ret
= mount_entry(mntent
->mnt_fsname
, path
, mntent
->mnt_type
,
/*
 * Mount an entry whose target is given relative to the container root.
 *
 * @mntent : the parsed fstab entry
 * @rootfs : mount point of the container root, prepended to mnt_dir
 *
 * Returns the result of mount_entry(), or -1 when the options cannot
 * be parsed or the resulting path does not fit in MAXPATHLEN. The data
 * string handed out by parse_mntopts() is released on every path.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

out:
	/* also reached on the error path above so that mntdata is not leaked */
	free(mntdata);

	return ret;
}
1438 static int mount_file_entries(const struct lxc_rootfs
*rootfs
, FILE *file
,
1439 const char *lxc_name
)
1441 struct mntent
*mntent
;
1444 while ((mntent
= getmntent(file
))) {
1446 if (!rootfs
->path
) {
1447 if (mount_entry_on_systemfs(mntent
))
1452 /* We have a separate root, mounts are relative to it */
1453 if (mntent
->mnt_dir
[0] != '/') {
1454 if (mount_entry_on_relative_rootfs(mntent
,
1460 if (mount_entry_on_absolute_rootfs(mntent
, rootfs
, lxc_name
))
1466 INFO("mount points have been setup");
/*
 * Mount every entry of the fstab file configured for the container.
 *
 * Returns the result of mount_file_entries(), or -1 when the file
 * cannot be opened.
 * NOTE(review): the early return for a NULL fstab is reconstructed -
 * confirm against upstream.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (fstab == NULL)
		return 0;

	entries = setmntent(fstab, "r");
	if (entries == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);

	endmntent(entries);

	return rc;
}
1492 static int setup_mount_entries(const struct lxc_rootfs
*rootfs
, struct lxc_list
*mount
,
1493 const char *lxc_name
)
1496 struct lxc_list
*iterator
;
1502 ERROR("tmpfile error: %m");
1506 lxc_list_for_each(iterator
, mount
) {
1507 mount_entry
= iterator
->elem
;
1508 fprintf(file
, "%s\n", mount_entry
);
1513 ret
= mount_file_entries(rootfs
, file
, lxc_name
);
1519 static int setup_caps(struct lxc_list
*caps
)
1521 struct lxc_list
*iterator
;
1526 lxc_list_for_each(iterator
, caps
) {
1528 drop_entry
= iterator
->elem
;
1532 for (i
= 0; i
< sizeof(caps_opt
)/sizeof(caps_opt
[0]); i
++) {
1534 if (strcmp(drop_entry
, caps_opt
[i
].name
))
1537 capid
= caps_opt
[i
].value
;
1542 /* try to see if it's numeric, so the user may specify
1543 * capabilities that the running kernel knows about but
1545 capid
= strtol(drop_entry
, &ptr
, 10);
1546 if (!ptr
|| *ptr
!= '\0' ||
1547 capid
== LONG_MIN
|| capid
== LONG_MAX
)
1548 /* not a valid number */
1550 else if (capid
> lxc_caps_last_cap())
1551 /* we have a number but it's not a valid
1557 ERROR("unknown capability %s", drop_entry
);
1561 DEBUG("drop capability '%s' (%d)", drop_entry
, capid
);
1563 if (prctl(PR_CAPBSET_DROP
, capid
, 0, 0, 0)) {
1564 SYSERROR("failed to remove %s capability", drop_entry
);
1570 DEBUG("capabilities has been setup");
1575 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
1577 struct sockaddr sockaddr
;
1581 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
1583 ERROR("mac address '%s' conversion failed : %s",
1584 hwaddr
, strerror(-ret
));
1588 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
1589 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
1591 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1593 ERROR("socket failure : %s", strerror(errno
));
1597 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
1600 ERROR("ioctl failure : %s", strerror(errno
));
1602 DEBUG("mac address '%s' on '%s' has been setup", hwaddr
, ifname
);
1607 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
1609 struct lxc_list
*iterator
;
1610 struct lxc_inetdev
*inetdev
;
1613 lxc_list_for_each(iterator
, ip
) {
1615 inetdev
= iterator
->elem
;
1617 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
1618 &inetdev
->bcast
, inetdev
->prefix
);
1620 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1621 ifindex
, strerror(-err
));
1629 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
1631 struct lxc_list
*iterator
;
1632 struct lxc_inet6dev
*inet6dev
;
1635 lxc_list_for_each(iterator
, ip
) {
1637 inet6dev
= iterator
->elem
;
1639 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
1640 &inet6dev
->mcast
, &inet6dev
->acast
,
1643 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1644 ifindex
, strerror(-err
));
1652 static int setup_netdev(struct lxc_netdev
*netdev
)
1654 char ifname
[IFNAMSIZ
];
1655 char *current_ifname
= ifname
;
1658 /* empty network namespace */
1659 if (!netdev
->ifindex
) {
1660 if (netdev
->flags
& IFF_UP
) {
1661 err
= lxc_netdev_up("lo");
1663 ERROR("failed to set the loopback up : %s",
1671 /* retrieve the name of the interface */
1672 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
1673 ERROR("no interface corresponding to index '%d'",
1678 /* default: let the system to choose one interface name */
1680 netdev
->name
= netdev
->type
== LXC_NET_PHYS
?
1681 netdev
->link
: "eth%d";
1683 /* rename the interface name */
1684 err
= lxc_netdev_rename_by_name(ifname
, netdev
->name
);
1686 ERROR("failed to rename %s->%s : %s", ifname
, netdev
->name
,
1691 /* Re-read the name of the interface because its name has changed
1692 * and would be automatically allocated by the system
1694 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
1695 ERROR("no interface corresponding to index '%d'",
1700 /* set a mac address */
1701 if (netdev
->hwaddr
) {
1702 if (setup_hw_addr(netdev
->hwaddr
, current_ifname
)) {
1703 ERROR("failed to setup hw address for '%s'",
1709 /* setup ipv4 addresses on the interface */
1710 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
)) {
1711 ERROR("failed to setup ip addresses for '%s'",
1716 /* setup ipv6 addresses on the interface */
1717 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
)) {
1718 ERROR("failed to setup ipv6 addresses for '%s'",
1723 /* set the network device up */
1724 if (netdev
->flags
& IFF_UP
) {
1727 err
= lxc_netdev_up(current_ifname
);
1729 ERROR("failed to set '%s' up : %s", current_ifname
,
1734 /* the network is up, make the loopback up too */
1735 err
= lxc_netdev_up("lo");
1737 ERROR("failed to set the loopback up : %s",
1743 /* We can only set up the default routes after bringing
1744 * up the interface, sine bringing up the interface adds
1745 * the link-local routes and we can't add a default
1746 * route if the gateway is not reachable. */
1748 /* setup ipv4 gateway on the interface */
1749 if (netdev
->ipv4_gateway
) {
1750 if (!(netdev
->flags
& IFF_UP
)) {
1751 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname
);
1755 if (lxc_list_empty(&netdev
->ipv4
)) {
1756 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname
);
1760 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
1762 ERROR("failed to setup ipv4 gateway for '%s': %s",
1763 ifname
, strerror(-err
));
1764 if (netdev
->ipv4_gateway_auto
) {
1765 char buf
[INET_ADDRSTRLEN
];
1766 inet_ntop(AF_INET
, netdev
->ipv4_gateway
, buf
, sizeof(buf
));
1767 ERROR("tried to set autodetected ipv4 gateway '%s'", buf
);
1773 /* setup ipv6 gateway on the interface */
1774 if (netdev
->ipv6_gateway
) {
1775 if (!(netdev
->flags
& IFF_UP
)) {
1776 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname
);
1780 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
)) {
1781 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname
);
1785 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
1787 ERROR("failed to setup ipv6 gateway for '%s': %s",
1788 ifname
, strerror(-err
));
1789 if (netdev
->ipv6_gateway_auto
) {
1790 char buf
[INET6_ADDRSTRLEN
];
1791 inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, buf
, sizeof(buf
));
1792 ERROR("tried to set autodetected ipv6 gateway '%s'", buf
);
1798 DEBUG("'%s' has been setup", current_ifname
);
1803 static int setup_network(struct lxc_list
*network
)
1805 struct lxc_list
*iterator
;
1806 struct lxc_netdev
*netdev
;
1808 lxc_list_for_each(iterator
, network
) {
1810 netdev
= iterator
->elem
;
1812 if (setup_netdev(netdev
)) {
1813 ERROR("failed to setup netdev");
1818 if (!lxc_list_empty(network
))
1819 INFO("network has been setup");
1824 void lxc_rename_phys_nics_on_shutdown(struct lxc_conf
*conf
)
1828 INFO("running to reset %d nic names", conf
->num_savednics
);
1829 for (i
=0; i
<conf
->num_savednics
; i
++) {
1830 struct saved_nic
*s
= &conf
->saved_nics
[i
];
1831 INFO("resetting nic %d to %s\n", s
->ifindex
, s
->orig_name
);
1832 lxc_netdev_rename_by_index(s
->ifindex
, s
->orig_name
);
1835 conf
->num_savednics
= 0;
1836 free(conf
->saved_nics
);
1839 static int setup_private_host_hw_addr(char *veth1
)
1845 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1849 snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
1850 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
1856 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
1857 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
1862 DEBUG("mac address of host interface '%s' changed to private "
1863 "%02x:%02x:%02x:%02x:%02x:%02x", veth1
,
1864 ifr
.ifr_hwaddr
.sa_data
[0] & 0xff,
1865 ifr
.ifr_hwaddr
.sa_data
[1] & 0xff,
1866 ifr
.ifr_hwaddr
.sa_data
[2] & 0xff,
1867 ifr
.ifr_hwaddr
.sa_data
[3] & 0xff,
1868 ifr
.ifr_hwaddr
.sa_data
[4] & 0xff,
1869 ifr
.ifr_hwaddr
.sa_data
[5] & 0xff);
1874 static char *default_rootfs_mount
= LXCROOTFSMOUNT
;
1876 struct lxc_conf
*lxc_conf_init(void)
1878 struct lxc_conf
*new;
1881 new = malloc(sizeof(*new));
1883 ERROR("lxc_conf_init : %m");
1886 memset(new, 0, sizeof(*new));
1888 new->personality
= -1;
1889 new->console
.path
= NULL
;
1890 new->console
.peer
= -1;
1891 new->console
.master
= -1;
1892 new->console
.slave
= -1;
1893 new->console
.name
[0] = '\0';
1894 new->rootfs
.mount
= default_rootfs_mount
;
1895 new->loglevel
= LXC_LOG_PRIORITY_NOTSET
;
1896 lxc_list_init(&new->cgroup
);
1897 lxc_list_init(&new->network
);
1898 lxc_list_init(&new->mount_list
);
1899 lxc_list_init(&new->caps
);
1900 for (i
=0; i
<NUM_LXC_HOOKS
; i
++)
1901 lxc_list_init(&new->hooks
[i
]);
1903 new->aa_profile
= NULL
;
1905 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
1906 new->lsm_umount_proc
= 0;
1912 static int instanciate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1914 char veth1buf
[IFNAMSIZ
], *veth1
;
1915 char veth2buf
[IFNAMSIZ
], *veth2
;
1918 if (netdev
->priv
.veth_attr
.pair
)
1919 veth1
= netdev
->priv
.veth_attr
.pair
;
1921 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
1922 if (err
>= sizeof(veth1buf
)) { /* can't *really* happen, but... */
1923 ERROR("veth1 name too long");
1926 veth1
= mktemp(veth1buf
);
1927 /* store away for deconf */
1928 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
1931 snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
1932 veth2
= mktemp(veth2buf
);
1934 if (!strlen(veth1
) || !strlen(veth2
)) {
1935 ERROR("failed to allocate a temporary name");
1939 err
= lxc_veth_create(veth1
, veth2
);
1941 ERROR("failed to create %s-%s : %s", veth1
, veth2
,
1946 /* changing the high byte of the mac address to 0xfe, the bridge interface
1947 * will always keep the host's mac address and not take the mac address
1949 err
= setup_private_host_hw_addr(veth1
);
1951 ERROR("failed to change mac address of host interface '%s' : %s",
1952 veth1
, strerror(-err
));
1957 err
= lxc_netdev_set_mtu(veth1
, atoi(netdev
->mtu
));
1959 err
= lxc_netdev_set_mtu(veth2
, atoi(netdev
->mtu
));
1961 ERROR("failed to set mtu '%s' for %s-%s : %s",
1962 netdev
->mtu
, veth1
, veth2
, strerror(-err
));
1968 err
= lxc_bridge_attach(netdev
->link
, veth1
);
1970 ERROR("failed to attach '%s' to the bridge '%s' : %s",
1971 veth1
, netdev
->link
, strerror(-err
));
1976 netdev
->ifindex
= if_nametoindex(veth2
);
1977 if (!netdev
->ifindex
) {
1978 ERROR("failed to retrieve the index for %s", veth2
);
1982 err
= lxc_netdev_up(veth1
);
1984 ERROR("failed to set %s up : %s", veth1
, strerror(-err
));
1988 if (netdev
->upscript
) {
1989 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
1990 "veth", veth1
, (char*) NULL
);
1995 DEBUG("instanciated veth '%s/%s', index is '%d'",
1996 veth1
, veth2
, netdev
->ifindex
);
2001 lxc_netdev_delete_by_name(veth1
);
2005 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2010 if (netdev
->priv
.veth_attr
.pair
)
2011 veth1
= netdev
->priv
.veth_attr
.pair
;
2013 veth1
= netdev
->priv
.veth_attr
.veth1
;
2015 if (netdev
->downscript
) {
2016 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2017 "down", "veth", veth1
, (char*) NULL
);
2024 static int instanciate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2026 char peerbuf
[IFNAMSIZ
], *peer
;
2029 if (!netdev
->link
) {
2030 ERROR("no link specified for macvlan netdev");
2034 err
= snprintf(peerbuf
, sizeof(peerbuf
), "mcXXXXXX");
2035 if (err
>= sizeof(peerbuf
))
2038 peer
= mktemp(peerbuf
);
2039 if (!strlen(peer
)) {
2040 ERROR("failed to make a temporary name");
2044 err
= lxc_macvlan_create(netdev
->link
, peer
,
2045 netdev
->priv
.macvlan_attr
.mode
);
2047 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2048 peer
, netdev
->link
, strerror(-err
));
2052 netdev
->ifindex
= if_nametoindex(peer
);
2053 if (!netdev
->ifindex
) {
2054 ERROR("failed to retrieve the index for %s", peer
);
2055 lxc_netdev_delete_by_name(peer
);
2059 if (netdev
->upscript
) {
2060 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
2061 "macvlan", netdev
->link
, (char*) NULL
);
2066 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2067 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
2072 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2076 if (netdev
->downscript
) {
2077 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2078 "down", "macvlan", netdev
->link
,
2086 /* XXX: merge with instanciate_macvlan */
2087 static int instanciate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2089 char peer
[IFNAMSIZ
];
2092 if (!netdev
->link
) {
2093 ERROR("no link specified for vlan netdev");
2097 err
= snprintf(peer
, sizeof(peer
), "vlan%d", netdev
->priv
.vlan_attr
.vid
);
2098 if (err
>= sizeof(peer
)) {
2099 ERROR("peer name too long");
2103 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
2105 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2106 peer
, netdev
->link
, strerror(-err
));
2110 netdev
->ifindex
= if_nametoindex(peer
);
2111 if (!netdev
->ifindex
) {
2112 ERROR("failed to retrieve the ifindex for %s", peer
);
2113 lxc_netdev_delete_by_name(peer
);
2117 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
/*
 * Shutdown callback for vlan devices.
 *
 * NOTE(review): the body was not visible in the damaged listing; the
 * embedded numbering leaves room for a single statement, assumed to be
 * a plain success return - confirm.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
2128 static int instanciate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2130 if (!netdev
->link
) {
2131 ERROR("no link specified for the physical interface");
2135 netdev
->ifindex
= if_nametoindex(netdev
->link
);
2136 if (!netdev
->ifindex
) {
2137 ERROR("failed to retrieve the index for %s", netdev
->link
);
2141 if (netdev
->upscript
) {
2143 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2144 "up", "phys", netdev
->link
, (char*) NULL
);
2152 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2156 if (netdev
->downscript
) {
2157 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2158 "down", "phys", netdev
->link
, (char*) NULL
);
2165 static int instanciate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2167 netdev
->ifindex
= 0;
2168 if (netdev
->upscript
) {
2170 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2171 "up", "empty", (char*) NULL
);
2178 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2182 if (netdev
->downscript
) {
2183 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2184 "down", "empty", (char*) NULL
);
2191 int lxc_create_network(struct lxc_handler
*handler
)
2193 struct lxc_list
*network
= &handler
->conf
->network
;
2194 struct lxc_list
*iterator
;
2195 struct lxc_netdev
*netdev
;
2197 lxc_list_for_each(iterator
, network
) {
2199 netdev
= iterator
->elem
;
2201 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
) {
2202 ERROR("invalid network configuration type '%d'",
2207 if (netdev_conf
[netdev
->type
](handler
, netdev
)) {
2208 ERROR("failed to create netdev");
2217 void lxc_delete_network(struct lxc_handler
*handler
)
2219 struct lxc_list
*network
= &handler
->conf
->network
;
2220 struct lxc_list
*iterator
;
2221 struct lxc_netdev
*netdev
;
2223 lxc_list_for_each(iterator
, network
) {
2224 netdev
= iterator
->elem
;
2226 if (netdev
->ifindex
!= 0 && netdev
->type
== LXC_NET_PHYS
) {
2227 if (lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
))
2228 WARN("failed to rename to the initial name the " \
2229 "netdev '%s'", netdev
->link
);
2233 if (netdev_deconf
[netdev
->type
](handler
, netdev
)) {
2234 WARN("failed to destroy netdev");
2237 /* Recent kernel remove the virtual interfaces when the network
2238 * namespace is destroyed but in case we did not moved the
2239 * interface to the network namespace, we have to destroy it
2241 if (netdev
->ifindex
!= 0 &&
2242 lxc_netdev_delete_by_index(netdev
->ifindex
))
2243 WARN("failed to remove interface '%s'", netdev
->name
);
2247 int lxc_assign_network(struct lxc_list
*network
, pid_t pid
)
2249 struct lxc_list
*iterator
;
2250 struct lxc_netdev
*netdev
;
2253 lxc_list_for_each(iterator
, network
) {
2255 netdev
= iterator
->elem
;
2257 /* empty network namespace, nothing to move */
2258 if (!netdev
->ifindex
)
2261 err
= lxc_netdev_move_by_index(netdev
->ifindex
, pid
);
2263 ERROR("failed to move '%s' to the container : %s",
2264 netdev
->link
, strerror(-err
));
2268 DEBUG("move '%s' to '%d'", netdev
->name
, pid
);
2274 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2276 struct lxc_list
*network
= &handler
->conf
->network
;
2277 struct lxc_list
*iterator
;
2278 struct lxc_netdev
*netdev
;
2281 lxc_list_for_each(iterator
, network
) {
2282 netdev
= iterator
->elem
;
2284 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2287 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
) {
2288 ERROR("gateway = auto only supported for "
2289 "veth and macvlan");
2293 if (!netdev
->link
) {
2294 ERROR("gateway = auto needs a link interface");
2298 link_index
= if_nametoindex(netdev
->link
);
2302 if (netdev
->ipv4_gateway_auto
) {
2303 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
)) {
2304 ERROR("failed to automatically find ipv4 gateway "
2305 "address from link interface '%s'", netdev
->link
);
2310 if (netdev
->ipv6_gateway_auto
) {
2311 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
)) {
2312 ERROR("failed to automatically find ipv6 gateway "
2313 "address from link interface '%s'", netdev
->link
);
2322 int lxc_create_tty(const char *name
, struct lxc_conf
*conf
)
2324 struct lxc_tty_info
*tty_info
= &conf
->tty_info
;
2327 /* no tty in the configuration */
2331 tty_info
->pty_info
=
2332 malloc(sizeof(*tty_info
->pty_info
)*conf
->tty
);
2333 if (!tty_info
->pty_info
) {
2334 SYSERROR("failed to allocate pty_info");
2338 for (i
= 0; i
< conf
->tty
; i
++) {
2340 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2342 if (openpty(&pty_info
->master
, &pty_info
->slave
,
2343 pty_info
->name
, NULL
, NULL
)) {
2344 SYSERROR("failed to create pty #%d", i
);
2345 tty_info
->nbtty
= i
;
2346 lxc_delete_tty(tty_info
);
2350 DEBUG("allocated pty '%s' (%d/%d)",
2351 pty_info
->name
, pty_info
->master
, pty_info
->slave
);
2353 /* Prevent leaking the file descriptors to the container */
2354 fcntl(pty_info
->master
, F_SETFD
, FD_CLOEXEC
);
2355 fcntl(pty_info
->slave
, F_SETFD
, FD_CLOEXEC
);
2360 tty_info
->nbtty
= conf
->tty
;
2362 INFO("tty's configured");
2367 void lxc_delete_tty(struct lxc_tty_info
*tty_info
)
2371 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
2372 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2374 close(pty_info
->master
);
2375 close(pty_info
->slave
);
2378 free(tty_info
->pty_info
);
2379 tty_info
->nbtty
= 0;
2382 int lxc_setup(const char *name
, struct lxc_conf
*lxc_conf
)
2384 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2388 if (setup_utsname(lxc_conf
->utsname
)) {
2389 ERROR("failed to setup the utsname for '%s'", name
);
2393 if (setup_network(&lxc_conf
->network
)) {
2394 ERROR("failed to setup the network for '%s'", name
);
2398 if (run_lxc_hooks(name
, "pre-mount", lxc_conf
)) {
2399 ERROR("failed to run pre-mount hooks for container '%s'.", name
);
2403 if (setup_rootfs(&lxc_conf
->rootfs
)) {
2404 ERROR("failed to setup rootfs for '%s'", name
);
2408 if (lxc_conf
->autodev
) {
2409 if (mount_autodev(lxc_conf
->rootfs
.mount
)) {
2410 ERROR("failed to mount /dev in the container");
2415 if (setup_mount(&lxc_conf
->rootfs
, lxc_conf
->fstab
, name
)) {
2416 ERROR("failed to setup the mounts for '%s'", name
);
2420 if (setup_mount_entries(&lxc_conf
->rootfs
, &lxc_conf
->mount_list
, name
)) {
2421 ERROR("failed to setup the mount entries for '%s'", name
);
2425 if (run_lxc_hooks(name
, "mount", lxc_conf
)) {
2426 ERROR("failed to run mount hooks for container '%s'.", name
);
2430 if (lxc_conf
->autodev
) {
2431 if (setup_autodev(lxc_conf
->rootfs
.mount
)) {
2432 ERROR("failed to populate /dev in the container");
2437 if (setup_cgroup(name
, &lxc_conf
->cgroup
)) {
2438 ERROR("failed to setup the cgroups for '%s'", name
);
2442 if (setup_console(&lxc_conf
->rootfs
, &lxc_conf
->console
, lxc_conf
->ttydir
)) {
2443 ERROR("failed to setup the console for '%s'", name
);
2447 if (setup_kmsg(&lxc_conf
->rootfs
, &lxc_conf
->console
)) // don't fail
2448 ERROR("failed to setup kmsg for '%s'", name
);
2450 if (setup_tty(&lxc_conf
->rootfs
, &lxc_conf
->tty_info
, lxc_conf
->ttydir
)) {
2451 ERROR("failed to setup the ttys for '%s'", name
);
2455 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2456 INFO("rootfs path is .%s., mount is .%s.", lxc_conf
->rootfs
.path
,
2457 lxc_conf
->rootfs
.mount
);
2458 if (lxc_conf
->rootfs
.path
== NULL
|| strlen(lxc_conf
->rootfs
.path
) == 0)
2461 mounted
= lsm_mount_proc_if_needed(lxc_conf
->rootfs
.path
, lxc_conf
->rootfs
.mount
);
2462 if (mounted
== -1) {
2463 SYSERROR("failed to mount /proc in the container.");
2465 } else if (mounted
== 1) {
2466 lxc_conf
->lsm_umount_proc
= 1;
2470 if (setup_pivot_root(&lxc_conf
->rootfs
)) {
2471 ERROR("failed to set rootfs for '%s'", name
);
2475 if (setup_pts(lxc_conf
->pts
)) {
2476 ERROR("failed to setup the new pts instance");
2480 if (setup_personality(lxc_conf
->personality
)) {
2481 ERROR("failed to setup personality");
2485 if (setup_caps(&lxc_conf
->caps
)) {
2486 ERROR("failed to drop capabilities");
2490 NOTICE("'%s' is setup.", name
);
2495 int run_lxc_hooks(const char *name
, char *hook
, struct lxc_conf
*conf
)
2498 struct lxc_list
*it
;
2500 if (strcmp(hook
, "pre-start") == 0)
2501 which
= LXCHOOK_PRESTART
;
2502 else if (strcmp(hook
, "pre-mount") == 0)
2503 which
= LXCHOOK_PREMOUNT
;
2504 else if (strcmp(hook
, "mount") == 0)
2505 which
= LXCHOOK_MOUNT
;
2506 else if (strcmp(hook
, "start") == 0)
2507 which
= LXCHOOK_START
;
2508 else if (strcmp(hook
, "post-stop") == 0)
2509 which
= LXCHOOK_POSTSTOP
;
2512 lxc_list_for_each(it
, &conf
->hooks
[which
]) {
2514 char *hookname
= it
->elem
;
2515 ret
= run_script(name
, "lxc", hookname
, hook
, NULL
);
2522 static void lxc_remove_nic(struct lxc_list
*it
)
2524 struct lxc_netdev
*netdev
= it
->elem
;
2525 struct lxc_list
*it2
,*next
;
2533 if (netdev
->upscript
)
2534 free(netdev
->upscript
);
2536 free(netdev
->hwaddr
);
2539 if (netdev
->ipv4_gateway
)
2540 free(netdev
->ipv4_gateway
);
2541 if (netdev
->ipv6_gateway
)
2542 free(netdev
->ipv6_gateway
);
2543 lxc_list_for_each_safe(it2
, &netdev
->ipv4
, next
) {
2548 lxc_list_for_each_safe(it2
, &netdev
->ipv6
, next
) {
2557 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
2558 int lxc_clear_nic(struct lxc_conf
*c
, const char *key
)
2562 struct lxc_list
*it
;
2563 struct lxc_netdev
*netdev
;
2565 p1
= index(key
, '.');
2566 if (!p1
|| *(p1
+1) == '\0')
2569 ret
= sscanf(key
, "%d", &idx
);
2570 if (ret
!= 1) return -1;
2575 lxc_list_for_each(it
, &c
->network
) {
2580 if (i
< idx
) // we don't have that many nics defined
2583 if (!it
|| !it
->elem
)
2590 } else if (strcmp(p1
, "ipv4") == 0) {
2591 struct lxc_list
*it2
,*next
;
2592 lxc_list_for_each_safe(it2
, &netdev
->ipv4
, next
) {
2597 } else if (strcmp(p1
, "ipv6") == 0) {
2598 struct lxc_list
*it2
,*next
;
2599 lxc_list_for_each_safe(it2
, &netdev
->ipv6
, next
) {
2604 } else if (strcmp(p1
, "link") == 0) {
2607 netdev
->link
= NULL
;
2609 } else if (strcmp(p1
, "name") == 0) {
2612 netdev
->name
= NULL
;
2614 } else if (strcmp(p1
, "script.up") == 0) {
2615 if (netdev
->upscript
) {
2616 free(netdev
->upscript
);
2617 netdev
->upscript
= NULL
;
2619 } else if (strcmp(p1
, "hwaddr") == 0) {
2620 if (netdev
->hwaddr
) {
2621 free(netdev
->hwaddr
);
2622 netdev
->hwaddr
= NULL
;
2624 } else if (strcmp(p1
, "mtu") == 0) {
2629 } else if (strcmp(p1
, "ipv4_gateway") == 0) {
2630 if (netdev
->ipv4_gateway
) {
2631 free(netdev
->ipv4_gateway
);
2632 netdev
->ipv4_gateway
= NULL
;
2634 } else if (strcmp(p1
, "ipv6_gateway") == 0) {
2635 if (netdev
->ipv6_gateway
) {
2636 free(netdev
->ipv6_gateway
);
2637 netdev
->ipv6_gateway
= NULL
;
2645 int lxc_clear_config_network(struct lxc_conf
*c
)
2647 struct lxc_list
*it
,*next
;
2648 lxc_list_for_each_safe(it
, &c
->network
, next
) {
2654 int lxc_clear_config_caps(struct lxc_conf
*c
)
2656 struct lxc_list
*it
,*next
;
2658 lxc_list_for_each_safe(it
, &c
->caps
, next
) {
2666 int lxc_clear_cgroups(struct lxc_conf
*c
, const char *key
)
2668 struct lxc_list
*it
,*next
;
2670 const char *k
= key
+ 11;
2672 if (strcmp(key
, "lxc.cgroup") == 0)
2675 lxc_list_for_each_safe(it
, &c
->cgroup
, next
) {
2676 struct lxc_cgroup
*cg
= it
->elem
;
2677 if (!all
&& strcmp(cg
->subsystem
, k
) != 0)
2680 free(cg
->subsystem
);
2688 int lxc_clear_mount_entries(struct lxc_conf
*c
)
2690 struct lxc_list
*it
,*next
;
2692 lxc_list_for_each_safe(it
, &c
->mount_list
, next
) {
2700 int lxc_clear_hooks(struct lxc_conf
*c
, const char *key
)
2702 struct lxc_list
*it
,*next
;
2703 bool all
= false, done
= false;
2704 const char *k
= key
+ 9;
2707 if (strcmp(key
, "lxc.hook") == 0)
2710 for (i
=0; i
<NUM_LXC_HOOKS
; i
++) {
2711 if (all
|| strcmp(k
, lxchook_names
[i
]) == 0) {
2712 lxc_list_for_each_safe(it
, &c
->hooks
[i
], next
) {
2722 ERROR("Invalid hook key: %s", key
);
2728 void lxc_clear_saved_nics(struct lxc_conf
*conf
)
2732 if (!conf
->num_savednics
)
2734 for (i
=0; i
< conf
->num_savednics
; i
++)
2735 free(conf
->saved_nics
[i
].orig_name
);
2736 conf
->saved_nics
= 0;
2737 free(conf
->saved_nics
);
2740 void lxc_conf_free(struct lxc_conf
*conf
)
2744 if (conf
->console
.path
)
2745 free(conf
->console
.path
);
2746 if (conf
->rootfs
.mount
!= default_rootfs_mount
)
2747 free(conf
->rootfs
.mount
);
2748 if (conf
->rootfs
.path
)
2749 free(conf
->rootfs
.path
);
2751 free(conf
->utsname
);
2757 free(conf
->logfile
);
2758 lxc_clear_config_network(conf
);
2760 if (conf
->aa_profile
)
2761 free(conf
->aa_profile
);
2763 lxc_clear_config_caps(conf
);
2764 lxc_clear_cgroups(conf
, "lxc.cgroup");
2765 lxc_clear_hooks(conf
, "lxc.hook");
2766 lxc_clear_mount_entries(conf
);
2767 lxc_clear_saved_nics(conf
);