2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
36 #include <linux/loop.h>
38 #include <sys/types.h>
39 #include <sys/utsname.h>
40 #include <sys/param.h>
42 #include <sys/socket.h>
43 #include <sys/mount.h>
45 #include <sys/prctl.h>
46 #include <sys/capability.h>
47 #include <sys/personality.h>
49 #include <arpa/inet.h>
51 #include <netinet/in.h>
62 #include "lxc.h" /* for lxc_cgroup_set() */
63 #include "caps.h" /* for lxc_caps_last_cap() */
69 lxc_log_define(lxc_conf
, lxc
);
72 #define MAXINDEXLEN 20
74 #define MAXLINELEN 128
77 #define MS_DIRSYNC 128
89 #define MS_RELATIME (1 << 21)
92 #ifndef MS_STRICTATIME
93 #define MS_STRICTATIME (1 << 24)
97 #define CAP_SETFCAP 31
100 #ifndef CAP_MAC_OVERRIDE
101 #define CAP_MAC_OVERRIDE 32
104 #ifndef CAP_MAC_ADMIN
105 #define CAP_MAC_ADMIN 33
108 #ifndef PR_CAPBSET_DROP
109 #define PR_CAPBSET_DROP 24
112 char *lxchook_names
[NUM_LXC_HOOKS
] = {
113 "pre-start", "pre-mount", "mount", "start", "post-stop" };
115 extern int pivot_root(const char * new_root
, const char * put_old
);
117 typedef int (*instanciate_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
130 static int instanciate_veth(struct lxc_handler
*, struct lxc_netdev
*);
131 static int instanciate_macvlan(struct lxc_handler
*, struct lxc_netdev
*);
132 static int instanciate_vlan(struct lxc_handler
*, struct lxc_netdev
*);
133 static int instanciate_phys(struct lxc_handler
*, struct lxc_netdev
*);
134 static int instanciate_empty(struct lxc_handler
*, struct lxc_netdev
*);
136 static instanciate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
137 [LXC_NET_VETH
] = instanciate_veth
,
138 [LXC_NET_MACVLAN
] = instanciate_macvlan
,
139 [LXC_NET_VLAN
] = instanciate_vlan
,
140 [LXC_NET_PHYS
] = instanciate_phys
,
141 [LXC_NET_EMPTY
] = instanciate_empty
,
144 static int shutdown_veth(struct lxc_handler
*, struct lxc_netdev
*);
145 static int shutdown_macvlan(struct lxc_handler
*, struct lxc_netdev
*);
146 static int shutdown_vlan(struct lxc_handler
*, struct lxc_netdev
*);
147 static int shutdown_phys(struct lxc_handler
*, struct lxc_netdev
*);
148 static int shutdown_empty(struct lxc_handler
*, struct lxc_netdev
*);
150 static instanciate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
151 [LXC_NET_VETH
] = shutdown_veth
,
152 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
153 [LXC_NET_VLAN
] = shutdown_vlan
,
154 [LXC_NET_PHYS
] = shutdown_phys
,
155 [LXC_NET_EMPTY
] = shutdown_empty
,
158 static struct mount_opt mount_opt
[] = {
159 { "defaults", 0, 0 },
160 { "ro", 0, MS_RDONLY
},
161 { "rw", 1, MS_RDONLY
},
162 { "suid", 1, MS_NOSUID
},
163 { "nosuid", 0, MS_NOSUID
},
164 { "dev", 1, MS_NODEV
},
165 { "nodev", 0, MS_NODEV
},
166 { "exec", 1, MS_NOEXEC
},
167 { "noexec", 0, MS_NOEXEC
},
168 { "sync", 0, MS_SYNCHRONOUS
},
169 { "async", 1, MS_SYNCHRONOUS
},
170 { "dirsync", 0, MS_DIRSYNC
},
171 { "remount", 0, MS_REMOUNT
},
172 { "mand", 0, MS_MANDLOCK
},
173 { "nomand", 1, MS_MANDLOCK
},
174 { "atime", 1, MS_NOATIME
},
175 { "noatime", 0, MS_NOATIME
},
176 { "diratime", 1, MS_NODIRATIME
},
177 { "nodiratime", 0, MS_NODIRATIME
},
178 { "bind", 0, MS_BIND
},
179 { "rbind", 0, MS_BIND
|MS_REC
},
180 { "relatime", 0, MS_RELATIME
},
181 { "norelatime", 1, MS_RELATIME
},
182 { "strictatime", 0, MS_STRICTATIME
},
183 { "nostrictatime", 1, MS_STRICTATIME
},
187 static struct caps_opt caps_opt
[] = {
188 { "chown", CAP_CHOWN
},
189 { "dac_override", CAP_DAC_OVERRIDE
},
190 { "dac_read_search", CAP_DAC_READ_SEARCH
},
191 { "fowner", CAP_FOWNER
},
192 { "fsetid", CAP_FSETID
},
193 { "kill", CAP_KILL
},
194 { "setgid", CAP_SETGID
},
195 { "setuid", CAP_SETUID
},
196 { "setpcap", CAP_SETPCAP
},
197 { "linux_immutable", CAP_LINUX_IMMUTABLE
},
198 { "net_bind_service", CAP_NET_BIND_SERVICE
},
199 { "net_broadcast", CAP_NET_BROADCAST
},
200 { "net_admin", CAP_NET_ADMIN
},
201 { "net_raw", CAP_NET_RAW
},
202 { "ipc_lock", CAP_IPC_LOCK
},
203 { "ipc_owner", CAP_IPC_OWNER
},
204 { "sys_module", CAP_SYS_MODULE
},
205 { "sys_rawio", CAP_SYS_RAWIO
},
206 { "sys_chroot", CAP_SYS_CHROOT
},
207 { "sys_ptrace", CAP_SYS_PTRACE
},
208 { "sys_pacct", CAP_SYS_PACCT
},
209 { "sys_admin", CAP_SYS_ADMIN
},
210 { "sys_boot", CAP_SYS_BOOT
},
211 { "sys_nice", CAP_SYS_NICE
},
212 { "sys_resource", CAP_SYS_RESOURCE
},
213 { "sys_time", CAP_SYS_TIME
},
214 { "sys_tty_config", CAP_SYS_TTY_CONFIG
},
215 { "mknod", CAP_MKNOD
},
216 { "lease", CAP_LEASE
},
217 #ifdef CAP_AUDIT_WRITE
218 { "audit_write", CAP_AUDIT_WRITE
},
220 #ifdef CAP_AUDIT_CONTROL
221 { "audit_control", CAP_AUDIT_CONTROL
},
223 { "setfcap", CAP_SETFCAP
},
224 { "mac_override", CAP_MAC_OVERRIDE
},
225 { "mac_admin", CAP_MAC_ADMIN
},
227 { "syslog", CAP_SYSLOG
},
229 #ifdef CAP_WAKE_ALARM
230 { "wake_alarm", CAP_WAKE_ALARM
},
234 static int run_script(const char *name
, const char *section
,
235 const char *script
, ...)
239 char *buffer
, *p
, *output
;
243 INFO("Executing script '%s' for container '%s', config section '%s'",
244 script
, name
, section
);
246 va_start(ap
, script
);
247 while ((p
= va_arg(ap
, char *)))
248 size
+= strlen(p
) + 1;
251 size
+= strlen(script
);
252 size
+= strlen(name
);
253 size
+= strlen(section
);
259 buffer
= alloca(size
);
261 ERROR("failed to allocate memory");
265 ret
= snprintf(buffer
, size
, "%s %s %s", script
, name
, section
);
266 if (ret
< 0 || ret
>= size
) {
267 ERROR("Script name too long");
272 va_start(ap
, script
);
273 while ((p
= va_arg(ap
, char *))) {
276 rc
= snprintf(buffer
+ ret
, len
, " %s", p
);
277 if (rc
< 0 || rc
>= len
) {
279 ERROR("Script args too long");
286 f
= popen(buffer
, "r");
288 SYSERROR("popen failed");
292 output
= malloc(LXC_LOG_BUFFER_SIZE
);
294 ERROR("failed to allocate memory for script output");
298 while(fgets(output
, LXC_LOG_BUFFER_SIZE
, f
))
299 DEBUG("script output: %s", output
);
303 if (pclose(f
) == -1) {
304 SYSERROR("Script exited on error");
311 static int find_fstype_cb(char* buffer
, void *data
)
321 /* we don't try 'nodev' entries */
322 if (strstr(buffer
, "nodev"))
326 fstype
+= lxc_char_left_gc(fstype
, strlen(fstype
));
327 fstype
[lxc_char_right_gc(fstype
, strlen(fstype
))] = '\0';
329 DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
330 cbarg
->rootfs
, cbarg
->target
, fstype
);
332 if (mount(cbarg
->rootfs
, cbarg
->target
, fstype
, cbarg
->mntopt
, NULL
)) {
333 DEBUG("mount failed with error: %s", strerror(errno
));
337 INFO("mounted '%s' on '%s', with fstype '%s'",
338 cbarg
->rootfs
, cbarg
->target
, fstype
);
343 static int mount_unknow_fs(const char *rootfs
, const char *target
, int mntopt
)
358 * find the filesystem type with brute force:
359 * first we check with /etc/filesystems, in case the modules
360 * are auto-loaded and fall back to the supported kernel fs
367 for (i
= 0; i
< sizeof(fsfile
)/sizeof(fsfile
[0]); i
++) {
371 if (access(fsfile
[i
], F_OK
))
374 ret
= lxc_file_for_each_line(fsfile
[i
], find_fstype_cb
, &cbarg
);
376 ERROR("failed to parse '%s'", fsfile
[i
]);
384 ERROR("failed to determine fs type for '%s'", rootfs
);
388 static int mount_rootfs_dir(const char *rootfs
, const char *target
)
390 return mount(rootfs
, target
, "none", MS_BIND
| MS_REC
, NULL
);
393 static int setup_lodev(const char *rootfs
, int fd
, struct loop_info64
*loinfo
)
398 rfd
= open(rootfs
, O_RDWR
);
400 SYSERROR("failed to open '%s'", rootfs
);
404 memset(loinfo
, 0, sizeof(*loinfo
));
406 loinfo
->lo_flags
= LO_FLAGS_AUTOCLEAR
;
408 if (ioctl(fd
, LOOP_SET_FD
, rfd
)) {
409 SYSERROR("failed to LOOP_SET_FD");
413 if (ioctl(fd
, LOOP_SET_STATUS64
, loinfo
)) {
414 SYSERROR("failed to LOOP_SET_STATUS64");
425 static int mount_rootfs_file(const char *rootfs
, const char *target
)
427 struct dirent dirent
, *direntp
;
428 struct loop_info64 loinfo
;
429 int ret
= -1, fd
= -1, rc
;
431 char path
[MAXPATHLEN
];
433 dir
= opendir("/dev");
435 SYSERROR("failed to open '/dev'");
439 while (!readdir_r(dir
, &dirent
, &direntp
)) {
444 if (!strcmp(direntp
->d_name
, "."))
447 if (!strcmp(direntp
->d_name
, ".."))
450 if (strncmp(direntp
->d_name
, "loop", 4))
453 rc
= snprintf(path
, MAXPATHLEN
, "/dev/%s", direntp
->d_name
);
454 if (rc
< 0 || rc
>= MAXPATHLEN
)
457 fd
= open(path
, O_RDWR
);
461 if (ioctl(fd
, LOOP_GET_STATUS64
, &loinfo
) == 0) {
466 if (errno
!= ENXIO
) {
467 WARN("unexpected error for ioctl on '%s': %m",
472 DEBUG("found '%s' free lodev", path
);
474 ret
= setup_lodev(rootfs
, fd
, &loinfo
);
476 ret
= mount_unknow_fs(path
, target
, 0);
483 WARN("failed to close directory");
488 static int mount_rootfs_block(const char *rootfs
, const char *target
)
490 return mount_unknow_fs(rootfs
, target
, 0);
495 * if rootfs is a directory, then open ${rootfs}.hold for writing for the
496 * duration of the container run, to prevent the container from marking the
497 * underlying fs readonly on shutdown.
498 * return -1 on error.
499 * return -2 if nothing needed to be pinned.
500 * return an open fd (>=0) if we pinned it.
502 int pin_rootfs(const char *rootfs
)
504 char absrootfs
[MAXPATHLEN
];
505 char absrootfspin
[MAXPATHLEN
];
509 if (rootfs
== NULL
|| strlen(rootfs
) == 0)
512 if (!realpath(rootfs
, absrootfs
)) {
513 SYSERROR("failed to get real path for '%s'", rootfs
);
517 if (access(absrootfs
, F_OK
)) {
518 SYSERROR("'%s' is not accessible", absrootfs
);
522 if (stat(absrootfs
, &s
)) {
523 SYSERROR("failed to stat '%s'", absrootfs
);
527 if (!__S_ISTYPE(s
.st_mode
, S_IFDIR
))
530 ret
= snprintf(absrootfspin
, MAXPATHLEN
, "%s%s", absrootfs
, ".hold");
531 if (ret
>= MAXPATHLEN
) {
532 SYSERROR("pathname too long for rootfs hold file");
536 fd
= open(absrootfspin
, O_CREAT
| O_RDWR
, S_IWUSR
|S_IRUSR
);
537 INFO("opened %s as fd %d\n", absrootfspin
, fd
);
541 static int mount_rootfs(const char *rootfs
, const char *target
)
543 char absrootfs
[MAXPATHLEN
];
547 typedef int (*rootfs_cb
)(const char *, const char *);
553 { S_IFDIR
, mount_rootfs_dir
},
554 { S_IFBLK
, mount_rootfs_block
},
555 { S_IFREG
, mount_rootfs_file
},
558 if (!realpath(rootfs
, absrootfs
)) {
559 SYSERROR("failed to get real path for '%s'", rootfs
);
563 if (access(absrootfs
, F_OK
)) {
564 SYSERROR("'%s' is not accessible", absrootfs
);
568 if (stat(absrootfs
, &s
)) {
569 SYSERROR("failed to stat '%s'", absrootfs
);
573 for (i
= 0; i
< sizeof(rtfs_type
)/sizeof(rtfs_type
[0]); i
++) {
575 if (!__S_ISTYPE(s
.st_mode
, rtfs_type
[i
].type
))
578 return rtfs_type
[i
].cb(absrootfs
, target
);
581 ERROR("unsupported rootfs type for '%s'", absrootfs
);
585 static int setup_utsname(struct utsname
*utsname
)
590 if (sethostname(utsname
->nodename
, strlen(utsname
->nodename
))) {
591 SYSERROR("failed to set the hostname to '%s'", utsname
->nodename
);
595 INFO("'%s' hostname has been setup", utsname
->nodename
);
600 static int setup_tty(const struct lxc_rootfs
*rootfs
,
601 const struct lxc_tty_info
*tty_info
, char *ttydir
)
603 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
609 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
611 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
613 ret
= snprintf(path
, sizeof(path
), "%s/dev/tty%d",
614 rootfs
->mount
, i
+ 1);
615 if (ret
>= sizeof(path
)) {
616 ERROR("pathname too long for ttys");
620 /* create dev/lxc/tty%d" */
621 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/tty%d",
622 rootfs
->mount
, ttydir
, i
+ 1);
623 if (ret
>= sizeof(lxcpath
)) {
624 ERROR("pathname too long for ttys");
627 ret
= creat(lxcpath
, 0660);
628 if (ret
==-1 && errno
!= EEXIST
) {
629 SYSERROR("error creating %s\n", lxcpath
);
634 if (ret
&& errno
!= ENOENT
) {
635 SYSERROR("error unlinking %s\n", path
);
639 if (mount(pty_info
->name
, lxcpath
, "none", MS_BIND
, 0)) {
640 WARN("failed to mount '%s'->'%s'",
641 pty_info
->name
, path
);
645 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/tty%d", ttydir
, i
+1);
646 if (ret
>= sizeof(lxcpath
)) {
647 ERROR("tty pathname too long");
650 ret
= symlink(lxcpath
, path
);
652 SYSERROR("failed to create symlink for tty %d\n", i
+1);
656 if (mount(pty_info
->name
, path
, "none", MS_BIND
, 0)) {
657 WARN("failed to mount '%s'->'%s'",
658 pty_info
->name
, path
);
664 INFO("%d tty(s) has been setup", tty_info
->nbtty
);
669 static int setup_rootfs_pivot_root_cb(char *buffer
, void *data
)
671 struct lxc_list
*mountlist
, *listentry
, *iterator
;
672 char *pivotdir
, *mountpoint
, *mountentry
;
677 cbparm
= (void **)data
;
679 mountlist
= cbparm
[0];
680 pivotdir
= cbparm
[1];
682 /* parse entry, first field is mountname, ignore */
683 mountpoint
= strtok(mountentry
, " ");
687 /* second field is mountpoint */
688 mountpoint
= strtok(NULL
, " ");
692 /* only consider mountpoints below old root fs */
693 if (strncmp(mountpoint
, pivotdir
, strlen(pivotdir
)))
696 /* filter duplicate mountpoints */
698 lxc_list_for_each(iterator
, mountlist
) {
699 if (!strcmp(iterator
->elem
, mountpoint
)) {
707 /* add entry to list */
708 listentry
= malloc(sizeof(*listentry
));
710 SYSERROR("malloc for mountpoint listentry failed");
714 listentry
->elem
= strdup(mountpoint
);
715 if (!listentry
->elem
) {
716 SYSERROR("strdup failed");
719 lxc_list_add_tail(mountlist
, listentry
);
724 static int umount_oldrootfs(const char *oldrootfs
)
726 char path
[MAXPATHLEN
];
728 struct lxc_list mountlist
, *iterator
;
729 int ok
, still_mounted
, last_still_mounted
;
732 /* read and parse /proc/mounts in old root fs */
733 lxc_list_init(&mountlist
);
735 /* oldrootfs is on the top tree directory now */
736 rc
= snprintf(path
, sizeof(path
), "/%s", oldrootfs
);
737 if (rc
>= sizeof(path
)) {
738 ERROR("rootfs name too long");
741 cbparm
[0] = &mountlist
;
743 cbparm
[1] = strdup(path
);
745 SYSERROR("strdup failed");
749 rc
= snprintf(path
, sizeof(path
), "%s/proc/mounts", oldrootfs
);
750 if (rc
>= sizeof(path
)) {
751 ERROR("container proc/mounts name too long");
755 ok
= lxc_file_for_each_line(path
,
756 setup_rootfs_pivot_root_cb
, &cbparm
);
758 SYSERROR("failed to read or parse mount list '%s'", path
);
762 /* umount filesystems until none left or list no longer shrinks */
765 last_still_mounted
= still_mounted
;
768 lxc_list_for_each(iterator
, &mountlist
) {
770 /* umount normally */
771 if (!umount(iterator
->elem
)) {
772 DEBUG("umounted '%s'", (char *)iterator
->elem
);
773 lxc_list_del(iterator
);
780 } while (still_mounted
> 0 && still_mounted
!= last_still_mounted
);
783 lxc_list_for_each(iterator
, &mountlist
) {
785 /* let's try a lazy umount */
786 if (!umount2(iterator
->elem
, MNT_DETACH
)) {
787 INFO("lazy unmount of '%s'", (char *)iterator
->elem
);
791 /* be more brutal (nfs) */
792 if (!umount2(iterator
->elem
, MNT_FORCE
)) {
793 INFO("forced unmount of '%s'", (char *)iterator
->elem
);
797 WARN("failed to unmount '%s'", (char *)iterator
->elem
);
803 static int setup_rootfs_pivot_root(const char *rootfs
, const char *pivotdir
)
805 char path
[MAXPATHLEN
];
806 int remove_pivotdir
= 0;
809 /* change into new root fs */
811 SYSERROR("can't chdir to new rootfs '%s'", rootfs
);
816 pivotdir
= "lxc_putold";
818 /* compute the full path to pivotdir under rootfs */
819 rc
= snprintf(path
, sizeof(path
), "%s/%s", rootfs
, pivotdir
);
820 if (rc
>= sizeof(path
)) {
821 ERROR("pivot dir name too long");
825 if (access(path
, F_OK
)) {
827 if (mkdir_p(path
, 0755)) {
828 SYSERROR("failed to create pivotdir '%s'", path
);
833 DEBUG("created '%s' directory", path
);
836 DEBUG("mountpoint for old rootfs is '%s'", path
);
838 /* pivot_root into our new root fs */
839 if (pivot_root(".", path
)) {
840 SYSERROR("pivot_root syscall failed");
845 SYSERROR("can't chdir to / after pivot_root");
849 DEBUG("pivot_root syscall to '%s' successful", rootfs
);
851 /* we switch from absolute path to relative path */
852 if (umount_oldrootfs(pivotdir
))
855 /* remove temporary mount point, we don't consider the removing
857 if (remove_pivotdir
&& rmdir(pivotdir
))
858 WARN("can't remove mountpoint '%s': %m", pivotdir
);
863 static int setup_rootfs(const struct lxc_rootfs
*rootfs
)
868 if (access(rootfs
->mount
, F_OK
)) {
869 SYSERROR("failed to access to '%s', check it is present",
874 if (mount_rootfs(rootfs
->path
, rootfs
->mount
)) {
875 ERROR("failed to mount rootfs");
879 DEBUG("mounted '%s' on '%s'", rootfs
->path
, rootfs
->mount
);
884 int setup_pivot_root(const struct lxc_rootfs
*rootfs
)
889 if (setup_rootfs_pivot_root(rootfs
->mount
, rootfs
->pivot
)) {
890 ERROR("failed to setup pivot root");
897 static int setup_pts(int pts
)
899 char target
[PATH_MAX
];
904 if (!access("/dev/pts/ptmx", F_OK
) && umount("/dev/pts")) {
905 SYSERROR("failed to umount 'dev/pts'");
909 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL
,
910 "newinstance,ptmxmode=0666")) {
911 SYSERROR("failed to mount a new instance of '/dev/pts'");
915 if (access("/dev/ptmx", F_OK
)) {
916 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
918 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
922 if (realpath("/dev/ptmx", target
) && !strcmp(target
, "/dev/pts/ptmx"))
925 /* fallback here, /dev/pts/ptmx exists just mount bind */
926 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND
, 0)) {
927 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
931 INFO("created new pts instance");
937 static int setup_personality(int persona
)
942 if (personality(persona
) < 0) {
943 SYSERROR("failed to set personality to '0x%x'", persona
);
947 INFO("set personality to '0x%x'", persona
);
952 static int setup_dev_console(const struct lxc_rootfs
*rootfs
,
953 const struct lxc_console
*console
)
955 char path
[MAXPATHLEN
];
959 ret
= snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
960 if (ret
>= sizeof(path
)) {
961 ERROR("console path too long\n");
965 if (access(path
, F_OK
)) {
966 WARN("rootfs specified but no console found at '%s'", path
);
970 if (console
->peer
== -1) {
971 INFO("no console output required");
975 if (stat(path
, &s
)) {
976 SYSERROR("failed to stat '%s'", path
);
980 if (chmod(console
->name
, s
.st_mode
)) {
981 SYSERROR("failed to set mode '0%o' to '%s'",
982 s
.st_mode
, console
->name
);
986 if (mount(console
->name
, path
, "none", MS_BIND
, 0)) {
987 ERROR("failed to mount '%s' on '%s'", console
->name
, path
);
991 INFO("console has been setup");
995 static int setup_ttydir_console(const struct lxc_rootfs
*rootfs
,
996 const struct lxc_console
*console
,
999 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
1002 /* create rootfs/dev/<ttydir> directory */
1003 ret
= snprintf(path
, sizeof(path
), "%s/dev/%s", rootfs
->mount
,
1005 if (ret
>= sizeof(path
))
1007 ret
= mkdir(path
, 0755);
1008 if (ret
&& errno
!= EEXIST
) {
1009 SYSERROR("failed with errno %d to create %s\n", errno
, path
);
1012 INFO("created %s\n", path
);
1014 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/console",
1015 rootfs
->mount
, ttydir
);
1016 if (ret
>= sizeof(lxcpath
)) {
1017 ERROR("console path too long\n");
1021 snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1023 if (ret
&& errno
!= ENOENT
) {
1024 SYSERROR("error unlinking %s\n", path
);
1028 ret
= creat(lxcpath
, 0660);
1029 if (ret
==-1 && errno
!= EEXIST
) {
1030 SYSERROR("error %d creating %s\n", errno
, lxcpath
);
1035 if (console
->peer
== -1) {
1036 INFO("no console output required");
1040 if (mount(console
->name
, lxcpath
, "none", MS_BIND
, 0)) {
1041 ERROR("failed to mount '%s' on '%s'", console
->name
, lxcpath
);
1045 /* create symlink from rootfs/dev/console to 'lxc/console' */
1046 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/console", ttydir
);
1047 if (ret
>= sizeof(lxcpath
)) {
1048 ERROR("lxc/console path too long");
1051 ret
= symlink(lxcpath
, path
);
1053 SYSERROR("failed to create symlink for console");
1057 INFO("console has been setup on %s", lxcpath
);
1062 static int setup_console(const struct lxc_rootfs
*rootfs
,
1063 const struct lxc_console
*console
,
1066 /* We don't have a rootfs, /dev/console will be shared */
1070 return setup_dev_console(rootfs
, console
);
1072 return setup_ttydir_console(rootfs
, console
, ttydir
);
1075 static int setup_kmsg(const struct lxc_rootfs
*rootfs
,
1076 const struct lxc_console
*console
)
1078 char kpath
[MAXPATHLEN
];
1081 ret
= snprintf(kpath
, sizeof(kpath
), "%s/dev/kmsg", rootfs
->mount
);
1082 if (ret
< 0 || ret
>= sizeof(kpath
))
1085 ret
= unlink(kpath
);
1086 if (ret
&& errno
!= ENOENT
) {
1087 SYSERROR("error unlinking %s\n", kpath
);
1091 ret
= symlink("console", kpath
);
1093 SYSERROR("failed to create symlink for kmsg");
1100 static int setup_cgroup(const char *name
, struct lxc_list
*cgroups
)
1102 struct lxc_list
*iterator
;
1103 struct lxc_cgroup
*cg
;
1106 if (lxc_list_empty(cgroups
))
1109 lxc_list_for_each(iterator
, cgroups
) {
1111 cg
= iterator
->elem
;
1113 if (lxc_cgroup_set(name
, cg
->subsystem
, cg
->value
))
1116 DEBUG("cgroup '%s' set to '%s'", cg
->subsystem
, cg
->value
);
1120 INFO("cgroup has been setup");
1125 static void parse_mntopt(char *opt
, unsigned long *flags
, char **data
)
1127 struct mount_opt
*mo
;
1129 /* If opt is found in mount_opt, set or clear flags.
1130 * Otherwise append it to data. */
1132 for (mo
= &mount_opt
[0]; mo
->name
!= NULL
; mo
++) {
1133 if (!strncmp(opt
, mo
->name
, strlen(mo
->name
))) {
1135 *flags
&= ~mo
->flag
;
1147 static int parse_mntopts(const char *mntopts
, unsigned long *mntflags
,
1151 char *p
, *saveptr
= NULL
;
1159 s
= strdup(mntopts
);
1161 SYSERROR("failed to allocate memory");
1165 data
= malloc(strlen(s
) + 1);
1167 SYSERROR("failed to allocate memory");
1173 for (p
= strtok_r(s
, ",", &saveptr
); p
!= NULL
;
1174 p
= strtok_r(NULL
, ",", &saveptr
))
1175 parse_mntopt(p
, mntflags
, &data
);
1186 static int mount_entry(const char *fsname
, const char *target
,
1187 const char *fstype
, unsigned long mountflags
,
1190 if (mount(fsname
, target
, fstype
, mountflags
& ~MS_REMOUNT
, data
)) {
1191 SYSERROR("failed to mount '%s' on '%s'", fsname
, target
);
1195 if ((mountflags
& MS_REMOUNT
) || (mountflags
& MS_BIND
)) {
1197 DEBUG("remounting %s on %s to respect bind or remount options",
1200 if (mount(fsname
, target
, fstype
,
1201 mountflags
| MS_REMOUNT
, data
)) {
1202 SYSERROR("failed to mount '%s' on '%s'",
1208 DEBUG("mounted '%s' on '%s', type '%s'", fsname
, target
, fstype
);
1213 static inline int mount_entry_on_systemfs(struct mntent
*mntent
)
1215 unsigned long mntflags
;
1219 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1220 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1224 ret
= mount_entry(mntent
->mnt_fsname
, mntent
->mnt_dir
,
1225 mntent
->mnt_type
, mntflags
, mntdata
);
1232 static int mount_entry_on_absolute_rootfs(struct mntent
*mntent
,
1233 const struct lxc_rootfs
*rootfs
,
1234 const char *lxc_name
)
1237 char path
[MAXPATHLEN
];
1238 unsigned long mntflags
;
1240 int r
, ret
= 0, offset
;
1242 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1243 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1247 /* if rootfs->path is a blockdev path, allow container fstab to
1248 * use /var/lib/lxc/CN/rootfs as the target prefix */
1249 r
= snprintf(path
, MAXPATHLEN
, "/var/lib/lxc/%s/rootfs", lxc_name
);
1250 if (r
< 0 || r
>= MAXPATHLEN
)
1253 aux
= strstr(mntent
->mnt_dir
, path
);
1255 offset
= strlen(path
);
1260 aux
= strstr(mntent
->mnt_dir
, rootfs
->path
);
1262 WARN("ignoring mount point '%s'", mntent
->mnt_dir
);
1265 offset
= strlen(rootfs
->path
);
1269 r
= snprintf(path
, MAXPATHLEN
, "%s/%s", rootfs
->mount
,
1271 if (r
< 0 || r
>= MAXPATHLEN
) {
1272 WARN("pathnme too long for '%s'", mntent
->mnt_dir
);
1278 ret
= mount_entry(mntent
->mnt_fsname
, path
, mntent
->mnt_type
,
1286 static int mount_entry_on_relative_rootfs(struct mntent
*mntent
,
1289 char path
[MAXPATHLEN
];
1290 unsigned long mntflags
;
1294 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1295 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1299 /* relative to root mount point */
1300 ret
= snprintf(path
, sizeof(path
), "%s/%s", rootfs
, mntent
->mnt_dir
);
1301 if (ret
>= sizeof(path
)) {
1302 ERROR("path name too long");
1306 ret
= mount_entry(mntent
->mnt_fsname
, path
, mntent
->mnt_type
,
1314 static int mount_file_entries(const struct lxc_rootfs
*rootfs
, FILE *file
,
1315 const char *lxc_name
)
1317 struct mntent
*mntent
;
1320 while ((mntent
= getmntent(file
))) {
1322 if (!rootfs
->path
) {
1323 if (mount_entry_on_systemfs(mntent
))
1328 /* We have a separate root, mounts are relative to it */
1329 if (mntent
->mnt_dir
[0] != '/') {
1330 if (mount_entry_on_relative_rootfs(mntent
,
1336 if (mount_entry_on_absolute_rootfs(mntent
, rootfs
, lxc_name
))
1342 INFO("mount points have been setup");
1347 static int setup_mount(const struct lxc_rootfs
*rootfs
, const char *fstab
,
1348 const char *lxc_name
)
1356 file
= setmntent(fstab
, "r");
1358 SYSERROR("failed to use '%s'", fstab
);
1362 ret
= mount_file_entries(rootfs
, file
, lxc_name
);
1368 static int setup_mount_entries(const struct lxc_rootfs
*rootfs
, struct lxc_list
*mount
,
1369 const char *lxc_name
)
1372 struct lxc_list
*iterator
;
1378 ERROR("tmpfile error: %m");
1382 lxc_list_for_each(iterator
, mount
) {
1383 mount_entry
= iterator
->elem
;
1384 fprintf(file
, "%s\n", mount_entry
);
1389 ret
= mount_file_entries(rootfs
, file
, lxc_name
);
1395 static int setup_caps(struct lxc_list
*caps
)
1397 struct lxc_list
*iterator
;
1402 lxc_list_for_each(iterator
, caps
) {
1404 drop_entry
= iterator
->elem
;
1408 for (i
= 0; i
< sizeof(caps_opt
)/sizeof(caps_opt
[0]); i
++) {
1410 if (strcmp(drop_entry
, caps_opt
[i
].name
))
1413 capid
= caps_opt
[i
].value
;
1418 /* try to see if it's numeric, so the user may specify
1419 * capabilities that the running kernel knows about but
1421 capid
= strtol(drop_entry
, &ptr
, 10);
1422 if (!ptr
|| *ptr
!= '\0' ||
1423 capid
== LONG_MIN
|| capid
== LONG_MAX
)
1424 /* not a valid number */
1426 else if (capid
> lxc_caps_last_cap())
1427 /* we have a number but it's not a valid
1433 ERROR("unknown capability %s", drop_entry
);
1437 DEBUG("drop capability '%s' (%d)", drop_entry
, capid
);
1439 if (prctl(PR_CAPBSET_DROP
, capid
, 0, 0, 0)) {
1440 SYSERROR("failed to remove %s capability", drop_entry
);
1446 DEBUG("capabilities has been setup");
1451 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
1453 struct sockaddr sockaddr
;
1457 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
1459 ERROR("mac address '%s' conversion failed : %s",
1460 hwaddr
, strerror(-ret
));
1464 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
1465 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
1467 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1469 ERROR("socket failure : %s", strerror(errno
));
1473 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
1476 ERROR("ioctl failure : %s", strerror(errno
));
1478 DEBUG("mac address '%s' on '%s' has been setup", hwaddr
, ifname
);
1483 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
1485 struct lxc_list
*iterator
;
1486 struct lxc_inetdev
*inetdev
;
1489 lxc_list_for_each(iterator
, ip
) {
1491 inetdev
= iterator
->elem
;
1493 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
1494 &inetdev
->bcast
, inetdev
->prefix
);
1496 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1497 ifindex
, strerror(-err
));
1505 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
1507 struct lxc_list
*iterator
;
1508 struct lxc_inet6dev
*inet6dev
;
1511 lxc_list_for_each(iterator
, ip
) {
1513 inet6dev
= iterator
->elem
;
1515 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
1516 &inet6dev
->mcast
, &inet6dev
->acast
,
1519 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1520 ifindex
, strerror(-err
));
1528 static int setup_netdev(struct lxc_netdev
*netdev
)
1530 char ifname
[IFNAMSIZ
];
1531 char *current_ifname
= ifname
;
1534 /* empty network namespace */
1535 if (!netdev
->ifindex
) {
1536 if (netdev
->flags
& IFF_UP
) {
1537 err
= lxc_netdev_up("lo");
1539 ERROR("failed to set the loopback up : %s",
1547 /* retrieve the name of the interface */
1548 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
1549 ERROR("no interface corresponding to index '%d'",
1554 /* default: let the system to choose one interface name */
1556 netdev
->name
= netdev
->type
== LXC_NET_PHYS
?
1557 netdev
->link
: "eth%d";
1559 /* rename the interface name */
1560 err
= lxc_netdev_rename_by_name(ifname
, netdev
->name
);
1562 ERROR("failed to rename %s->%s : %s", ifname
, netdev
->name
,
1567 /* Re-read the name of the interface because its name has changed
1568 * and would be automatically allocated by the system
1570 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
1571 ERROR("no interface corresponding to index '%d'",
1576 /* set a mac address */
1577 if (netdev
->hwaddr
) {
1578 if (setup_hw_addr(netdev
->hwaddr
, current_ifname
)) {
1579 ERROR("failed to setup hw address for '%s'",
1585 /* setup ipv4 addresses on the interface */
1586 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
)) {
1587 ERROR("failed to setup ip addresses for '%s'",
1592 /* setup ipv6 addresses on the interface */
1593 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
)) {
1594 ERROR("failed to setup ipv6 addresses for '%s'",
1599 /* set the network device up */
1600 if (netdev
->flags
& IFF_UP
) {
1603 err
= lxc_netdev_up(current_ifname
);
1605 ERROR("failed to set '%s' up : %s", current_ifname
,
1610 /* the network is up, make the loopback up too */
1611 err
= lxc_netdev_up("lo");
1613 ERROR("failed to set the loopback up : %s",
1619 /* We can only set up the default routes after bringing
1620 * up the interface, sine bringing up the interface adds
1621 * the link-local routes and we can't add a default
1622 * route if the gateway is not reachable. */
1624 /* setup ipv4 gateway on the interface */
1625 if (netdev
->ipv4_gateway
) {
1626 if (!(netdev
->flags
& IFF_UP
)) {
1627 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname
);
1631 if (lxc_list_empty(&netdev
->ipv4
)) {
1632 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname
);
1636 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
1638 ERROR("failed to setup ipv4 gateway for '%s': %s",
1639 ifname
, strerror(-err
));
1640 if (netdev
->ipv4_gateway_auto
) {
1641 char buf
[INET_ADDRSTRLEN
];
1642 inet_ntop(AF_INET
, netdev
->ipv4_gateway
, buf
, sizeof(buf
));
1643 ERROR("tried to set autodetected ipv4 gateway '%s'", buf
);
1649 /* setup ipv6 gateway on the interface */
1650 if (netdev
->ipv6_gateway
) {
1651 if (!(netdev
->flags
& IFF_UP
)) {
1652 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname
);
1656 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
)) {
1657 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname
);
1661 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
1663 ERROR("failed to setup ipv6 gateway for '%s': %s",
1664 ifname
, strerror(-err
));
1665 if (netdev
->ipv6_gateway_auto
) {
1666 char buf
[INET6_ADDRSTRLEN
];
1667 inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, buf
, sizeof(buf
));
1668 ERROR("tried to set autodetected ipv6 gateway '%s'", buf
);
1674 DEBUG("'%s' has been setup", current_ifname
);
/*
 * setup_network: configure every network device of the 'network' list.
 *
 * Calls setup_netdev() on each list element and logs an error when one
 * of them reports a failure (non-zero result).  An INFO message is
 * logged when the list is not empty.
 * NOTE(review): the return statements are not visible here; presumably
 * non-zero on the first failure and 0 otherwise - confirm.
 */
1679 static int setup_network(struct lxc_list
*network
)
1681 struct lxc_list
*iterator
;
1682 struct lxc_netdev
*netdev
;
1684 lxc_list_for_each(iterator
, network
) {
1686 netdev
= iterator
->elem
;
1688 if (setup_netdev(netdev
)) {
1689 ERROR("failed to setup netdev");
/* nothing is logged for an empty list */
1694 if (!lxc_list_empty(network
))
1695 INFO("network has been setup");
/*
 * setup_private_host_hw_addr: rewrite the first byte of the hardware
 * (mac) address of the interface named 'veth1' to 0xfe.
 *
 * The current address is read with the SIOCGIFHWADDR ioctl on a
 * datagram socket, byte 0 is overwritten and the result is written back
 * with SIOCSIFHWADDR.  The resulting address is logged at DEBUG level.
 * NOTE(review): error checks and the closing of 'sockfd' are not
 * visible here - confirm they exist.
 */
1700 static int setup_private_host_hw_addr(char *veth1
)
/* any socket works as a handle for the interface ioctls below */
1706 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
/* select the interface by name; snprintf bounds the copy to IFNAMSIZ */
1710 snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
1711 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
/* only the high byte changes, the other five bytes are kept */
1717 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
1718 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
/* "& 0xff" keeps each byte in the 0..255 range for the %02x output */
1723 DEBUG("mac address of host interface '%s' changed to private "
1724 "%02x:%02x:%02x:%02x:%02x:%02x", veth1
,
1725 ifr
.ifr_hwaddr
.sa_data
[0] & 0xff,
1726 ifr
.ifr_hwaddr
.sa_data
[1] & 0xff,
1727 ifr
.ifr_hwaddr
.sa_data
[2] & 0xff,
1728 ifr
.ifr_hwaddr
.sa_data
[3] & 0xff,
1729 ifr
.ifr_hwaddr
.sa_data
[4] & 0xff,
1730 ifr
.ifr_hwaddr
.sa_data
[5] & 0xff);
/*
 * lxc_conf_init: allocate a struct lxc_conf and fill in its defaults.
 *
 * The structure is obtained with malloc(), zeroed, and then the fields
 * whose default is not all-zero bits are set explicitly: personality
 * and the console descriptors are -1, rootfs.mount points at the
 * LXCROOTFSMOUNT constant, and every list head is initialised.
 * NOTE(review): the return statements are not visible here; presumably
 * the new structure, or NULL when the allocation fails - confirm.
 */
1735 struct lxc_conf
*lxc_conf_init(void)
1737 struct lxc_conf
*new;
1740 new = malloc(sizeof(*new));
1742 ERROR("lxc_conf_init : %m");
1745 memset(new, 0, sizeof(*new));
/* -1 marks "not set" / "no descriptor" for the fields below */
1747 new->personality
= -1;
1748 new->console
.path
= NULL
;
1749 new->console
.peer
= -1;
1750 new->console
.master
= -1;
1751 new->console
.slave
= -1;
1752 new->console
.name
[0] = '\0';
/* rootfs.mount starts out pointing at the LXCROOTFSMOUNT constant */
1753 new->rootfs
.mount
= LXCROOTFSMOUNT
;
1754 lxc_list_init(&new->cgroup
);
1755 lxc_list_init(&new->network
);
1756 lxc_list_init(&new->mount_list
);
1757 lxc_list_init(&new->caps
);
/* one list of hook scripts per hook type */
1758 for (i
=0; i
<NUM_LXC_HOOKS
; i
++)
1759 lxc_list_init(&new->hooks
[i
]);
1761 new->aa_profile
= NULL
;
1763 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
1764 new->lsm_umount_proc
= 0;
/*
 * instanciate_veth: create the veth pair for 'netdev'.
 *
 * Visible steps, in order:
 *  - choose the name of the first end: the configured
 *    priv.veth_attr.pair, or a name generated from the "vethXXXXXX"
 *    template, which is also copied into priv.veth_attr.veth1;
 *  - generate the name of the second end from the same template;
 *  - create the pair with lxc_veth_create();
 *  - make the mac address of the first end private;
 *  - apply netdev->mtu to both ends, attach the first end to the
 *    bridge netdev->link, store the index of the second end in
 *    netdev->ifindex and bring the first end up;
 *  - run netdev->upscript when one is configured.
 * NOTE(review): the conditions guarding the mtu and bridge steps, the
 * error branches and the return statements are not visible here.  The
 * trailing lxc_netdev_delete_by_name(veth1) looks like the shared error
 * path - confirm.
 */
1770 static int instanciate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1772 char veth1buf
[IFNAMSIZ
], *veth1
;
1773 char veth2buf
[IFNAMSIZ
], *veth2
;
/* a name configured by the user takes precedence over a generated one */
1776 if (netdev
->priv
.veth_attr
.pair
)
1777 veth1
= netdev
->priv
.veth_attr
.pair
;
1779 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
1780 if (err
>= sizeof(veth1buf
)) { /* can't *really* happen, but... */
1781 ERROR("veth1 name too long");
/* mktemp() replaces the XXXXXX suffix of the template in place */
1784 veth1
= mktemp(veth1buf
);
1785 /* store away for deconf */
1786 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
1789 snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
1790 veth2
= mktemp(veth2buf
);
/* an empty string is treated as a name-generation failure */
1792 if (!strlen(veth1
) || !strlen(veth2
)) {
1793 ERROR("failed to allocate a temporary name");
1797 err
= lxc_veth_create(veth1
, veth2
);
1799 ERROR("failed to create %s-%s : %s", veth1
, veth2
,
1804 /* changing the high byte of the mac address to 0xfe, the bridge interface
1805 * will always keep the host's mac address and not take the mac address
 * of this interface (NOTE(review): the end of the original comment is
 * not visible; confirm the wording) */
1807 err
= setup_private_host_hw_addr(veth1
);
1809 ERROR("failed to change mac address of host interface '%s' : %s",
1810 veth1
, strerror(-err
));
/* netdev->mtu is a string; it is converted with atoi() for both ends */
1815 err
= lxc_netdev_set_mtu(veth1
, atoi(netdev
->mtu
));
1817 err
= lxc_netdev_set_mtu(veth2
, atoi(netdev
->mtu
));
1819 ERROR("failed to set mtu '%s' for %s-%s : %s",
1820 netdev
->mtu
, veth1
, veth2
, strerror(-err
));
1826 err
= lxc_bridge_attach(netdev
->link
, veth1
);
1828 ERROR("failed to attach '%s' to the bridge '%s' : %s",
1829 veth1
, netdev
->link
, strerror(-err
));
/* the index of the second end is what the rest of the code works with */
1834 netdev
->ifindex
= if_nametoindex(veth2
);
1835 if (!netdev
->ifindex
) {
1836 ERROR("failed to retrieve the index for %s", veth2
);
1840 err
= lxc_netdev_up(veth1
);
1842 ERROR("failed to set %s up : %s", veth1
, strerror(-err
));
1846 if (netdev
->upscript
) {
1847 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
1848 "veth", veth1
, (char*) NULL
);
1853 DEBUG("instanciated veth '%s/%s', index is '%d'",
1854 veth1
, veth2
, netdev
->ifindex
);
1859 lxc_netdev_delete_by_name(veth1
);
/*
 * shutdown_veth: run the "down" script of a veth network device.
 *
 * The interface name passed to the script is the configured
 * priv.veth_attr.pair when set, otherwise the name stored in
 * priv.veth_attr.veth1.  Nothing is run when netdev->downscript is
 * not set.
 * NOTE(review): the handling of 'err' and the return value are not
 * visible here - confirm.
 */
1863 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1868 if (netdev
->priv
.veth_attr
.pair
)
1869 veth1
= netdev
->priv
.veth_attr
.pair
;
1871 veth1
= netdev
->priv
.veth_attr
.veth1
;
1873 if (netdev
->downscript
) {
1874 err
= run_script(handler
->name
, "net", netdev
->downscript
,
1875 "down", "veth", veth1
, (char*) NULL
);
/*
 * instanciate_macvlan: create a macvlan interface on top of
 * netdev->link.
 *
 * A link must be configured.  The new interface gets a name generated
 * from the "mcXXXXXX" template and is created with
 * lxc_macvlan_create() using priv.macvlan_attr.mode.  Its index is
 * stored in netdev->ifindex; when the index cannot be looked up the
 * interface is deleted again.  netdev->upscript is run when configured.
 * NOTE(review): the error branches and return statements are not
 * visible here - confirm.
 */
1882 static int instanciate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1884 char peerbuf
[IFNAMSIZ
], *peer
;
1887 if (!netdev
->link
) {
1888 ERROR("no link specified for macvlan netdev");
1892 err
= snprintf(peerbuf
, sizeof(peerbuf
), "mcXXXXXX");
1893 if (err
>= sizeof(peerbuf
))
/* mktemp() replaces the XXXXXX suffix of the template in place */
1896 peer
= mktemp(peerbuf
);
1897 if (!strlen(peer
)) {
1898 ERROR("failed to make a temporary name");
1902 err
= lxc_macvlan_create(netdev
->link
, peer
,
1903 netdev
->priv
.macvlan_attr
.mode
);
1905 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
1906 peer
, netdev
->link
, strerror(-err
));
1910 netdev
->ifindex
= if_nametoindex(peer
);
1911 if (!netdev
->ifindex
) {
1912 ERROR("failed to retrieve the index for %s", peer
);
/* do not leave the freshly created interface behind */
1913 lxc_netdev_delete_by_name(peer
);
/* the script receives the link name, not the generated interface name */
1917 if (netdev
->upscript
) {
1918 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
1919 "macvlan", netdev
->link
, (char*) NULL
);
1924 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
1925 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
/*
 * shutdown_macvlan: run the "down" script of a macvlan network device,
 * passing netdev->link as the interface argument.  Nothing is run when
 * netdev->downscript is not set.
 * NOTE(review): the end of the run_script() call and the return value
 * are not visible here - confirm.
 */
1930 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1934 if (netdev
->downscript
) {
1935 err
= run_script(handler
->name
, "net", netdev
->downscript
,
1936 "down", "macvlan", netdev
->link
,
1944 /* XXX: merge with instanciate_macvlan */
1945 static int instanciate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1947 char peer
[IFNAMSIZ
];
1950 if (!netdev
->link
) {
1951 ERROR("no link specified for vlan netdev");
1955 err
= snprintf(peer
, sizeof(peer
), "vlan%d", netdev
->priv
.vlan_attr
.vid
);
1956 if (err
>= sizeof(peer
)) {
1957 ERROR("peer name too long");
1961 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
1963 ERROR("failed to create vlan interface '%s' on '%s' : %s",
1964 peer
, netdev
->link
, strerror(-err
));
1968 netdev
->ifindex
= if_nametoindex(peer
);
1969 if (!netdev
->ifindex
) {
1970 ERROR("failed to retrieve the ifindex for %s", peer
);
1971 lxc_netdev_delete_by_name(peer
);
1975 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
/*
 * shutdown_vlan: shutdown callback for vlan network devices.
 * NOTE(review): the body is not visible here; it appears to contain no
 * vlan-specific statements - confirm.
 */
1981 static int shutdown_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
/*
 * instanciate_phys: prepare an existing (physical) interface for the
 * container.
 *
 * No interface is created: netdev->link names the existing interface,
 * its index is looked up with if_nametoindex() and stored in
 * netdev->ifindex.  netdev->upscript is run when configured.
 * NOTE(review): the error branches and return statements are not
 * visible here - confirm.
 */
1986 static int instanciate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1988 if (!netdev
->link
) {
1989 ERROR("no link specified for the physical interface");
/* if_nametoindex() returns 0 when the name cannot be resolved */
1993 netdev
->ifindex
= if_nametoindex(netdev
->link
);
1994 if (!netdev
->ifindex
) {
1995 ERROR("failed to retrieve the index for %s", netdev
->link
);
1999 if (netdev
->upscript
) {
2001 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2002 "up", "phys", netdev
->link
, (char*) NULL
);
/*
 * shutdown_phys: run the "down" script of a physical network device,
 * passing netdev->link as the interface argument.  Nothing is run when
 * netdev->downscript is not set.
 * NOTE(review): the handling of 'err' and the return value are not
 * visible here - confirm.
 */
2010 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2014 if (netdev
->downscript
) {
2015 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2016 "down", "phys", netdev
->link
, (char*) NULL
);
/*
 * instanciate_empty: set up an "empty" network device.
 *
 * No interface is created; netdev->ifindex is set to 0 and
 * netdev->upscript is run when configured (without an interface
 * argument).
 * NOTE(review): the handling of 'err' and the return value are not
 * visible here - confirm.
 */
2023 static int instanciate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
/* 0 means "no interface": there is nothing to move or delete later */
2025 netdev
->ifindex
= 0;
2026 if (netdev
->upscript
) {
2028 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2029 "up", "empty", (char*) NULL
);
/*
 * shutdown_empty: run the "down" script of an "empty" network device
 * (without an interface argument).  Nothing is run when
 * netdev->downscript is not set.
 * NOTE(review): the handling of 'err' and the return value are not
 * visible here - confirm.
 */
2036 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2040 if (netdev
->downscript
) {
2041 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2042 "down", "empty", (char*) NULL
);
/*
 * lxc_create_network: instantiate every network device configured in
 * handler->conf->network.
 *
 * For each device the type is range-checked and then used as an index
 * into the netdev_conf[] callback table; a non-zero result from the
 * callback is logged as a failure.
 * NOTE(review): the return statements are not visible here; presumably
 * -1 on the first failure and 0 otherwise - confirm.
 */
2049 int lxc_create_network(struct lxc_handler
*handler
)
2051 struct lxc_list
*network
= &handler
->conf
->network
;
2052 struct lxc_list
*iterator
;
2053 struct lxc_netdev
*netdev
;
2055 lxc_list_for_each(iterator
, network
) {
2057 netdev
= iterator
->elem
;
/* guard the table lookup below against an out-of-range type */
2059 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
) {
2060 ERROR("invalid network configuration type '%d'",
2065 if (netdev_conf
[netdev
->type
](handler
, netdev
)) {
2066 ERROR("failed to create netdev");
/*
 * lxc_delete_network: tear down every network device configured in
 * handler->conf->network.
 *
 * Physical devices with a non-zero index are renamed back to
 * netdev->link.  The per-type netdev_deconf[] callback is invoked, and
 * interfaces with a non-zero index are deleted by index.  Failures are
 * only logged as warnings.
 * NOTE(review): lines between these steps are not visible here (for
 * example whether physical devices skip the deletion) - confirm.
 */
2075 void lxc_delete_network(struct lxc_handler
*handler
)
2077 struct lxc_list
*network
= &handler
->conf
->network
;
2078 struct lxc_list
*iterator
;
2079 struct lxc_netdev
*netdev
;
2081 lxc_list_for_each(iterator
, network
) {
2082 netdev
= iterator
->elem
;
/* give a physical interface its initial name back */
2084 if (netdev
->ifindex
!= 0 && netdev
->type
== LXC_NET_PHYS
) {
2085 if (lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
))
2086 WARN("failed to rename to the initial name the " \
2087 "netdev '%s'", netdev
->link
);
2091 if (netdev_deconf
[netdev
->type
](handler
, netdev
)) {
2092 WARN("failed to destroy netdev");
2095 /* Recent kernels remove the virtual interfaces when the network
2096 * namespace is destroyed, but in case we did not move the
2097 * interface to the network namespace, we have to destroy it
 * here. */
2099 if (netdev
->ifindex
!= 0 &&
2100 lxc_netdev_delete_by_index(netdev
->ifindex
))
2101 WARN("failed to remove interface '%s'", netdev
->name
);
/*
 * lxc_assign_network: move every interface of the 'network' list to the
 * process 'pid' with lxc_netdev_move_by_index().
 *
 * Devices whose ifindex is 0 have nothing to move.
 * NOTE(review): the error message prints netdev->link while the debug
 * message prints netdev->name.  The return statements are not visible
 * here - confirm.
 */
2105 int lxc_assign_network(struct lxc_list
*network
, pid_t pid
)
2107 struct lxc_list
*iterator
;
2108 struct lxc_netdev
*netdev
;
2111 lxc_list_for_each(iterator
, network
) {
2113 netdev
= iterator
->elem
;
2115 /* empty network namespace, nothing to move */
2116 if (!netdev
->ifindex
)
2119 err
= lxc_netdev_move_by_index(netdev
->ifindex
, pid
);
2121 ERROR("failed to move '%s' to the container : %s",
2122 netdev
->link
, strerror(-err
));
2126 DEBUG("move '%s' to '%d'", netdev
->name
, pid
);
/*
 * lxc_find_gateway_addresses: resolve the gateways of the devices
 * configured with "gateway = auto".
 *
 * Only devices with ipv4_gateway_auto or ipv6_gateway_auto set are
 * considered.  They must be of type veth or macvlan and have a link
 * interface.  The gateway address is then read from that link
 * interface with lxc_ipv4_addr_get() / lxc_ipv6_addr_get() and stored
 * in netdev->ipv4_gateway / netdev->ipv6_gateway.
 * NOTE(review): the return statements and any check of 'link_index'
 * are not visible here - confirm.
 */
2132 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2134 struct lxc_list
*network
= &handler
->conf
->network
;
2135 struct lxc_list
*iterator
;
2136 struct lxc_netdev
*netdev
;
2139 lxc_list_for_each(iterator
, network
) {
2140 netdev
= iterator
->elem
;
/* nothing to detect for this device */
2142 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2145 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
) {
2146 ERROR("gateway = auto only supported for "
2147 "veth and macvlan");
2151 if (!netdev
->link
) {
2152 ERROR("gateway = auto needs a link interface");
/* the addresses are looked up by the index of the link interface */
2156 link_index
= if_nametoindex(netdev
->link
);
2160 if (netdev
->ipv4_gateway_auto
) {
2161 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
)) {
2162 ERROR("failed to automatically find ipv4 gateway "
2163 "address from link interface '%s'", netdev
->link
);
2168 if (netdev
->ipv6_gateway_auto
) {
2169 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
)) {
2170 ERROR("failed to automatically find ipv6 gateway "
2171 "address from link interface '%s'", netdev
->link
);
/*
 * lxc_create_tty: allocate the pseudo terminals requested by conf->tty.
 *
 * An array of conf->tty lxc_pty_info entries is allocated in
 * conf->tty_info.pty_info and one pty is opened per entry with
 * openpty().  When an openpty() call fails, nbtty is set to the number
 * of ptys opened so far and lxc_delete_tty() releases them.  Both
 * descriptors of every pty are marked close-on-exec.  Once the loop is
 * done, nbtty is set to conf->tty.
 * NOTE(review): the early exit for "no tty configured" and the return
 * statements are not visible here - confirm.
 */
2180 int lxc_create_tty(const char *name
, struct lxc_conf
*conf
)
2182 struct lxc_tty_info
*tty_info
= &conf
->tty_info
;
2185 /* no tty in the configuration */
2189 tty_info
->pty_info
=
2190 malloc(sizeof(*tty_info
->pty_info
)*conf
->tty
);
2191 if (!tty_info
->pty_info
) {
2192 SYSERROR("failed to allocate pty_info");
2196 for (i
= 0; i
< conf
->tty
; i
++) {
2198 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2200 if (openpty(&pty_info
->master
, &pty_info
->slave
,
2201 pty_info
->name
, NULL
, NULL
)) {
2202 SYSERROR("failed to create pty #%d", i
);
/* only the first i entries hold open descriptors */
2203 tty_info
->nbtty
= i
;
2204 lxc_delete_tty(tty_info
);
2208 DEBUG("allocated pty '%s' (%d/%d)",
2209 pty_info
->name
, pty_info
->master
, pty_info
->slave
);
2211 /* Prevent leaking the file descriptors to the container */
2212 fcntl(pty_info
->master
, F_SETFD
, FD_CLOEXEC
);
2213 fcntl(pty_info
->slave
, F_SETFD
, FD_CLOEXEC
);
2218 tty_info
->nbtty
= conf
->tty
;
2220 INFO("tty's configured");
/*
 * lxc_delete_tty: release the pseudo terminals recorded in 'tty_info'.
 *
 * Closes the master and slave descriptors of the first nbtty entries,
 * frees the pty_info array and resets nbtty to 0.
 */
2225 void lxc_delete_tty(struct lxc_tty_info
*tty_info
)
2229 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
2230 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2232 close(pty_info
->master
);
2233 close(pty_info
->slave
);
2236 free(tty_info
->pty_info
);
/* with nbtty at 0 a repeated call closes nothing */
2237 tty_info
->nbtty
= 0;
/*
 * lxc_setup: apply the configuration 'lxc_conf' to the container
 * called 'name'.
 *
 * The visible steps run in this order: utsname, network, "pre-mount"
 * hooks, rootfs, mounts from the fstab, mount entries, "mount" hooks,
 * cgroups, console, kmsg, ttys, (with HAVE_APPARMOR) mounting /proc if
 * needed, pivot_root, pts, personality and capabilities.  Each failing
 * step logs an error naming the step.
 * NOTE(review): the return statements are not visible here; presumably
 * -1 on the first failing step and 0 at the end - confirm.
 */
2240 int lxc_setup(const char *name
, struct lxc_conf
*lxc_conf
)
2242 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2246 if (setup_utsname(lxc_conf
->utsname
)) {
2247 ERROR("failed to setup the utsname for '%s'", name
);
2251 if (setup_network(&lxc_conf
->network
)) {
2252 ERROR("failed to setup the network for '%s'", name
);
2256 if (run_lxc_hooks(name
, "pre-mount", lxc_conf
)) {
2257 ERROR("failed to run pre-mount hooks for container '%s'.", name
);
2261 if (setup_rootfs(&lxc_conf
->rootfs
)) {
2262 ERROR("failed to setup rootfs for '%s'", name
);
2266 if (setup_mount(&lxc_conf
->rootfs
, lxc_conf
->fstab
, name
)) {
2267 ERROR("failed to setup the mounts for '%s'", name
);
2271 if (setup_mount_entries(&lxc_conf
->rootfs
, &lxc_conf
->mount_list
, name
)) {
2272 ERROR("failed to setup the mount entries for '%s'", name
);
2276 if (run_lxc_hooks(name
, "mount", lxc_conf
)) {
2277 ERROR("failed to run mount hooks for container '%s'.", name
);
2281 if (setup_cgroup(name
, &lxc_conf
->cgroup
)) {
2282 ERROR("failed to setup the cgroups for '%s'", name
);
2286 if (setup_console(&lxc_conf
->rootfs
, &lxc_conf
->console
, lxc_conf
->ttydir
)) {
2287 ERROR("failed to setup the console for '%s'", name
);
2291 if (setup_kmsg(&lxc_conf
->rootfs
, &lxc_conf
->console
)) {
2292 ERROR("failed to setup kmsg for '%s'", name
);
2296 if (setup_tty(&lxc_conf
->rootfs
, &lxc_conf
->tty_info
, lxc_conf
->ttydir
)) {
2297 ERROR("failed to setup the ttys for '%s'", name
);
2301 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
/* NOTE(review): rootfs.path is printed with %s before the NULL check
 * that follows - confirm this is intended. */
2302 INFO("rootfs path is .%s., mount is .%s.", lxc_conf
->rootfs
.path
,
2303 lxc_conf
->rootfs
.mount
);
2304 if (lxc_conf
->rootfs
.path
== NULL
|| strlen(lxc_conf
->rootfs
.path
) == 0)
2307 mounted
= lsm_mount_proc_if_needed(lxc_conf
->rootfs
.path
, lxc_conf
->rootfs
.mount
);
2308 if (mounted
== -1) {
2309 SYSERROR("failed to mount /proc in the container.");
2311 } else if (mounted
== 1) {
/* record that /proc was mounted by this step */
2312 lxc_conf
->lsm_umount_proc
= 1;
2316 if (setup_pivot_root(&lxc_conf
->rootfs
)) {
2317 ERROR("failed to set rootfs for '%s'", name
);
2321 if (setup_pts(lxc_conf
->pts
)) {
2322 ERROR("failed to setup the new pts instance");
2326 if (setup_personality(lxc_conf
->personality
)) {
2327 ERROR("failed to setup personality");
2331 if (setup_caps(&lxc_conf
->caps
)) {
2332 ERROR("failed to drop capabilities");
2336 NOTICE("'%s' is setup.", name
);
/*
 * run_lxc_hooks: run every script registered for the hook named 'hook'.
 *
 * 'hook' is one of "pre-start", "pre-mount", "mount", "start" or
 * "post-stop"; it is translated to the matching LXCHOOK_* index and
 * every script in conf->hooks[index] is run through run_script() with
 * the container name, the section "lxc" and the hook name.
 * NOTE(review): the handling of an unknown hook name and of 'ret' is
 * not visible here - confirm that 'which' cannot be used uninitialised.
 */
2341 int run_lxc_hooks(const char *name
, char *hook
, struct lxc_conf
*conf
)
2344 struct lxc_list
*it
;
2346 if (strcmp(hook
, "pre-start") == 0)
2347 which
= LXCHOOK_PRESTART
;
2348 else if (strcmp(hook
, "pre-mount") == 0)
2349 which
= LXCHOOK_PREMOUNT
;
2350 else if (strcmp(hook
, "mount") == 0)
2351 which
= LXCHOOK_MOUNT
;
2352 else if (strcmp(hook
, "start") == 0)
2353 which
= LXCHOOK_START
;
2354 else if (strcmp(hook
, "post-stop") == 0)
2355 which
= LXCHOOK_POSTSTOP
;
2358 lxc_list_for_each(it
, &conf
->hooks
[which
]) {
/* each list element is the script of one hook */
2360 char *hookname
= it
->elem
;
2361 ret
= run_script(name
, "lxc", hookname
, hook
, NULL
);
/*
 * lxc_remove_nic: release the network device stored in the list
 * element 'it'.
 *
 * Visible here: the upscript, hwaddr and both gateway fields of the
 * device are freed and its ipv4 and ipv6 address lists are walked.
 * NOTE(review): the bodies of the two loops and the release of the
 * remaining fields and of 'it' itself are not visible here - confirm.
 */
2368 static void lxc_remove_nic(struct lxc_list
*it
)
2370 struct lxc_netdev
*netdev
= it
->elem
;
2371 struct lxc_list
*it2
;
2379 if (netdev
->upscript
)
2380 free(netdev
->upscript
);
2382 free(netdev
->hwaddr
);
2385 if (netdev
->ipv4_gateway
)
2386 free(netdev
->ipv4_gateway
);
2387 if (netdev
->ipv6_gateway
)
2388 free(netdev
->ipv6_gateway
);
2389 lxc_list_for_each(it2
, &netdev
->ipv4
) {
2394 lxc_list_for_each(it2
, &netdev
->ipv6
) {
/*
 * lxc_clear_nic: clear a whole network device, or one of its settings,
 * from the configuration 'c'.
 *
 * 'key' starts with the decimal index of the device in c->network and
 * may be followed by '.' and a setting name.  The settings compared
 * against are "ipv4", "ipv6", "link", "name", "script.up", "hwaddr",
 * "mtu", "ipv4_gateway" and "ipv6_gateway".  For the pointer settings
 * visible here the value is freed and/or reset to NULL.
 * Returns -1 when 'key' does not start with a number.
 * NOTE(review): the branch for a key without a setting name, how 'p1'
 * is adjusted after index(), the loop bodies and the remaining return
 * statements are not visible here - confirm.
 */
2402 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
2403 int lxc_clear_nic(struct lxc_conf
*c
, char *key
)
2407 struct lxc_list
*it
;
2408 struct lxc_netdev
*netdev
;
/* locate the optional ".setting" suffix of the key */
2410 p1
= index(key
, '.');
2411 if (!p1
|| *(p1
+1) == '\0')
/* the leading number selects the device */
2414 ret
= sscanf(key
, "%d", &idx
);
2415 if (ret
!= 1) return -1;
2420 lxc_list_for_each(it
, &c
->network
) {
2425 if (i
< idx
) // we don't have that many nics defined
2428 if (!it
|| !it
->elem
)
2435 } else if (strcmp(p1
, "ipv4") == 0) {
2436 struct lxc_list
*it2
;
2437 lxc_list_for_each(it2
, &netdev
->ipv4
) {
2442 } else if (strcmp(p1
, "ipv6") == 0) {
2443 struct lxc_list
*it2
;
2444 lxc_list_for_each(it2
, &netdev
->ipv6
) {
2449 } else if (strcmp(p1
, "link") == 0) {
2452 netdev
->link
= NULL
;
2454 } else if (strcmp(p1
, "name") == 0) {
2457 netdev
->name
= NULL
;
2459 } else if (strcmp(p1
, "script.up") == 0) {
2460 if (netdev
->upscript
) {
2461 free(netdev
->upscript
);
2462 netdev
->upscript
= NULL
;
2464 } else if (strcmp(p1
, "hwaddr") == 0) {
2465 if (netdev
->hwaddr
) {
2466 free(netdev
->hwaddr
);
2467 netdev
->hwaddr
= NULL
;
2469 } else if (strcmp(p1
, "mtu") == 0) {
2474 } else if (strcmp(p1
, "ipv4_gateway") == 0) {
2475 if (netdev
->ipv4_gateway
) {
2476 free(netdev
->ipv4_gateway
);
2477 netdev
->ipv4_gateway
= NULL
;
2479 } else if (strcmp(p1
, "ipv6_gateway") == 0) {
2480 if (netdev
->ipv6_gateway
) {
2481 free(netdev
->ipv6_gateway
);
2482 netdev
->ipv6_gateway
= NULL
;
/*
 * lxc_clear_config_network: walk the c->network list to clear the
 * configured network devices.
 * NOTE(review): the loop body and the return value are not visible
 * here; presumably each element is released with lxc_remove_nic() -
 * confirm.
 */
2490 int lxc_clear_config_network(struct lxc_conf
*c
)
2492 struct lxc_list
*it
;
2493 lxc_list_for_each(it
, &c
->network
) {
/*
 * lxc_clear_config_caps: walk the c->caps list to clear the configured
 * capability entries.
 * NOTE(review): the loop body and the return value are not visible
 * here - confirm how the elements are released.
 */
2499 int lxc_clear_config_caps(struct lxc_conf
*c
)
2501 struct lxc_list
*it
;
2503 lxc_list_for_each(it
, &c
->caps
) {
/*
 * lxc_clear_cgroups: remove cgroup settings from the configuration 'c'.
 *
 * The key "lxc.cgroup" is special-cased.  Otherwise an entry is only
 * handled when its subsystem name equals 'k', unless 'all' is set.
 * The subsystem string of a handled entry is freed.
 * NOTE(review): how 'all' and 'k' are derived from 'key', the release
 * of the remaining fields and the return value are not visible here;
 * presumably "lxc.cgroup" selects every entry - confirm.
 */
2511 int lxc_clear_cgroups(struct lxc_conf
*c
, char *key
)
2513 struct lxc_list
*it
;
2517 if (strcmp(key
, "lxc.cgroup") == 0)
2520 lxc_list_for_each(it
, &c
->cgroup
) {
2521 struct lxc_cgroup
*cg
= it
->elem
;
/* filter on the subsystem name unless every entry is selected */
2522 if (!all
&& strcmp(cg
->subsystem
, k
) != 0)
2525 free(cg
->subsystem
);
/*
 * lxc_clear_mount_entries: walk the c->mount_list list to clear the
 * configured mount entries.
 * NOTE(review): the loop body and the return value are not visible
 * here - confirm how the elements are released.
 */
2533 int lxc_clear_mount_entries(struct lxc_conf
*c
)
2535 struct lxc_list
*it
;
2537 lxc_list_for_each(it
, &c
->mount_list
) {
/*
 * lxc_clear_hooks: walk the script list of every hook type
 * (c->hooks[0 .. NUM_LXC_HOOKS-1]) to clear the configured hooks.
 * NOTE(review): the inner loop body and the return value are not
 * visible here - confirm how the elements are released.
 */
2545 int lxc_clear_hooks(struct lxc_conf
*c
)
2547 struct lxc_list
*it
;
2550 for (i
=0; i
<NUM_LXC_HOOKS
; i
++) {
2551 lxc_list_for_each(it
, &c
->hooks
[i
]) {
2560 void lxc_conf_free(struct lxc_conf
*conf
)
2564 if (conf
->console
.path
)
2565 free(conf
->console
.path
);
2566 if (conf
->rootfs
.mount
!= LXCROOTFSMOUNT
)
2567 free(conf
->rootfs
.mount
);
2568 lxc_clear_config_network(conf
);
2569 if (conf
->aa_profile
)
2570 free(conf
->aa_profile
);
2571 lxc_clear_config_caps(conf
);
2572 lxc_clear_cgroups(conf
, "lxc.cgroup");
2573 lxc_clear_hooks(conf
);
2574 lxc_clear_mount_entries(conf
);