2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <sys/syscall.h>
39 #include <../include/ifaddrs.h>
45 #include <../include/openpty.h>
48 #include <linux/loop.h>
50 #include <sys/types.h>
51 #include <sys/utsname.h>
52 #include <sys/param.h>
54 #include <sys/socket.h>
55 #include <sys/mount.h>
57 #include <sys/prctl.h>
59 #include <arpa/inet.h>
61 #include <netinet/in.h>
72 #include "lxc.h" /* for lxc_cgroup_set() */
73 #include "caps.h" /* for lxc_caps_last_cap() */
80 #if HAVE_SYS_CAPABILITY_H
81 #include <sys/capability.h>
84 #if HAVE_SYS_PERSONALITY_H
85 #include <sys/personality.h>
89 #include <../include/lxcmntent.h>
94 #include "lxcseccomp.h"
96 lxc_log_define(lxc_conf
, lxc
);
99 #define MAXINDEXLEN 20
101 #define MAXLINELEN 128
103 #if HAVE_SYS_CAPABILITY_H
105 #define CAP_SETFCAP 31
108 #ifndef CAP_MAC_OVERRIDE
109 #define CAP_MAC_OVERRIDE 32
112 #ifndef CAP_MAC_ADMIN
113 #define CAP_MAC_ADMIN 33
117 #ifndef PR_CAPBSET_DROP
118 #define PR_CAPBSET_DROP 24
121 #ifndef LO_FLAGS_AUTOCLEAR
122 #define LO_FLAGS_AUTOCLEAR 4
125 /* Define pivot_root() if missing from the C library */
126 #ifndef HAVE_PIVOT_ROOT
127 static int pivot_root(const char * new_root
, const char * put_old
)
129 #ifdef __NR_pivot_root
130 return syscall(__NR_pivot_root
, new_root
, put_old
);
137 extern int pivot_root(const char * new_root
, const char * put_old
);
140 /* Define sethostname() if missing from the C library */
141 #ifndef HAVE_SETHOSTNAME
142 static int sethostname(const char * name
, size_t len
)
144 #ifdef __NR_sethostname
145 return syscall(__NR_sethostname
, name
, len
);
153 /* Define __S_ISTYPE if missing from the C library */
155 #define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
158 char *lxchook_names
[NUM_LXC_HOOKS
] = {
159 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
161 typedef int (*instanciate_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
174 static int instanciate_veth(struct lxc_handler
*, struct lxc_netdev
*);
175 static int instanciate_macvlan(struct lxc_handler
*, struct lxc_netdev
*);
176 static int instanciate_vlan(struct lxc_handler
*, struct lxc_netdev
*);
177 static int instanciate_phys(struct lxc_handler
*, struct lxc_netdev
*);
178 static int instanciate_empty(struct lxc_handler
*, struct lxc_netdev
*);
180 static instanciate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
181 [LXC_NET_VETH
] = instanciate_veth
,
182 [LXC_NET_MACVLAN
] = instanciate_macvlan
,
183 [LXC_NET_VLAN
] = instanciate_vlan
,
184 [LXC_NET_PHYS
] = instanciate_phys
,
185 [LXC_NET_EMPTY
] = instanciate_empty
,
188 static int shutdown_veth(struct lxc_handler
*, struct lxc_netdev
*);
189 static int shutdown_macvlan(struct lxc_handler
*, struct lxc_netdev
*);
190 static int shutdown_vlan(struct lxc_handler
*, struct lxc_netdev
*);
191 static int shutdown_phys(struct lxc_handler
*, struct lxc_netdev
*);
192 static int shutdown_empty(struct lxc_handler
*, struct lxc_netdev
*);
194 static instanciate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
195 [LXC_NET_VETH
] = shutdown_veth
,
196 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
197 [LXC_NET_VLAN
] = shutdown_vlan
,
198 [LXC_NET_PHYS
] = shutdown_phys
,
199 [LXC_NET_EMPTY
] = shutdown_empty
,
202 static struct mount_opt mount_opt
[] = {
203 { "defaults", 0, 0 },
204 { "ro", 0, MS_RDONLY
},
205 { "rw", 1, MS_RDONLY
},
206 { "suid", 1, MS_NOSUID
},
207 { "nosuid", 0, MS_NOSUID
},
208 { "dev", 1, MS_NODEV
},
209 { "nodev", 0, MS_NODEV
},
210 { "exec", 1, MS_NOEXEC
},
211 { "noexec", 0, MS_NOEXEC
},
212 { "sync", 0, MS_SYNCHRONOUS
},
213 { "async", 1, MS_SYNCHRONOUS
},
214 { "dirsync", 0, MS_DIRSYNC
},
215 { "remount", 0, MS_REMOUNT
},
216 { "mand", 0, MS_MANDLOCK
},
217 { "nomand", 1, MS_MANDLOCK
},
218 { "atime", 1, MS_NOATIME
},
219 { "noatime", 0, MS_NOATIME
},
220 { "diratime", 1, MS_NODIRATIME
},
221 { "nodiratime", 0, MS_NODIRATIME
},
222 { "bind", 0, MS_BIND
},
223 { "rbind", 0, MS_BIND
|MS_REC
},
224 { "relatime", 0, MS_RELATIME
},
225 { "norelatime", 1, MS_RELATIME
},
226 { "strictatime", 0, MS_STRICTATIME
},
227 { "nostrictatime", 1, MS_STRICTATIME
},
231 #if HAVE_SYS_CAPABILITY_H
232 static struct caps_opt caps_opt
[] = {
233 { "chown", CAP_CHOWN
},
234 { "dac_override", CAP_DAC_OVERRIDE
},
235 { "dac_read_search", CAP_DAC_READ_SEARCH
},
236 { "fowner", CAP_FOWNER
},
237 { "fsetid", CAP_FSETID
},
238 { "kill", CAP_KILL
},
239 { "setgid", CAP_SETGID
},
240 { "setuid", CAP_SETUID
},
241 { "setpcap", CAP_SETPCAP
},
242 { "linux_immutable", CAP_LINUX_IMMUTABLE
},
243 { "net_bind_service", CAP_NET_BIND_SERVICE
},
244 { "net_broadcast", CAP_NET_BROADCAST
},
245 { "net_admin", CAP_NET_ADMIN
},
246 { "net_raw", CAP_NET_RAW
},
247 { "ipc_lock", CAP_IPC_LOCK
},
248 { "ipc_owner", CAP_IPC_OWNER
},
249 { "sys_module", CAP_SYS_MODULE
},
250 { "sys_rawio", CAP_SYS_RAWIO
},
251 { "sys_chroot", CAP_SYS_CHROOT
},
252 { "sys_ptrace", CAP_SYS_PTRACE
},
253 { "sys_pacct", CAP_SYS_PACCT
},
254 { "sys_admin", CAP_SYS_ADMIN
},
255 { "sys_boot", CAP_SYS_BOOT
},
256 { "sys_nice", CAP_SYS_NICE
},
257 { "sys_resource", CAP_SYS_RESOURCE
},
258 { "sys_time", CAP_SYS_TIME
},
259 { "sys_tty_config", CAP_SYS_TTY_CONFIG
},
260 { "mknod", CAP_MKNOD
},
261 { "lease", CAP_LEASE
},
262 #ifdef CAP_AUDIT_WRITE
263 { "audit_write", CAP_AUDIT_WRITE
},
265 #ifdef CAP_AUDIT_CONTROL
266 { "audit_control", CAP_AUDIT_CONTROL
},
268 { "setfcap", CAP_SETFCAP
},
269 { "mac_override", CAP_MAC_OVERRIDE
},
270 { "mac_admin", CAP_MAC_ADMIN
},
272 { "syslog", CAP_SYSLOG
},
274 #ifdef CAP_WAKE_ALARM
275 { "wake_alarm", CAP_WAKE_ALARM
},
279 static struct caps_opt caps_opt
[] = {};
282 static char padchar
[] =
283 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
285 static char *mkifname(char *template)
291 struct ifaddrs
*ifaddr
, *ifa
;
294 /* Get all the network interfaces */
297 /* Initialize the random number generator */
298 urandom
= fopen ("/dev/urandom", "r");
299 if (urandom
!= NULL
) {
300 if (fread (&seed
, sizeof(seed
), 1, urandom
) <= 0)
311 /* Generate random names until we find one that doesn't exist */
314 name
= strdup(template);
319 for (i
= 0; i
< strlen(name
); i
++) {
320 if (name
[i
] == 'X') {
322 name
[i
] = padchar
[rand_r(&seed
) % (strlen(padchar
) - 1)];
324 name
[i
] = padchar
[rand() % (strlen(padchar
) - 1)];
329 for (ifa
= ifaddr
; ifa
!= NULL
; ifa
= ifa
->ifa_next
) {
330 if (strcmp(ifa
->ifa_name
, name
) == 0) {
346 static int run_buffer(char *buffer
)
352 f
= popen(buffer
, "r");
354 SYSERROR("popen failed");
358 output
= malloc(LXC_LOG_BUFFER_SIZE
);
360 ERROR("failed to allocate memory for script output");
365 while(fgets(output
, LXC_LOG_BUFFER_SIZE
, f
))
366 DEBUG("script output: %s", output
);
372 SYSERROR("Script exited on error");
374 } else if (WIFEXITED(ret
) && WEXITSTATUS(ret
) != 0) {
375 ERROR("Script exited with status %d", WEXITSTATUS(ret
));
377 } else if (WIFSIGNALED(ret
)) {
378 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret
),
379 strsignal(WTERMSIG(ret
)));
386 static int run_script_argv(const char *name
, const char *section
,
387 const char *script
, const char *hook
, const char *lxcpath
,
394 INFO("Executing script '%s' for container '%s', config section '%s'",
395 script
, name
, section
);
397 for (i
=0; argsin
&& argsin
[i
]; i
++)
398 size
+= strlen(argsin
[i
]) + 1;
400 size
+= strlen(hook
) + 1;
402 size
+= strlen(script
);
403 size
+= strlen(name
);
404 size
+= strlen(section
);
410 buffer
= alloca(size
);
412 ERROR("failed to allocate memory");
416 ret
= snprintf(buffer
, size
, "%s %s %s %s", script
, name
, section
, hook
);
417 if (ret
< 0 || ret
>= size
) {
418 ERROR("Script name too long");
422 for (i
=0; argsin
&& argsin
[i
]; i
++) {
425 rc
= snprintf(buffer
+ ret
, len
, " %s", argsin
[i
]);
426 if (rc
< 0 || rc
>= len
) {
427 ERROR("Script args too long");
433 return run_buffer(buffer
);
436 static int run_script(const char *name
, const char *section
,
437 const char *script
, ...)
444 INFO("Executing script '%s' for container '%s', config section '%s'",
445 script
, name
, section
);
447 va_start(ap
, script
);
448 while ((p
= va_arg(ap
, char *)))
449 size
+= strlen(p
) + 1;
452 size
+= strlen(script
);
453 size
+= strlen(name
);
454 size
+= strlen(section
);
460 buffer
= alloca(size
);
462 ERROR("failed to allocate memory");
466 ret
= snprintf(buffer
, size
, "%s %s %s", script
, name
, section
);
467 if (ret
< 0 || ret
>= size
) {
468 ERROR("Script name too long");
472 va_start(ap
, script
);
473 while ((p
= va_arg(ap
, char *))) {
476 rc
= snprintf(buffer
+ ret
, len
, " %s", p
);
477 if (rc
< 0 || rc
>= len
) {
478 ERROR("Script args too long");
485 return run_buffer(buffer
);
488 static int find_fstype_cb(char* buffer
, void *data
)
498 /* we don't try 'nodev' entries */
499 if (strstr(buffer
, "nodev"))
503 fstype
+= lxc_char_left_gc(fstype
, strlen(fstype
));
504 fstype
[lxc_char_right_gc(fstype
, strlen(fstype
))] = '\0';
506 DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
507 cbarg
->rootfs
, cbarg
->target
, fstype
);
509 if (mount(cbarg
->rootfs
, cbarg
->target
, fstype
, cbarg
->mntopt
, NULL
)) {
510 DEBUG("mount failed with error: %s", strerror(errno
));
514 INFO("mounted '%s' on '%s', with fstype '%s'",
515 cbarg
->rootfs
, cbarg
->target
, fstype
);
520 static int mount_unknow_fs(const char *rootfs
, const char *target
, int mntopt
)
535 * find the filesystem type with brute force:
536 * first we check with /etc/filesystems, in case the modules
537 * are auto-loaded and fall back to the supported kernel fs
544 for (i
= 0; i
< sizeof(fsfile
)/sizeof(fsfile
[0]); i
++) {
548 if (access(fsfile
[i
], F_OK
))
551 ret
= lxc_file_for_each_line(fsfile
[i
], find_fstype_cb
, &cbarg
);
553 ERROR("failed to parse '%s'", fsfile
[i
]);
561 ERROR("failed to determine fs type for '%s'", rootfs
);
565 static int mount_rootfs_dir(const char *rootfs
, const char *target
)
567 return mount(rootfs
, target
, "none", MS_BIND
| MS_REC
, NULL
);
570 static int setup_lodev(const char *rootfs
, int fd
, struct loop_info64
*loinfo
)
575 rfd
= open(rootfs
, O_RDWR
);
577 SYSERROR("failed to open '%s'", rootfs
);
581 memset(loinfo
, 0, sizeof(*loinfo
));
583 loinfo
->lo_flags
= LO_FLAGS_AUTOCLEAR
;
585 if (ioctl(fd
, LOOP_SET_FD
, rfd
)) {
586 SYSERROR("failed to LOOP_SET_FD");
590 if (ioctl(fd
, LOOP_SET_STATUS64
, loinfo
)) {
591 SYSERROR("failed to LOOP_SET_STATUS64");
602 static int mount_rootfs_file(const char *rootfs
, const char *target
)
604 struct dirent dirent
, *direntp
;
605 struct loop_info64 loinfo
;
606 int ret
= -1, fd
= -1, rc
;
608 char path
[MAXPATHLEN
];
610 dir
= opendir("/dev");
612 SYSERROR("failed to open '/dev'");
616 while (!readdir_r(dir
, &dirent
, &direntp
)) {
621 if (!strcmp(direntp
->d_name
, "."))
624 if (!strcmp(direntp
->d_name
, ".."))
627 if (strncmp(direntp
->d_name
, "loop", 4))
630 rc
= snprintf(path
, MAXPATHLEN
, "/dev/%s", direntp
->d_name
);
631 if (rc
< 0 || rc
>= MAXPATHLEN
)
634 fd
= open(path
, O_RDWR
);
638 if (ioctl(fd
, LOOP_GET_STATUS64
, &loinfo
) == 0) {
643 if (errno
!= ENXIO
) {
644 WARN("unexpected error for ioctl on '%s': %m",
650 DEBUG("found '%s' free lodev", path
);
652 ret
= setup_lodev(rootfs
, fd
, &loinfo
);
654 ret
= mount_unknow_fs(path
, target
, 0);
661 WARN("failed to close directory");
666 static int mount_rootfs_block(const char *rootfs
, const char *target
)
668 return mount_unknow_fs(rootfs
, target
, 0);
673 * if rootfs is a directory, then open ${rootfs}.hold for writing for the
674 * duration of the container run, to prevent the container from marking the
675 * underlying fs readonly on shutdown.
676 * return -1 on error.
677 * return -2 if nothing needed to be pinned.
678 * return an open fd (>=0) if we pinned it.
680 int pin_rootfs(const char *rootfs
)
682 char absrootfs
[MAXPATHLEN
];
683 char absrootfspin
[MAXPATHLEN
];
687 if (rootfs
== NULL
|| strlen(rootfs
) == 0)
690 if (!realpath(rootfs
, absrootfs
)) {
691 INFO("failed to get real path for '%s', not pinning", rootfs
);
695 if (access(absrootfs
, F_OK
)) {
696 SYSERROR("'%s' is not accessible", absrootfs
);
700 if (stat(absrootfs
, &s
)) {
701 SYSERROR("failed to stat '%s'", absrootfs
);
705 if (!S_ISDIR(s
.st_mode
))
708 ret
= snprintf(absrootfspin
, MAXPATHLEN
, "%s%s", absrootfs
, ".hold");
709 if (ret
>= MAXPATHLEN
) {
710 SYSERROR("pathname too long for rootfs hold file");
714 fd
= open(absrootfspin
, O_CREAT
| O_RDWR
, S_IWUSR
|S_IRUSR
);
715 INFO("opened %s as fd %d\n", absrootfspin
, fd
);
719 static int mount_rootfs(const char *rootfs
, const char *target
)
721 char absrootfs
[MAXPATHLEN
];
725 typedef int (*rootfs_cb
)(const char *, const char *);
731 { S_IFDIR
, mount_rootfs_dir
},
732 { S_IFBLK
, mount_rootfs_block
},
733 { S_IFREG
, mount_rootfs_file
},
736 if (!realpath(rootfs
, absrootfs
)) {
737 SYSERROR("failed to get real path for '%s'", rootfs
);
741 if (access(absrootfs
, F_OK
)) {
742 SYSERROR("'%s' is not accessible", absrootfs
);
746 if (stat(absrootfs
, &s
)) {
747 SYSERROR("failed to stat '%s'", absrootfs
);
751 for (i
= 0; i
< sizeof(rtfs_type
)/sizeof(rtfs_type
[0]); i
++) {
753 if (!__S_ISTYPE(s
.st_mode
, rtfs_type
[i
].type
))
756 return rtfs_type
[i
].cb(absrootfs
, target
);
759 ERROR("unsupported rootfs type for '%s'", absrootfs
);
763 static int setup_utsname(struct utsname
*utsname
)
768 if (sethostname(utsname
->nodename
, strlen(utsname
->nodename
))) {
769 SYSERROR("failed to set the hostname to '%s'", utsname
->nodename
);
773 INFO("'%s' hostname has been setup", utsname
->nodename
);
778 static int setup_tty(const struct lxc_rootfs
*rootfs
,
779 const struct lxc_tty_info
*tty_info
, char *ttydir
)
781 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
787 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
789 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
791 ret
= snprintf(path
, sizeof(path
), "%s/dev/tty%d",
792 rootfs
->mount
, i
+ 1);
793 if (ret
>= sizeof(path
)) {
794 ERROR("pathname too long for ttys");
798 /* create dev/lxc/tty%d" */
799 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/tty%d",
800 rootfs
->mount
, ttydir
, i
+ 1);
801 if (ret
>= sizeof(lxcpath
)) {
802 ERROR("pathname too long for ttys");
805 ret
= creat(lxcpath
, 0660);
806 if (ret
==-1 && errno
!= EEXIST
) {
807 SYSERROR("error creating %s\n", lxcpath
);
813 if (ret
&& errno
!= ENOENT
) {
814 SYSERROR("error unlinking %s\n", path
);
818 if (mount(pty_info
->name
, lxcpath
, "none", MS_BIND
, 0)) {
819 WARN("failed to mount '%s'->'%s'",
820 pty_info
->name
, path
);
824 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/tty%d", ttydir
, i
+1);
825 if (ret
>= sizeof(lxcpath
)) {
826 ERROR("tty pathname too long");
829 ret
= symlink(lxcpath
, path
);
831 SYSERROR("failed to create symlink for tty %d\n", i
+1);
835 /* If we populated /dev, then we need to create /dev/ttyN */
836 if (access(path
, F_OK
)) {
837 ret
= creat(path
, 0660);
839 SYSERROR("error creating %s\n", path
);
840 /* this isn't fatal, continue */
844 if (mount(pty_info
->name
, path
, "none", MS_BIND
, 0)) {
845 WARN("failed to mount '%s'->'%s'",
846 pty_info
->name
, path
);
852 INFO("%d tty(s) has been setup", tty_info
->nbtty
);
857 static int setup_rootfs_pivot_root_cb(char *buffer
, void *data
)
859 struct lxc_list
*mountlist
, *listentry
, *iterator
;
860 char *pivotdir
, *mountpoint
, *mountentry
, *saveptr
= NULL
;
865 cbparm
= (void **)data
;
867 mountlist
= cbparm
[0];
868 pivotdir
= cbparm
[1];
870 /* parse entry, first field is mountname, ignore */
871 mountpoint
= strtok_r(mountentry
, " ", &saveptr
);
875 /* second field is mountpoint */
876 mountpoint
= strtok_r(NULL
, " ", &saveptr
);
880 /* only consider mountpoints below old root fs */
881 if (strncmp(mountpoint
, pivotdir
, strlen(pivotdir
)))
884 /* filter duplicate mountpoints */
886 lxc_list_for_each(iterator
, mountlist
) {
887 if (!strcmp(iterator
->elem
, mountpoint
)) {
895 /* add entry to list */
896 listentry
= malloc(sizeof(*listentry
));
898 SYSERROR("malloc for mountpoint listentry failed");
902 listentry
->elem
= strdup(mountpoint
);
903 if (!listentry
->elem
) {
904 SYSERROR("strdup failed");
908 lxc_list_add_tail(mountlist
, listentry
);
913 static int umount_oldrootfs(const char *oldrootfs
)
915 char path
[MAXPATHLEN
];
917 struct lxc_list mountlist
, *iterator
, *next
;
918 int ok
, still_mounted
, last_still_mounted
;
921 /* read and parse /proc/mounts in old root fs */
922 lxc_list_init(&mountlist
);
924 /* oldrootfs is on the top tree directory now */
925 rc
= snprintf(path
, sizeof(path
), "/%s", oldrootfs
);
926 if (rc
>= sizeof(path
)) {
927 ERROR("rootfs name too long");
930 cbparm
[0] = &mountlist
;
932 cbparm
[1] = strdup(path
);
934 SYSERROR("strdup failed");
938 rc
= snprintf(path
, sizeof(path
), "%s/proc/mounts", oldrootfs
);
939 if (rc
>= sizeof(path
)) {
940 ERROR("container proc/mounts name too long");
944 ok
= lxc_file_for_each_line(path
,
945 setup_rootfs_pivot_root_cb
, &cbparm
);
947 SYSERROR("failed to read or parse mount list '%s'", path
);
951 /* umount filesystems until none left or list no longer shrinks */
954 last_still_mounted
= still_mounted
;
957 lxc_list_for_each_safe(iterator
, &mountlist
, next
) {
959 /* umount normally */
960 if (!umount(iterator
->elem
)) {
961 DEBUG("umounted '%s'", (char *)iterator
->elem
);
962 lxc_list_del(iterator
);
969 } while (still_mounted
> 0 && still_mounted
!= last_still_mounted
);
972 lxc_list_for_each(iterator
, &mountlist
) {
974 /* let's try a lazy umount */
975 if (!umount2(iterator
->elem
, MNT_DETACH
)) {
976 INFO("lazy unmount of '%s'", (char *)iterator
->elem
);
980 /* be more brutal (nfs) */
981 if (!umount2(iterator
->elem
, MNT_FORCE
)) {
982 INFO("forced unmount of '%s'", (char *)iterator
->elem
);
986 WARN("failed to unmount '%s'", (char *)iterator
->elem
);
992 static int setup_rootfs_pivot_root(const char *rootfs
, const char *pivotdir
)
994 char path
[MAXPATHLEN
];
995 int remove_pivotdir
= 0;
998 /* change into new root fs */
1000 SYSERROR("can't chdir to new rootfs '%s'", rootfs
);
1005 pivotdir
= "lxc_putold";
1007 /* compute the full path to pivotdir under rootfs */
1008 rc
= snprintf(path
, sizeof(path
), "%s/%s", rootfs
, pivotdir
);
1009 if (rc
>= sizeof(path
)) {
1010 ERROR("pivot dir name too long");
1014 if (access(path
, F_OK
)) {
1016 if (mkdir_p(path
, 0755)) {
1017 SYSERROR("failed to create pivotdir '%s'", path
);
1021 remove_pivotdir
= 1;
1022 DEBUG("created '%s' directory", path
);
1025 DEBUG("mountpoint for old rootfs is '%s'", path
);
1027 /* pivot_root into our new root fs */
1028 if (pivot_root(".", path
)) {
1029 SYSERROR("pivot_root syscall failed");
1034 SYSERROR("can't chdir to / after pivot_root");
1038 DEBUG("pivot_root syscall to '%s' successful", rootfs
);
1040 /* we switch from absolute path to relative path */
1041 if (umount_oldrootfs(pivotdir
))
1044 /* remove temporary mount point, we don't consider the removing
1046 if (remove_pivotdir
&& rmdir(pivotdir
))
1047 WARN("can't remove mountpoint '%s': %m", pivotdir
);
1053 * Do we want to add options for max size of /dev and a file to
1054 * specify which devices to create?
1056 static int mount_autodev(char *root
)
1059 char path
[MAXPATHLEN
];
1061 INFO("Mounting /dev under %s\n", root
);
1062 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev", root
);
1063 if (ret
< 0 || ret
> MAXPATHLEN
)
1065 ret
= mount("none", path
, "tmpfs", 0, "size=100000");
1067 SYSERROR("Failed to mount /dev at %s\n", root
);
1070 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev/pts", root
);
1071 if (ret
< 0 || ret
>= MAXPATHLEN
)
1073 ret
= mkdir(path
, S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
);
1075 SYSERROR("Failed to create /dev/pts in container");
1079 INFO("Mounted /dev under %s\n", root
);
1090 struct lxc_devs lxc_devs
[] = {
1091 { "null", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 3 },
1092 { "zero", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 5 },
1093 { "full", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 7 },
1094 { "urandom", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 9 },
1095 { "random", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 1, 8 },
1096 { "tty", S_IFCHR
| S_IRWXU
| S_IRWXG
| S_IRWXO
, 5, 0 },
1097 { "console", S_IFCHR
| S_IRUSR
| S_IWUSR
, 5, 1 },
1100 static int setup_autodev(char *root
)
1104 char path
[MAXPATHLEN
];
1108 INFO("Creating initial consoles under %s/dev\n", root
);
1110 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev", root
);
1111 if (ret
< 0 || ret
>= MAXPATHLEN
) {
1112 ERROR("Error calculating container /dev location");
1116 INFO("Populating /dev under %s\n", root
);
1117 cmask
= umask(S_IXUSR
| S_IXGRP
| S_IXOTH
);
1118 for (i
= 0; i
< sizeof(lxc_devs
) / sizeof(lxc_devs
[0]); i
++) {
1120 ret
= snprintf(path
, MAXPATHLEN
, "%s/dev/%s", root
, d
->name
);
1121 if (ret
< 0 || ret
>= MAXPATHLEN
)
1123 ret
= mknod(path
, d
->mode
, makedev(d
->maj
, d
->min
));
1124 if (ret
&& errno
!= EEXIST
) {
1125 SYSERROR("Error creating %s\n", d
->name
);
1131 INFO("Populated /dev under %s\n", root
);
1136 * Detect whether / is mounted MS_SHARED. The only way I know of to
1137 * check that is through /proc/self/mountinfo.
1138 * I'm only checking for /. If the container rootfs or mount location
1139 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1140 * out would be too much work to be worth it.
1142 #define LINELEN 4096
1143 int detect_shared_rootfs(void)
1145 char buf
[LINELEN
], *p
;
1150 f
= fopen("/proc/self/mountinfo", "r");
1153 while ((p
= fgets(buf
, LINELEN
, f
))) {
1154 INFO("looking at .%s.", p
);
1155 for (p
= buf
, i
=0; p
&& i
< 4; i
++)
1156 p
= index(p
+1, ' ');
1159 p2
= index(p
+1, ' ');
1163 INFO("now p is .%s.", p
);
1164 if (strcmp(p
+1, "/") == 0) {
1165 // this is '/'. is it shared?
1166 p
= index(p2
+1, ' ');
1167 if (p
&& strstr(p
, "shared:")) {
1178 * I'll forgive you for asking whether all of this is needed :) The
1180 * pivot_root will fail if the new root, the put_old dir, or the parent
1181 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1182 * or may not be current->fs_root - if we assumed it always was, we could
1183 * just mount --make-rslave /). So,
1184 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1185 * 2. make that MS_SLAVE
1186 * 3. make a 'root' directory under that
1187 * 4. mount --rbind / under the $tinyroot/root.
1188 * 5. make that rslave
1189 * 6. chdir and chroot into $tinyroot/root
1190 * 7. $tinyroot will be unmounted by our parent in start.c
1192 static int chroot_into_slave(struct lxc_conf
*conf
)
1194 char path
[MAXPATHLEN
];
1195 const char *destpath
= conf
->rootfs
.mount
;
1198 if (mount(destpath
, destpath
, NULL
, MS_BIND
, 0)) {
1199 SYSERROR("failed to mount %s bind", destpath
);
1202 if (mount("", destpath
, NULL
, MS_SLAVE
, 0)) {
1203 SYSERROR("failed to make %s slave", destpath
);
1206 if (mount("none", destpath
, "tmpfs", 0, "size=10000")) {
1207 SYSERROR("Failed to mount tmpfs / at %s", destpath
);
1210 ret
= snprintf(path
, MAXPATHLEN
, "%s/root", destpath
);
1211 if (ret
< 0 || ret
>= MAXPATHLEN
) {
1212 ERROR("out of memory making root path");
1215 if (mkdir(path
, S_IRWXU
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
)) {
1216 SYSERROR("Failed to create /dev/pts in container");
1219 if (mount("/", path
, NULL
, MS_BIND
|MS_REC
, 0)) {
1220 SYSERROR("Failed to rbind mount / to %s", path
);
1223 if (mount("", destpath
, NULL
, MS_SLAVE
|MS_REC
, 0)) {
1224 SYSERROR("Failed to make tmp-/ at %s rslave", path
);
1228 SYSERROR("Failed to chdir into tmp-/");
1232 SYSERROR("Failed to chroot into tmp-/");
1235 INFO("Chrooted into tmp-/ at %s\n", path
);
1239 static int setup_rootfs(struct lxc_conf
*conf
)
1241 const struct lxc_rootfs
*rootfs
= &conf
->rootfs
;
1243 if (!rootfs
->path
) {
1244 if (mount("", "/", NULL
, MS_SLAVE
|MS_REC
, 0)) {
1245 SYSERROR("Failed to make / rslave");
1251 if (access(rootfs
->mount
, F_OK
)) {
1252 SYSERROR("failed to access to '%s', check it is present",
1257 if (detect_shared_rootfs()) {
1258 if (chroot_into_slave(conf
)) {
1259 ERROR("Failed to chroot into slave /");
1264 // First try mounting rootfs using a bdev
1265 struct bdev
*bdev
= bdev_init(rootfs
->path
, rootfs
->mount
, NULL
);
1266 if (bdev
&& bdev
->ops
->mount(bdev
) == 0) {
1268 DEBUG("mounted '%s' on '%s'", rootfs
->path
, rootfs
->mount
);
1273 if (mount_rootfs(rootfs
->path
, rootfs
->mount
)) {
1274 ERROR("failed to mount rootfs");
1278 DEBUG("mounted '%s' on '%s'", rootfs
->path
, rootfs
->mount
);
1283 int setup_pivot_root(const struct lxc_rootfs
*rootfs
)
1288 if (setup_rootfs_pivot_root(rootfs
->mount
, rootfs
->pivot
)) {
1289 ERROR("failed to setup pivot root");
1296 static int setup_pts(int pts
)
1298 char target
[PATH_MAX
];
1303 if (!access("/dev/pts/ptmx", F_OK
) && umount("/dev/pts")) {
1304 SYSERROR("failed to umount 'dev/pts'");
1308 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL
,
1309 "newinstance,ptmxmode=0666")) {
1310 SYSERROR("failed to mount a new instance of '/dev/pts'");
1314 if (access("/dev/ptmx", F_OK
)) {
1315 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1317 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
1321 if (realpath("/dev/ptmx", target
) && !strcmp(target
, "/dev/pts/ptmx"))
1324 /* fallback here, /dev/pts/ptmx exists just mount bind */
1325 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND
, 0)) {
1326 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
1330 INFO("created new pts instance");
1336 static int setup_personality(int persona
)
1338 #if HAVE_SYS_PERSONALITY_H
1342 if (personality(persona
) < 0) {
1343 SYSERROR("failed to set personality to '0x%x'", persona
);
1347 INFO("set personality to '0x%x'", persona
);
1353 static int setup_dev_console(const struct lxc_rootfs
*rootfs
,
1354 const struct lxc_console
*console
)
1356 char path
[MAXPATHLEN
];
1360 ret
= snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1361 if (ret
>= sizeof(path
)) {
1362 ERROR("console path too long\n");
1366 if (access(path
, F_OK
)) {
1367 WARN("rootfs specified but no console found at '%s'", path
);
1371 if (console
->master
< 0) {
1376 if (stat(path
, &s
)) {
1377 SYSERROR("failed to stat '%s'", path
);
1381 if (chmod(console
->name
, s
.st_mode
)) {
1382 SYSERROR("failed to set mode '0%o' to '%s'",
1383 s
.st_mode
, console
->name
);
1387 if (mount(console
->name
, path
, "none", MS_BIND
, 0)) {
1388 ERROR("failed to mount '%s' on '%s'", console
->name
, path
);
1392 INFO("console has been setup");
1396 static int setup_ttydir_console(const struct lxc_rootfs
*rootfs
,
1397 const struct lxc_console
*console
,
1400 char path
[MAXPATHLEN
], lxcpath
[MAXPATHLEN
];
1403 /* create rootfs/dev/<ttydir> directory */
1404 ret
= snprintf(path
, sizeof(path
), "%s/dev/%s", rootfs
->mount
,
1406 if (ret
>= sizeof(path
))
1408 ret
= mkdir(path
, 0755);
1409 if (ret
&& errno
!= EEXIST
) {
1410 SYSERROR("failed with errno %d to create %s\n", errno
, path
);
1413 INFO("created %s\n", path
);
1415 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/dev/%s/console",
1416 rootfs
->mount
, ttydir
);
1417 if (ret
>= sizeof(lxcpath
)) {
1418 ERROR("console path too long\n");
1422 snprintf(path
, sizeof(path
), "%s/dev/console", rootfs
->mount
);
1424 if (ret
&& errno
!= ENOENT
) {
1425 SYSERROR("error unlinking %s\n", path
);
1429 ret
= creat(lxcpath
, 0660);
1430 if (ret
==-1 && errno
!= EEXIST
) {
1431 SYSERROR("error %d creating %s\n", errno
, lxcpath
);
1437 if (console
->master
< 0) {
1442 if (mount(console
->name
, lxcpath
, "none", MS_BIND
, 0)) {
1443 ERROR("failed to mount '%s' on '%s'", console
->name
, lxcpath
);
1447 /* create symlink from rootfs/dev/console to 'lxc/console' */
1448 ret
= snprintf(lxcpath
, sizeof(lxcpath
), "%s/console", ttydir
);
1449 if (ret
>= sizeof(lxcpath
)) {
1450 ERROR("lxc/console path too long");
1453 ret
= symlink(lxcpath
, path
);
1455 SYSERROR("failed to create symlink for console");
1459 INFO("console has been setup on %s", lxcpath
);
1464 static int setup_console(const struct lxc_rootfs
*rootfs
,
1465 const struct lxc_console
*console
,
1468 /* We don't have a rootfs, /dev/console will be shared */
1472 return setup_dev_console(rootfs
, console
);
1474 return setup_ttydir_console(rootfs
, console
, ttydir
);
1477 static int setup_kmsg(const struct lxc_rootfs
*rootfs
,
1478 const struct lxc_console
*console
)
1480 char kpath
[MAXPATHLEN
];
1485 ret
= snprintf(kpath
, sizeof(kpath
), "%s/dev/kmsg", rootfs
->mount
);
1486 if (ret
< 0 || ret
>= sizeof(kpath
))
1489 ret
= unlink(kpath
);
1490 if (ret
&& errno
!= ENOENT
) {
1491 SYSERROR("error unlinking %s\n", kpath
);
1495 ret
= symlink("console", kpath
);
1497 SYSERROR("failed to create symlink for kmsg");
1504 static void parse_mntopt(char *opt
, unsigned long *flags
, char **data
)
1506 struct mount_opt
*mo
;
1508 /* If opt is found in mount_opt, set or clear flags.
1509 * Otherwise append it to data. */
1511 for (mo
= &mount_opt
[0]; mo
->name
!= NULL
; mo
++) {
1512 if (!strncmp(opt
, mo
->name
, strlen(mo
->name
))) {
1514 *flags
&= ~mo
->flag
;
1526 static int parse_mntopts(const char *mntopts
, unsigned long *mntflags
,
1530 char *p
, *saveptr
= NULL
;
1538 s
= strdup(mntopts
);
1540 SYSERROR("failed to allocate memory");
1544 data
= malloc(strlen(s
) + 1);
1546 SYSERROR("failed to allocate memory");
1552 for (p
= strtok_r(s
, ",", &saveptr
); p
!= NULL
;
1553 p
= strtok_r(NULL
, ",", &saveptr
))
1554 parse_mntopt(p
, mntflags
, &data
);
1565 static int mount_entry(const char *fsname
, const char *target
,
1566 const char *fstype
, unsigned long mountflags
,
1569 if (mount(fsname
, target
, fstype
, mountflags
& ~MS_REMOUNT
, data
)) {
1570 SYSERROR("failed to mount '%s' on '%s'", fsname
, target
);
1574 if ((mountflags
& MS_REMOUNT
) || (mountflags
& MS_BIND
)) {
1576 DEBUG("remounting %s on %s to respect bind or remount options",
1579 if (mount(fsname
, target
, fstype
,
1580 mountflags
| MS_REMOUNT
, data
)) {
1581 SYSERROR("failed to mount '%s' on '%s'",
1587 DEBUG("mounted '%s' on '%s', type '%s'", fsname
, target
, fstype
);
1592 static inline int mount_entry_on_systemfs(struct mntent
*mntent
)
1594 unsigned long mntflags
;
1598 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1599 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1603 ret
= mount_entry(mntent
->mnt_fsname
, mntent
->mnt_dir
,
1604 mntent
->mnt_type
, mntflags
, mntdata
);
1606 if (hasmntopt(mntent
, "optional") != NULL
)
1614 static int mount_entry_on_absolute_rootfs(struct mntent
*mntent
,
1615 const struct lxc_rootfs
*rootfs
,
1616 const char *lxc_name
)
1619 char path
[MAXPATHLEN
];
1620 unsigned long mntflags
;
1622 int r
, ret
= 0, offset
;
1623 const char *lxcpath
;
1625 if (parse_mntopts(mntent
->mnt_opts
, &mntflags
, &mntdata
) < 0) {
1626 ERROR("failed to parse mount option '%s'", mntent
->mnt_opts
);
1630 lxcpath
= default_lxc_path();
1632 ERROR("Out of memory");
1636 /* if rootfs->path is a blockdev path, allow container fstab to
1637 * use $lxcpath/CN/rootfs as the target prefix */
1638 r
= snprintf(path
, MAXPATHLEN
, "%s/%s/rootfs", lxcpath
, lxc_name
);
1639 if (r
< 0 || r
>= MAXPATHLEN
)
1642 aux
= strstr(mntent
->mnt_dir
, path
);
1644 offset
= strlen(path
);
1649 aux
= strstr(mntent
->mnt_dir
, rootfs
->path
);
1651 WARN("ignoring mount point '%s'", mntent
->mnt_dir
);
1654 offset
= strlen(rootfs
->path
);
1658 r
= snprintf(path
, MAXPATHLEN
, "%s/%s", rootfs
->mount
,
1660 if (r
< 0 || r
>= MAXPATHLEN
) {
1661 WARN("pathnme too long for '%s'", mntent
->mnt_dir
);
1667 ret
= mount_entry(mntent
->mnt_fsname
, path
, mntent
->mnt_type
,
1670 if (hasmntopt(mntent
, "optional") != NULL
)
/*
 * Mount an fstab entry whose target is relative; the target is resolved
 * against @rootfs (the container's root mount point).
 *
 * Returns 0 on success or when the entry is marked "optional", -1 on
 * failure.  The parsed option data is released on every exit path.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

out:
	free(mntdata);
	return ret;
}
1709 static int mount_file_entries(const struct lxc_rootfs
*rootfs
, FILE *file
,
1710 const char *lxc_name
)
1712 struct mntent
*mntent
;
1715 while ((mntent
= getmntent(file
))) {
1717 if (!rootfs
->path
) {
1718 if (mount_entry_on_systemfs(mntent
))
1723 /* We have a separate root, mounts are relative to it */
1724 if (mntent
->mnt_dir
[0] != '/') {
1725 if (mount_entry_on_relative_rootfs(mntent
,
1731 if (mount_entry_on_absolute_rootfs(mntent
, rootfs
, lxc_name
))
1737 INFO("mount points have been setup");
/*
 * Mount the entries listed in the fstab file @fstab, if one is configured.
 *
 * Returns 0 when @fstab is NULL or all entries mounted, -1 when the file
 * cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *stream;
	int rc;

	if (fstab == NULL)
		return 0;

	stream = setmntent(fstab, "r");
	if (stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, stream, lxc_name);
	endmntent(stream);

	return rc;
}
1763 static int setup_mount_entries(const struct lxc_rootfs
*rootfs
, struct lxc_list
*mount
,
1764 const char *lxc_name
)
1767 struct lxc_list
*iterator
;
1773 ERROR("tmpfile error: %m");
1777 lxc_list_for_each(iterator
, mount
) {
1778 mount_entry
= iterator
->elem
;
1779 fprintf(file
, "%s\n", mount_entry
);
1784 ret
= mount_file_entries(rootfs
, file
, lxc_name
);
1790 static int setup_caps(struct lxc_list
*caps
)
1792 struct lxc_list
*iterator
;
1797 lxc_list_for_each(iterator
, caps
) {
1799 drop_entry
= iterator
->elem
;
1803 for (i
= 0; i
< sizeof(caps_opt
)/sizeof(caps_opt
[0]); i
++) {
1805 if (strcmp(drop_entry
, caps_opt
[i
].name
))
1808 capid
= caps_opt
[i
].value
;
1813 /* try to see if it's numeric, so the user may specify
1814 * capabilities that the running kernel knows about but
1816 capid
= strtol(drop_entry
, &ptr
, 10);
1817 if (!ptr
|| *ptr
!= '\0' ||
1818 capid
== LONG_MIN
|| capid
== LONG_MAX
)
1819 /* not a valid number */
1821 else if (capid
> lxc_caps_last_cap())
1822 /* we have a number but it's not a valid
1828 ERROR("unknown capability %s", drop_entry
);
1832 DEBUG("drop capability '%s' (%d)", drop_entry
, capid
);
1834 if (prctl(PR_CAPBSET_DROP
, capid
, 0, 0, 0)) {
1835 SYSERROR("failed to remove %s capability", drop_entry
);
1841 DEBUG("capabilities have been setup");
1846 static int dropcaps_except(struct lxc_list
*caps
)
1848 struct lxc_list
*iterator
;
1852 int numcaps
= lxc_caps_last_cap() + 1;
1853 INFO("found %d capabilities\n", numcaps
);
1855 // caplist[i] is 1 if we keep capability i
1856 int *caplist
= alloca(numcaps
* sizeof(int));
1857 memset(caplist
, 0, numcaps
* sizeof(int));
1859 lxc_list_for_each(iterator
, caps
) {
1861 keep_entry
= iterator
->elem
;
1865 for (i
= 0; i
< sizeof(caps_opt
)/sizeof(caps_opt
[0]); i
++) {
1867 if (strcmp(keep_entry
, caps_opt
[i
].name
))
1870 capid
= caps_opt
[i
].value
;
1875 /* try to see if it's numeric, so the user may specify
1876 * capabilities that the running kernel knows about but
1878 capid
= strtol(keep_entry
, &ptr
, 10);
1879 if (!ptr
|| *ptr
!= '\0' ||
1880 capid
== LONG_MIN
|| capid
== LONG_MAX
)
1881 /* not a valid number */
1883 else if (capid
> lxc_caps_last_cap())
1884 /* we have a number but it's not a valid
1890 ERROR("unknown capability %s", keep_entry
);
1894 DEBUG("drop capability '%s' (%d)", keep_entry
, capid
);
1898 for (i
=0; i
<numcaps
; i
++) {
1901 if (prctl(PR_CAPBSET_DROP
, i
, 0, 0, 0)) {
1902 SYSERROR("failed to remove capability %d", i
);
1907 DEBUG("capabilities have been setup");
1912 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
1914 struct sockaddr sockaddr
;
1918 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
1920 ERROR("mac address '%s' conversion failed : %s",
1921 hwaddr
, strerror(-ret
));
1925 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
1926 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
1928 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
1930 ERROR("socket failure : %s", strerror(errno
));
1934 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
1937 ERROR("ioctl failure : %s", strerror(errno
));
1939 DEBUG("mac address '%s' on '%s' has been setup", hwaddr
, ifname
);
1944 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
1946 struct lxc_list
*iterator
;
1947 struct lxc_inetdev
*inetdev
;
1950 lxc_list_for_each(iterator
, ip
) {
1952 inetdev
= iterator
->elem
;
1954 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
1955 &inetdev
->bcast
, inetdev
->prefix
);
1957 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1958 ifindex
, strerror(-err
));
1966 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
1968 struct lxc_list
*iterator
;
1969 struct lxc_inet6dev
*inet6dev
;
1972 lxc_list_for_each(iterator
, ip
) {
1974 inet6dev
= iterator
->elem
;
1976 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
1977 &inet6dev
->mcast
, &inet6dev
->acast
,
1980 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1981 ifindex
, strerror(-err
));
1989 static int setup_netdev(struct lxc_netdev
*netdev
)
1991 char ifname
[IFNAMSIZ
];
1992 char *current_ifname
= ifname
;
1995 /* empty network namespace */
1996 if (!netdev
->ifindex
) {
1997 if (netdev
->flags
& IFF_UP
) {
1998 err
= lxc_netdev_up("lo");
2000 ERROR("failed to set the loopback up : %s",
2008 /* retrieve the name of the interface */
2009 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
2010 ERROR("no interface corresponding to index '%d'",
2015 /* default: let the system to choose one interface name */
2017 netdev
->name
= netdev
->type
== LXC_NET_PHYS
?
2018 netdev
->link
: "eth%d";
2020 /* rename the interface name */
2021 err
= lxc_netdev_rename_by_name(ifname
, netdev
->name
);
2023 ERROR("failed to rename %s->%s : %s", ifname
, netdev
->name
,
2028 /* Re-read the name of the interface because its name has changed
2029 * and would be automatically allocated by the system
2031 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
2032 ERROR("no interface corresponding to index '%d'",
2037 /* set a mac address */
2038 if (netdev
->hwaddr
) {
2039 if (setup_hw_addr(netdev
->hwaddr
, current_ifname
)) {
2040 ERROR("failed to setup hw address for '%s'",
2046 /* setup ipv4 addresses on the interface */
2047 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
)) {
2048 ERROR("failed to setup ip addresses for '%s'",
2053 /* setup ipv6 addresses on the interface */
2054 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
)) {
2055 ERROR("failed to setup ipv6 addresses for '%s'",
2060 /* set the network device up */
2061 if (netdev
->flags
& IFF_UP
) {
2064 err
= lxc_netdev_up(current_ifname
);
2066 ERROR("failed to set '%s' up : %s", current_ifname
,
2071 /* the network is up, make the loopback up too */
2072 err
= lxc_netdev_up("lo");
2074 ERROR("failed to set the loopback up : %s",
2080 /* We can only set up the default routes after bringing
2081 * up the interface, sine bringing up the interface adds
2082 * the link-local routes and we can't add a default
2083 * route if the gateway is not reachable. */
2085 /* setup ipv4 gateway on the interface */
2086 if (netdev
->ipv4_gateway
) {
2087 if (!(netdev
->flags
& IFF_UP
)) {
2088 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname
);
2092 if (lxc_list_empty(&netdev
->ipv4
)) {
2093 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname
);
2097 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
2099 ERROR("failed to setup ipv4 gateway for '%s': %s",
2100 ifname
, strerror(-err
));
2101 if (netdev
->ipv4_gateway_auto
) {
2102 char buf
[INET_ADDRSTRLEN
];
2103 inet_ntop(AF_INET
, netdev
->ipv4_gateway
, buf
, sizeof(buf
));
2104 ERROR("tried to set autodetected ipv4 gateway '%s'", buf
);
2110 /* setup ipv6 gateway on the interface */
2111 if (netdev
->ipv6_gateway
) {
2112 if (!(netdev
->flags
& IFF_UP
)) {
2113 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname
);
2117 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
)) {
2118 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname
);
2122 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
2124 ERROR("failed to setup ipv6 gateway for '%s': %s",
2125 ifname
, strerror(-err
));
2126 if (netdev
->ipv6_gateway_auto
) {
2127 char buf
[INET6_ADDRSTRLEN
];
2128 inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, buf
, sizeof(buf
));
2129 ERROR("tried to set autodetected ipv6 gateway '%s'", buf
);
2135 DEBUG("'%s' has been setup", current_ifname
);
2140 static int setup_network(struct lxc_list
*network
)
2142 struct lxc_list
*iterator
;
2143 struct lxc_netdev
*netdev
;
2145 lxc_list_for_each(iterator
, network
) {
2147 netdev
= iterator
->elem
;
2149 if (setup_netdev(netdev
)) {
2150 ERROR("failed to setup netdev");
2155 if (!lxc_list_empty(network
))
2156 INFO("network has been setup");
2161 void lxc_rename_phys_nics_on_shutdown(struct lxc_conf
*conf
)
2165 INFO("running to reset %d nic names", conf
->num_savednics
);
2166 for (i
=0; i
<conf
->num_savednics
; i
++) {
2167 struct saved_nic
*s
= &conf
->saved_nics
[i
];
2168 INFO("resetting nic %d to %s\n", s
->ifindex
, s
->orig_name
);
2169 lxc_netdev_rename_by_index(s
->ifindex
, s
->orig_name
);
2172 conf
->num_savednics
= 0;
2173 free(conf
->saved_nics
);
2176 static int setup_private_host_hw_addr(char *veth1
)
2182 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
2186 snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
2187 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
2193 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
2194 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
2199 DEBUG("mac address of host interface '%s' changed to private "
2200 "%02x:%02x:%02x:%02x:%02x:%02x", veth1
,
2201 ifr
.ifr_hwaddr
.sa_data
[0] & 0xff,
2202 ifr
.ifr_hwaddr
.sa_data
[1] & 0xff,
2203 ifr
.ifr_hwaddr
.sa_data
[2] & 0xff,
2204 ifr
.ifr_hwaddr
.sa_data
[3] & 0xff,
2205 ifr
.ifr_hwaddr
.sa_data
[4] & 0xff,
2206 ifr
.ifr_hwaddr
.sa_data
[5] & 0xff);
2211 static char *default_rootfs_mount
= LXCROOTFSMOUNT
;
2213 struct lxc_conf
*lxc_conf_init(void)
2215 struct lxc_conf
*new;
2218 new = malloc(sizeof(*new));
2220 ERROR("lxc_conf_init : %m");
2223 memset(new, 0, sizeof(*new));
2225 new->loglevel
= LXC_LOG_PRIORITY_NOTSET
;
2226 new->personality
= -1;
2227 new->console
.log_path
= NULL
;
2228 new->console
.log_fd
= -1;
2229 new->console
.path
= NULL
;
2230 new->console
.peer
= -1;
2231 new->console
.peerpty
.busy
= -1;
2232 new->console
.peerpty
.master
= -1;
2233 new->console
.peerpty
.slave
= -1;
2234 new->console
.master
= -1;
2235 new->console
.slave
= -1;
2236 new->console
.name
[0] = '\0';
2237 new->maincmd_fd
= -1;
2238 new->rootfs
.mount
= strdup(default_rootfs_mount
);
2239 if (!new->rootfs
.mount
) {
2240 ERROR("lxc_conf_init : %m");
2245 lxc_list_init(&new->cgroup
);
2246 lxc_list_init(&new->network
);
2247 lxc_list_init(&new->mount_list
);
2248 lxc_list_init(&new->caps
);
2249 lxc_list_init(&new->keepcaps
);
2250 lxc_list_init(&new->id_map
);
2251 for (i
=0; i
<NUM_LXC_HOOKS
; i
++)
2252 lxc_list_init(&new->hooks
[i
]);
2254 new->aa_profile
= NULL
;
2256 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2257 new->lsm_umount_proc
= 0;
2263 static int instanciate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2265 char veth1buf
[IFNAMSIZ
], *veth1
;
2266 char veth2buf
[IFNAMSIZ
], *veth2
;
2269 if (netdev
->priv
.veth_attr
.pair
)
2270 veth1
= netdev
->priv
.veth_attr
.pair
;
2272 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
2273 if (err
>= sizeof(veth1buf
)) { /* can't *really* happen, but... */
2274 ERROR("veth1 name too long");
2277 veth1
= mkifname(veth1buf
);
2278 /* store away for deconf */
2279 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
2282 snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
2283 veth2
= mkifname(veth2buf
);
2285 if (!strlen(veth1
) || !strlen(veth2
)) {
2286 ERROR("failed to allocate a temporary name");
2290 err
= lxc_veth_create(veth1
, veth2
);
2292 ERROR("failed to create %s-%s : %s", veth1
, veth2
,
2297 /* changing the high byte of the mac address to 0xfe, the bridge interface
2298 * will always keep the host's mac address and not take the mac address
2300 err
= setup_private_host_hw_addr(veth1
);
2302 ERROR("failed to change mac address of host interface '%s' : %s",
2303 veth1
, strerror(-err
));
2308 err
= lxc_netdev_set_mtu(veth1
, atoi(netdev
->mtu
));
2310 err
= lxc_netdev_set_mtu(veth2
, atoi(netdev
->mtu
));
2312 ERROR("failed to set mtu '%s' for %s-%s : %s",
2313 netdev
->mtu
, veth1
, veth2
, strerror(-err
));
2319 err
= lxc_bridge_attach(netdev
->link
, veth1
);
2321 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2322 veth1
, netdev
->link
, strerror(-err
));
2327 netdev
->ifindex
= if_nametoindex(veth2
);
2328 if (!netdev
->ifindex
) {
2329 ERROR("failed to retrieve the index for %s", veth2
);
2333 err
= lxc_netdev_up(veth1
);
2335 ERROR("failed to set %s up : %s", veth1
, strerror(-err
));
2339 if (netdev
->upscript
) {
2340 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
2341 "veth", veth1
, (char*) NULL
);
2346 DEBUG("instanciated veth '%s/%s', index is '%d'",
2347 veth1
, veth2
, netdev
->ifindex
);
2352 lxc_netdev_delete_by_name(veth1
);
2356 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2361 if (netdev
->priv
.veth_attr
.pair
)
2362 veth1
= netdev
->priv
.veth_attr
.pair
;
2364 veth1
= netdev
->priv
.veth_attr
.veth1
;
2366 if (netdev
->downscript
) {
2367 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2368 "down", "veth", veth1
, (char*) NULL
);
2375 static int instanciate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2377 char peerbuf
[IFNAMSIZ
], *peer
;
2380 if (!netdev
->link
) {
2381 ERROR("no link specified for macvlan netdev");
2385 err
= snprintf(peerbuf
, sizeof(peerbuf
), "mcXXXXXX");
2386 if (err
>= sizeof(peerbuf
))
2389 peer
= mkifname(peerbuf
);
2390 if (!strlen(peer
)) {
2391 ERROR("failed to make a temporary name");
2395 err
= lxc_macvlan_create(netdev
->link
, peer
,
2396 netdev
->priv
.macvlan_attr
.mode
);
2398 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2399 peer
, netdev
->link
, strerror(-err
));
2403 netdev
->ifindex
= if_nametoindex(peer
);
2404 if (!netdev
->ifindex
) {
2405 ERROR("failed to retrieve the index for %s", peer
);
2406 lxc_netdev_delete_by_name(peer
);
2410 if (netdev
->upscript
) {
2411 err
= run_script(handler
->name
, "net", netdev
->upscript
, "up",
2412 "macvlan", netdev
->link
, (char*) NULL
);
2417 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2418 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
2423 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2427 if (netdev
->downscript
) {
2428 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2429 "down", "macvlan", netdev
->link
,
2437 /* XXX: merge with instanciate_macvlan */
2438 static int instanciate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2440 char peer
[IFNAMSIZ
];
2443 if (!netdev
->link
) {
2444 ERROR("no link specified for vlan netdev");
2448 err
= snprintf(peer
, sizeof(peer
), "vlan%d", netdev
->priv
.vlan_attr
.vid
);
2449 if (err
>= sizeof(peer
)) {
2450 ERROR("peer name too long");
2454 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
2456 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2457 peer
, netdev
->link
, strerror(-err
));
2461 netdev
->ifindex
= if_nametoindex(peer
);
2462 if (!netdev
->ifindex
) {
2463 ERROR("failed to retrieve the ifindex for %s", peer
);
2464 lxc_netdev_delete_by_name(peer
);
2468 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
/*
 * Teardown hook for vlan devices: nothing to do, always reports success.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
2479 static int instanciate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2481 if (!netdev
->link
) {
2482 ERROR("no link specified for the physical interface");
2486 netdev
->ifindex
= if_nametoindex(netdev
->link
);
2487 if (!netdev
->ifindex
) {
2488 ERROR("failed to retrieve the index for %s", netdev
->link
);
2492 if (netdev
->upscript
) {
2494 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2495 "up", "phys", netdev
->link
, (char*) NULL
);
2503 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2507 if (netdev
->downscript
) {
2508 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2509 "down", "phys", netdev
->link
, (char*) NULL
);
2516 static int instanciate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2518 netdev
->ifindex
= 0;
2519 if (netdev
->upscript
) {
2521 err
= run_script(handler
->name
, "net", netdev
->upscript
,
2522 "up", "empty", (char*) NULL
);
2529 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
2533 if (netdev
->downscript
) {
2534 err
= run_script(handler
->name
, "net", netdev
->downscript
,
2535 "down", "empty", (char*) NULL
);
2542 int lxc_create_network(struct lxc_handler
*handler
)
2544 struct lxc_list
*network
= &handler
->conf
->network
;
2545 struct lxc_list
*iterator
;
2546 struct lxc_netdev
*netdev
;
2548 lxc_list_for_each(iterator
, network
) {
2550 netdev
= iterator
->elem
;
2552 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
) {
2553 ERROR("invalid network configuration type '%d'",
2558 if (netdev_conf
[netdev
->type
](handler
, netdev
)) {
2559 ERROR("failed to create netdev");
2568 void lxc_delete_network(struct lxc_handler
*handler
)
2570 struct lxc_list
*network
= &handler
->conf
->network
;
2571 struct lxc_list
*iterator
;
2572 struct lxc_netdev
*netdev
;
2574 lxc_list_for_each(iterator
, network
) {
2575 netdev
= iterator
->elem
;
2577 if (netdev
->ifindex
!= 0 && netdev
->type
== LXC_NET_PHYS
) {
2578 if (lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
))
2579 WARN("failed to rename to the initial name the " \
2580 "netdev '%s'", netdev
->link
);
2584 if (netdev_deconf
[netdev
->type
](handler
, netdev
)) {
2585 WARN("failed to destroy netdev");
2588 /* Recent kernel remove the virtual interfaces when the network
2589 * namespace is destroyed but in case we did not moved the
2590 * interface to the network namespace, we have to destroy it
2592 if (netdev
->ifindex
!= 0 &&
2593 lxc_netdev_delete_by_index(netdev
->ifindex
))
2594 WARN("failed to remove interface '%s'", netdev
->name
);
2598 int lxc_assign_network(struct lxc_list
*network
, pid_t pid
)
2600 struct lxc_list
*iterator
;
2601 struct lxc_netdev
*netdev
;
2604 lxc_list_for_each(iterator
, network
) {
2606 netdev
= iterator
->elem
;
2608 /* empty network namespace, nothing to move */
2609 if (!netdev
->ifindex
)
2612 err
= lxc_netdev_move_by_index(netdev
->ifindex
, pid
);
2614 ERROR("failed to move '%s' to the container : %s",
2615 netdev
->link
, strerror(-err
));
2619 DEBUG("move '%s' to '%d'", netdev
->name
, pid
);
2625 static int write_id_mapping(enum idtype idtype
, pid_t pid
, const char *buf
,
2628 char path
[PATH_MAX
];
2632 ret
= snprintf(path
, PATH_MAX
, "/proc/%d/%cid_map", pid
, idtype
== ID_TYPE_UID
? 'u' : 'g');
2633 if (ret
< 0 || ret
>= PATH_MAX
) {
2634 fprintf(stderr
, "%s: path name too long", __func__
);
2637 f
= fopen(path
, "w");
2642 ret
= fwrite(buf
, buf_size
, 1, f
);
2644 SYSERROR("writing id mapping");
2645 closeret
= fclose(f
);
2647 SYSERROR("writing id mapping");
2648 return ret
< 0 ? ret
: closeret
;
2651 int lxc_map_ids(struct lxc_list
*idmap
, pid_t pid
)
2653 struct lxc_list
*iterator
;
2657 char *buf
= NULL
, *pos
;
2659 for(type
= ID_TYPE_UID
; type
<= ID_TYPE_GID
; type
++) {
2663 lxc_list_for_each(iterator
, idmap
) {
2664 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
2666 buf
= pos
= malloc(4096);
2670 map
= iterator
->elem
;
2671 if (map
->idtype
== type
) {
2672 left
= 4096 - (pos
- buf
);
2673 fill
= snprintf(pos
, left
, "%lu %lu %lu\n",
2674 map
->nsid
, map
->hostid
, map
->range
);
2675 if (fill
<= 0 || fill
>= left
)
2676 SYSERROR("snprintf failed, too many mappings");
2680 if (pos
== buf
) // no mappings were found
2682 ret
= write_id_mapping(type
, pid
, buf
, pos
-buf
);
2692 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2694 struct lxc_list
*network
= &handler
->conf
->network
;
2695 struct lxc_list
*iterator
;
2696 struct lxc_netdev
*netdev
;
2699 lxc_list_for_each(iterator
, network
) {
2700 netdev
= iterator
->elem
;
2702 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2705 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
) {
2706 ERROR("gateway = auto only supported for "
2707 "veth and macvlan");
2711 if (!netdev
->link
) {
2712 ERROR("gateway = auto needs a link interface");
2716 link_index
= if_nametoindex(netdev
->link
);
2720 if (netdev
->ipv4_gateway_auto
) {
2721 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
)) {
2722 ERROR("failed to automatically find ipv4 gateway "
2723 "address from link interface '%s'", netdev
->link
);
2728 if (netdev
->ipv6_gateway_auto
) {
2729 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
)) {
2730 ERROR("failed to automatically find ipv6 gateway "
2731 "address from link interface '%s'", netdev
->link
);
2740 int lxc_create_tty(const char *name
, struct lxc_conf
*conf
)
2742 struct lxc_tty_info
*tty_info
= &conf
->tty_info
;
2745 /* no tty in the configuration */
2749 tty_info
->pty_info
=
2750 malloc(sizeof(*tty_info
->pty_info
)*conf
->tty
);
2751 if (!tty_info
->pty_info
) {
2752 SYSERROR("failed to allocate pty_info");
2756 for (i
= 0; i
< conf
->tty
; i
++) {
2758 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2760 if (openpty(&pty_info
->master
, &pty_info
->slave
,
2761 pty_info
->name
, NULL
, NULL
)) {
2762 SYSERROR("failed to create pty #%d", i
);
2763 tty_info
->nbtty
= i
;
2764 lxc_delete_tty(tty_info
);
2768 DEBUG("allocated pty '%s' (%d/%d)",
2769 pty_info
->name
, pty_info
->master
, pty_info
->slave
);
2771 /* Prevent leaking the file descriptors to the container */
2772 fcntl(pty_info
->master
, F_SETFD
, FD_CLOEXEC
);
2773 fcntl(pty_info
->slave
, F_SETFD
, FD_CLOEXEC
);
2778 tty_info
->nbtty
= conf
->tty
;
2780 INFO("tty's configured");
2785 void lxc_delete_tty(struct lxc_tty_info
*tty_info
)
2789 for (i
= 0; i
< tty_info
->nbtty
; i
++) {
2790 struct lxc_pty_info
*pty_info
= &tty_info
->pty_info
[i
];
2792 close(pty_info
->master
);
2793 close(pty_info
->slave
);
2796 free(tty_info
->pty_info
);
2797 tty_info
->nbtty
= 0;
2801 * given a host uid, return the ns uid if it is mapped.
2802 * if it is not mapped, return the original host id.
2804 static int shiftid(struct lxc_conf
*c
, int uid
, enum idtype w
)
2806 struct lxc_list
*iterator
;
2810 lxc_list_for_each(iterator
, &c
->id_map
) {
2811 map
= iterator
->elem
;
2812 if (map
->idtype
!= w
)
2816 high
= map
->nsid
+ map
->range
;
2817 if (uid
< low
|| uid
>= high
)
2820 return uid
- low
+ map
->hostid
;
2827 * Take a pathname for a file created on the host, and map the uid and gid
2828 * into the container if needed. (Used for ttys)
2830 static int uid_shift_file(char *path
, struct lxc_conf
*c
)
2832 struct stat statbuf
;
2835 if (stat(path
, &statbuf
)) {
2836 SYSERROR("stat(%s)", path
);
2840 newuid
= shiftid(c
, statbuf
.st_uid
, ID_TYPE_UID
);
2841 newgid
= shiftid(c
, statbuf
.st_gid
, ID_TYPE_GID
);
2842 if (newuid
!= statbuf
.st_uid
|| newgid
!= statbuf
.st_gid
) {
2843 DEBUG("chowning %s from %d:%d to %d:%d\n", path
, (int)statbuf
.st_uid
, (int)statbuf
.st_gid
, newuid
, newgid
);
2844 if (chown(path
, newuid
, newgid
)) {
2845 SYSERROR("chown(%s)", path
);
2852 int uid_shift_ttys(int pid
, struct lxc_conf
*conf
)
2855 struct lxc_tty_info
*tty_info
= &conf
->tty_info
;
2856 char path
[MAXPATHLEN
];
2857 char *ttydir
= conf
->ttydir
;
2859 if (!conf
->rootfs
.path
)
2861 /* first the console */
2862 ret
= snprintf(path
, sizeof(path
), "/proc/%d/root/dev/%s/console", pid
, ttydir
? ttydir
: "");
2863 if (ret
< 0 || ret
>= sizeof(path
)) {
2864 ERROR("console path too long\n");
2867 if (uid_shift_file(path
, conf
)) {
2868 DEBUG("Failed to chown the console %s.\n", path
);
2871 for (i
=0; i
< tty_info
->nbtty
; i
++) {
2872 ret
= snprintf(path
, sizeof(path
), "/proc/%d/root/dev/%s/tty%d",
2873 pid
, ttydir
? ttydir
: "", i
+ 1);
2874 if (ret
< 0 || ret
>= sizeof(path
)) {
2875 ERROR("pathname too long for ttys");
2878 if (uid_shift_file(path
, conf
)) {
2879 DEBUG("Failed to chown pty %s.\n", path
);
2887 int lxc_setup(const char *name
, struct lxc_conf
*lxc_conf
, const char *lxcpath
)
2889 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2893 if (setup_utsname(lxc_conf
->utsname
)) {
2894 ERROR("failed to setup the utsname for '%s'", name
);
2898 if (setup_network(&lxc_conf
->network
)) {
2899 ERROR("failed to setup the network for '%s'", name
);
2903 if (run_lxc_hooks(name
, "pre-mount", lxc_conf
, lxcpath
, NULL
)) {
2904 ERROR("failed to run pre-mount hooks for container '%s'.", name
);
2908 if (setup_rootfs(lxc_conf
)) {
2909 ERROR("failed to setup rootfs for '%s'", name
);
2913 if (lxc_conf
->autodev
) {
2914 if (mount_autodev(lxc_conf
->rootfs
.mount
)) {
2915 ERROR("failed to mount /dev in the container");
2920 if (setup_mount(&lxc_conf
->rootfs
, lxc_conf
->fstab
, name
)) {
2921 ERROR("failed to setup the mounts for '%s'", name
);
2925 if (!lxc_list_empty(&lxc_conf
->mount_list
) && setup_mount_entries(&lxc_conf
->rootfs
, &lxc_conf
->mount_list
, name
)) {
2926 ERROR("failed to setup the mount entries for '%s'", name
);
2930 if (run_lxc_hooks(name
, "mount", lxc_conf
, lxcpath
, NULL
)) {
2931 ERROR("failed to run mount hooks for container '%s'.", name
);
2935 if (lxc_conf
->autodev
) {
2936 if (run_lxc_hooks(name
, "autodev", lxc_conf
, lxcpath
, NULL
)) {
2937 ERROR("failed to run autodev hooks for container '%s'.", name
);
2940 if (setup_autodev(lxc_conf
->rootfs
.mount
)) {
2941 ERROR("failed to populate /dev in the container");
2946 if (!lxc_conf
->is_execute
&& setup_console(&lxc_conf
->rootfs
, &lxc_conf
->console
, lxc_conf
->ttydir
)) {
2947 ERROR("failed to setup the console for '%s'", name
);
2951 if (lxc_conf
->kmsg
) {
2952 if (setup_kmsg(&lxc_conf
->rootfs
, &lxc_conf
->console
)) // don't fail
2953 ERROR("failed to setup kmsg for '%s'", name
);
2956 if (!lxc_conf
->is_execute
&& setup_tty(&lxc_conf
->rootfs
, &lxc_conf
->tty_info
, lxc_conf
->ttydir
)) {
2957 ERROR("failed to setup the ttys for '%s'", name
);
2961 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2962 INFO("rootfs path is .%s., mount is .%s.", lxc_conf
->rootfs
.path
,
2963 lxc_conf
->rootfs
.mount
);
2964 if (lxc_conf
->rootfs
.path
== NULL
|| strlen(lxc_conf
->rootfs
.path
) == 0) {
2965 if (mount("proc", "/proc", "proc", 0, NULL
)) {
2966 SYSERROR("Failed mounting /proc, proceeding");
2971 mounted
= lsm_mount_proc_if_needed(lxc_conf
->rootfs
.path
, lxc_conf
->rootfs
.mount
);
2972 if (mounted
== -1) {
2973 SYSERROR("failed to mount /proc in the container.");
2975 } else if (mounted
== 1) {
2976 lxc_conf
->lsm_umount_proc
= 1;
2980 if (setup_pivot_root(&lxc_conf
->rootfs
)) {
2981 ERROR("failed to set rootfs for '%s'", name
);
2985 if (setup_pts(lxc_conf
->pts
)) {
2986 ERROR("failed to setup the new pts instance");
2990 if (setup_personality(lxc_conf
->personality
)) {
2991 ERROR("failed to setup personality");
2995 if (lxc_list_empty(&lxc_conf
->id_map
)) {
2996 if (!lxc_list_empty(&lxc_conf
->keepcaps
)) {
2997 if (!lxc_list_empty(&lxc_conf
->caps
)) {
2998 ERROR("Simultaneously requested dropping and keeping caps");
3001 if (dropcaps_except(&lxc_conf
->keepcaps
)) {
3002 ERROR("failed to keep requested caps\n");
3005 } else if (setup_caps(&lxc_conf
->caps
)) {
3006 ERROR("failed to drop capabilities");
3011 NOTICE("'%s' is setup.", name
);
3016 int run_lxc_hooks(const char *name
, char *hook
, struct lxc_conf
*conf
,
3017 const char *lxcpath
, char *argv
[])
3020 struct lxc_list
*it
;
3022 if (strcmp(hook
, "pre-start") == 0)
3023 which
= LXCHOOK_PRESTART
;
3024 else if (strcmp(hook
, "pre-mount") == 0)
3025 which
= LXCHOOK_PREMOUNT
;
3026 else if (strcmp(hook
, "mount") == 0)
3027 which
= LXCHOOK_MOUNT
;
3028 else if (strcmp(hook
, "autodev") == 0)
3029 which
= LXCHOOK_AUTODEV
;
3030 else if (strcmp(hook
, "start") == 0)
3031 which
= LXCHOOK_START
;
3032 else if (strcmp(hook
, "post-stop") == 0)
3033 which
= LXCHOOK_POSTSTOP
;
3034 else if (strcmp(hook
, "clone") == 0)
3035 which
= LXCHOOK_CLONE
;
3038 lxc_list_for_each(it
, &conf
->hooks
[which
]) {
3040 char *hookname
= it
->elem
;
3041 ret
= run_script_argv(name
, "lxc", hookname
, hook
, lxcpath
, argv
);
3048 static void lxc_remove_nic(struct lxc_list
*it
)
3050 struct lxc_netdev
*netdev
= it
->elem
;
3051 struct lxc_list
*it2
,*next
;
3059 if (netdev
->upscript
)
3060 free(netdev
->upscript
);
3062 free(netdev
->hwaddr
);
3065 if (netdev
->ipv4_gateway
)
3066 free(netdev
->ipv4_gateway
);
3067 if (netdev
->ipv6_gateway
)
3068 free(netdev
->ipv6_gateway
);
3069 lxc_list_for_each_safe(it2
, &netdev
->ipv4
, next
) {
3074 lxc_list_for_each_safe(it2
, &netdev
->ipv6
, next
) {
3083 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
3084 int lxc_clear_nic(struct lxc_conf
*c
, const char *key
)
3088 struct lxc_list
*it
;
3089 struct lxc_netdev
*netdev
;
3091 p1
= index(key
, '.');
3092 if (!p1
|| *(p1
+1) == '\0')
3095 ret
= sscanf(key
, "%d", &idx
);
3096 if (ret
!= 1) return -1;
3101 lxc_list_for_each(it
, &c
->network
) {
3106 if (i
< idx
) // we don't have that many nics defined
3109 if (!it
|| !it
->elem
)
3116 } else if (strcmp(p1
, "ipv4") == 0) {
3117 struct lxc_list
*it2
,*next
;
3118 lxc_list_for_each_safe(it2
, &netdev
->ipv4
, next
) {
3123 } else if (strcmp(p1
, "ipv6") == 0) {
3124 struct lxc_list
*it2
,*next
;
3125 lxc_list_for_each_safe(it2
, &netdev
->ipv6
, next
) {
3130 } else if (strcmp(p1
, "link") == 0) {
3133 netdev
->link
= NULL
;
3135 } else if (strcmp(p1
, "name") == 0) {
3138 netdev
->name
= NULL
;
3140 } else if (strcmp(p1
, "script.up") == 0) {
3141 if (netdev
->upscript
) {
3142 free(netdev
->upscript
);
3143 netdev
->upscript
= NULL
;
3145 } else if (strcmp(p1
, "hwaddr") == 0) {
3146 if (netdev
->hwaddr
) {
3147 free(netdev
->hwaddr
);
3148 netdev
->hwaddr
= NULL
;
3150 } else if (strcmp(p1
, "mtu") == 0) {
3155 } else if (strcmp(p1
, "ipv4_gateway") == 0) {
3156 if (netdev
->ipv4_gateway
) {
3157 free(netdev
->ipv4_gateway
);
3158 netdev
->ipv4_gateway
= NULL
;
3160 } else if (strcmp(p1
, "ipv6_gateway") == 0) {
3161 if (netdev
->ipv6_gateway
) {
3162 free(netdev
->ipv6_gateway
);
3163 netdev
->ipv6_gateway
= NULL
;
3171 int lxc_clear_config_network(struct lxc_conf
*c
)
3173 struct lxc_list
*it
,*next
;
3174 lxc_list_for_each_safe(it
, &c
->network
, next
) {
3180 int lxc_clear_config_caps(struct lxc_conf
*c
)
3182 struct lxc_list
*it
,*next
;
3184 lxc_list_for_each_safe(it
, &c
->caps
, next
) {
3192 int lxc_clear_idmaps(struct lxc_conf
*c
)
3194 struct lxc_list
*it
, *next
;
3196 lxc_list_for_each_safe(it
, &c
->id_map
, next
) {
3204 int lxc_clear_config_keepcaps(struct lxc_conf
*c
)
3206 struct lxc_list
*it
,*next
;
3208 lxc_list_for_each_safe(it
, &c
->keepcaps
, next
) {
3216 int lxc_clear_cgroups(struct lxc_conf
*c
, const char *key
)
3218 struct lxc_list
*it
,*next
;
3220 const char *k
= key
+ 11;
3222 if (strcmp(key
, "lxc.cgroup") == 0)
3225 lxc_list_for_each_safe(it
, &c
->cgroup
, next
) {
3226 struct lxc_cgroup
*cg
= it
->elem
;
3227 if (!all
&& strcmp(cg
->subsystem
, k
) != 0)
3230 free(cg
->subsystem
);
3238 int lxc_clear_mount_entries(struct lxc_conf
*c
)
3240 struct lxc_list
*it
,*next
;
3242 lxc_list_for_each_safe(it
, &c
->mount_list
, next
) {
3250 int lxc_clear_hooks(struct lxc_conf
*c
, const char *key
)
3252 struct lxc_list
*it
,*next
;
3253 bool all
= false, done
= false;
3254 const char *k
= key
+ 9;
3257 if (strcmp(key
, "lxc.hook") == 0)
3260 for (i
=0; i
<NUM_LXC_HOOKS
; i
++) {
3261 if (all
|| strcmp(k
, lxchook_names
[i
]) == 0) {
3262 lxc_list_for_each_safe(it
, &c
->hooks
[i
], next
) {
3272 ERROR("Invalid hook key: %s", key
);
3278 void lxc_clear_saved_nics(struct lxc_conf
*conf
)
3282 if (!conf
->num_savednics
)
3284 for (i
=0; i
< conf
->num_savednics
; i
++)
3285 free(conf
->saved_nics
[i
].orig_name
);
3286 conf
->saved_nics
= 0;
3287 free(conf
->saved_nics
);
3290 void lxc_conf_free(struct lxc_conf
*conf
)
3294 if (conf
->console
.path
)
3295 free(conf
->console
.path
);
3296 if (conf
->rootfs
.mount
)
3297 free(conf
->rootfs
.mount
);
3298 if (conf
->rootfs
.path
)
3299 free(conf
->rootfs
.path
);
3301 free(conf
->utsname
);
3308 lxc_clear_config_network(conf
);
3310 if (conf
->aa_profile
)
3311 free(conf
->aa_profile
);
3313 lxc_seccomp_free(conf
);
3314 lxc_clear_config_caps(conf
);
3315 lxc_clear_config_keepcaps(conf
);
3316 lxc_clear_cgroups(conf
, "lxc.cgroup");
3317 lxc_clear_hooks(conf
, "lxc.hook");
3318 lxc_clear_mount_entries(conf
);
3319 lxc_clear_saved_nics(conf
);
3320 lxc_clear_idmaps(conf
);