]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.c
83613ed99a1d30639cf7276a7b0b56bc5436349d
[mirror_lxc.git] / src / lxc / conf.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 #define _GNU_SOURCE
24 #include <stdio.h>
25 #undef _GNU_SOURCE
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <errno.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <unistd.h>
32 #include <sys/wait.h>
33 #include <sys/syscall.h>
34 #include <time.h>
35
36 #if HAVE_IFADDRS_H
37 #include <ifaddrs.h>
38 #else
39 #include <../include/ifaddrs.h>
40 #endif
41
42 #if HAVE_PTY_H
43 #include <pty.h>
44 #else
45 #include <../include/openpty.h>
46 #endif
47
48 #include <linux/loop.h>
49
50 #include <sys/types.h>
51 #include <sys/utsname.h>
52 #include <sys/param.h>
53 #include <sys/stat.h>
54 #include <sys/socket.h>
55 #include <sys/mount.h>
56 #include <sys/mman.h>
57 #include <sys/prctl.h>
58
59 #include <arpa/inet.h>
60 #include <fcntl.h>
61 #include <netinet/in.h>
62 #include <net/if.h>
63 #include <libgen.h>
64
65 #include "network.h"
66 #include "error.h"
67 #include "parse.h"
68 #include "config.h"
69 #include "utils.h"
70 #include "conf.h"
71 #include "log.h"
72 #include "lxc.h" /* for lxc_cgroup_set() */
73 #include "caps.h" /* for lxc_caps_last_cap() */
74 #include "bdev.h"
75
76 #if HAVE_APPARMOR
77 #include <apparmor.h>
78 #endif
79
80 #if HAVE_SYS_CAPABILITY_H
81 #include <sys/capability.h>
82 #endif
83
84 #if HAVE_SYS_PERSONALITY_H
85 #include <sys/personality.h>
86 #endif
87
88 #if IS_BIONIC
89 #include <../include/lxcmntent.h>
90 #else
91 #include <mntent.h>
92 #endif
93
94 #include "lxcseccomp.h"
95
96 lxc_log_define(lxc_conf, lxc);
97
98 #define MAXHWLEN 18
99 #define MAXINDEXLEN 20
100 #define MAXMTULEN 16
101 #define MAXLINELEN 128
102
103 #if HAVE_SYS_CAPABILITY_H
104 #ifndef CAP_SETFCAP
105 #define CAP_SETFCAP 31
106 #endif
107
108 #ifndef CAP_MAC_OVERRIDE
109 #define CAP_MAC_OVERRIDE 32
110 #endif
111
112 #ifndef CAP_MAC_ADMIN
113 #define CAP_MAC_ADMIN 33
114 #endif
115 #endif
116
117 #ifndef PR_CAPBSET_DROP
118 #define PR_CAPBSET_DROP 24
119 #endif
120
121 #ifndef LO_FLAGS_AUTOCLEAR
122 #define LO_FLAGS_AUTOCLEAR 4
123 #endif
124
/*
 * pivot_root() shim.
 *
 * When the C library already provides pivot_root() (HAVE_PIVOT_ROOT is
 * defined) only a prototype is emitted. Otherwise a local wrapper issues
 * the raw system call, or fails with ENOSYS when the syscall number is
 * not known at build time.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
139
/*
 * sethostname() shim, only compiled when the C library lacks it
 * (HAVE_SETHOSTNAME undefined): forward to the raw system call, or fail
 * with ENOSYS when the syscall number is not known at build time.
 */
#if !defined(HAVE_SETHOSTNAME)
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
152
/*
 * Fallback for C libraries that do not ship __S_ISTYPE: true when the
 * file-type bits of `mode` are exactly `mask`.
 */
#if !defined(__S_ISTYPE)
#define __S_ISTYPE(mode, mask) ((mask) == ((mode) & S_IFMT))
#endif
157
158 char *lxchook_names[NUM_LXC_HOOKS] = {
159 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
160
/* Signature shared by the per-type network setup and shutdown callbacks. */
typedef int (*instanciate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);

/* One entry of the mount option table. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* 1 for the negated keywords ("rw", "suid", ...), else 0 */
	int flag;	/* MS_* flag associated with the keyword */
};

/* One entry of the capability table. */
struct caps_opt {
	char *name;	/* capability name, e.g. "chown" */
	int value;	/* matching CAP_* value */
};
173
174 static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
175 static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
176 static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
177 static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
178 static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
179
180 static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
181 [LXC_NET_VETH] = instanciate_veth,
182 [LXC_NET_MACVLAN] = instanciate_macvlan,
183 [LXC_NET_VLAN] = instanciate_vlan,
184 [LXC_NET_PHYS] = instanciate_phys,
185 [LXC_NET_EMPTY] = instanciate_empty,
186 };
187
188 static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
189 static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
190 static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
191 static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
192 static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
193
194 static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
195 [LXC_NET_VETH] = shutdown_veth,
196 [LXC_NET_MACVLAN] = shutdown_macvlan,
197 [LXC_NET_VLAN] = shutdown_vlan,
198 [LXC_NET_PHYS] = shutdown_phys,
199 [LXC_NET_EMPTY] = shutdown_empty,
200 };
201
202 static struct mount_opt mount_opt[] = {
203 { "defaults", 0, 0 },
204 { "ro", 0, MS_RDONLY },
205 { "rw", 1, MS_RDONLY },
206 { "suid", 1, MS_NOSUID },
207 { "nosuid", 0, MS_NOSUID },
208 { "dev", 1, MS_NODEV },
209 { "nodev", 0, MS_NODEV },
210 { "exec", 1, MS_NOEXEC },
211 { "noexec", 0, MS_NOEXEC },
212 { "sync", 0, MS_SYNCHRONOUS },
213 { "async", 1, MS_SYNCHRONOUS },
214 { "dirsync", 0, MS_DIRSYNC },
215 { "remount", 0, MS_REMOUNT },
216 { "mand", 0, MS_MANDLOCK },
217 { "nomand", 1, MS_MANDLOCK },
218 { "atime", 1, MS_NOATIME },
219 { "noatime", 0, MS_NOATIME },
220 { "diratime", 1, MS_NODIRATIME },
221 { "nodiratime", 0, MS_NODIRATIME },
222 { "bind", 0, MS_BIND },
223 { "rbind", 0, MS_BIND|MS_REC },
224 { "relatime", 0, MS_RELATIME },
225 { "norelatime", 1, MS_RELATIME },
226 { "strictatime", 0, MS_STRICTATIME },
227 { "nostrictatime", 1, MS_STRICTATIME },
228 { NULL, 0, 0 },
229 };
230
231 #if HAVE_SYS_CAPABILITY_H
232 static struct caps_opt caps_opt[] = {
233 { "chown", CAP_CHOWN },
234 { "dac_override", CAP_DAC_OVERRIDE },
235 { "dac_read_search", CAP_DAC_READ_SEARCH },
236 { "fowner", CAP_FOWNER },
237 { "fsetid", CAP_FSETID },
238 { "kill", CAP_KILL },
239 { "setgid", CAP_SETGID },
240 { "setuid", CAP_SETUID },
241 { "setpcap", CAP_SETPCAP },
242 { "linux_immutable", CAP_LINUX_IMMUTABLE },
243 { "net_bind_service", CAP_NET_BIND_SERVICE },
244 { "net_broadcast", CAP_NET_BROADCAST },
245 { "net_admin", CAP_NET_ADMIN },
246 { "net_raw", CAP_NET_RAW },
247 { "ipc_lock", CAP_IPC_LOCK },
248 { "ipc_owner", CAP_IPC_OWNER },
249 { "sys_module", CAP_SYS_MODULE },
250 { "sys_rawio", CAP_SYS_RAWIO },
251 { "sys_chroot", CAP_SYS_CHROOT },
252 { "sys_ptrace", CAP_SYS_PTRACE },
253 { "sys_pacct", CAP_SYS_PACCT },
254 { "sys_admin", CAP_SYS_ADMIN },
255 { "sys_boot", CAP_SYS_BOOT },
256 { "sys_nice", CAP_SYS_NICE },
257 { "sys_resource", CAP_SYS_RESOURCE },
258 { "sys_time", CAP_SYS_TIME },
259 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
260 { "mknod", CAP_MKNOD },
261 { "lease", CAP_LEASE },
262 #ifdef CAP_AUDIT_WRITE
263 { "audit_write", CAP_AUDIT_WRITE },
264 #endif
265 #ifdef CAP_AUDIT_CONTROL
266 { "audit_control", CAP_AUDIT_CONTROL },
267 #endif
268 { "setfcap", CAP_SETFCAP },
269 { "mac_override", CAP_MAC_OVERRIDE },
270 { "mac_admin", CAP_MAC_ADMIN },
271 #ifdef CAP_SYSLOG
272 { "syslog", CAP_SYSLOG },
273 #endif
274 #ifdef CAP_WAKE_ALARM
275 { "wake_alarm", CAP_WAKE_ALARM },
276 #endif
277 };
278 #else
279 static struct caps_opt caps_opt[] = {};
280 #endif
281
/* Alphabet used to replace the 'X' placeholders of an interface name template. */
static char padchar[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/*
 * mkifname: build a network interface name from a template.
 *
 * Every 'X' in @template is replaced by a random character taken from
 * padchar; generation is repeated until the result does not match the
 * name of an interface reported by getifaddrs().
 *
 * @template: NUL-terminated pattern, e.g. "vethXXXXXX". It is not modified.
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when
 * @template is NULL, when memory cannot be allocated, or when the
 * template contains no placeholder and names an existing interface (no
 * other name could ever be generated).
 *
 * If the interface list cannot be obtained the collision check is
 * skipped (best effort) instead of reading an uninitialized list.
 */
static char *mkifname(char *template)
{
	/* number of usable characters in padchar (terminator excluded) */
	const size_t npad = sizeof(padchar) - 1;
	struct ifaddrs *ifaddr = NULL, *ifa;
	unsigned int seed;
	size_t i, len, placeholders;
	FILE *urandom;
	char *name;
	int ifexists;

	if (!template)
		return NULL;

	/* One buffer is reused for every attempt. */
	len = strlen(template);
	name = malloc(len + 1);
	if (!name)
		return NULL;

	/* Get all the network interfaces */
	if (getifaddrs(&ifaddr) < 0)
		ifaddr = NULL;

	/* Initialize the random number generator */
	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		if (fread(&seed, sizeof(seed), 1, urandom) != 1)
			seed = time(0);
		fclose(urandom);
	} else {
		seed = time(0);
	}

#ifndef HAVE_RAND_R
	srand(seed);
#endif

	/* Generate random names until we find one that doesn't exist */
	for (;;) {
		memcpy(name, template, len + 1);

		placeholders = 0;
		for (i = 0; i < len; i++) {
			if (name[i] != 'X')
				continue;
			placeholders++;
#ifdef HAVE_RAND_R
			name[i] = padchar[rand_r(&seed) % npad];
#else
			name[i] = padchar[rand() % npad];
#endif
		}

		ifexists = 0;
		for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
			if (ifa->ifa_name && strcmp(ifa->ifa_name, name) == 0) {
				ifexists = 1;
				break;
			}
		}

		if (!ifexists)
			break;

		/* A template without placeholders can never yield another name. */
		if (placeholders == 0) {
			free(name);
			name = NULL;
			break;
		}
	}

	if (ifaddr)
		freeifaddrs(ifaddr);
	return name;
}
345
346 static int run_buffer(char *buffer)
347 {
348 FILE *f;
349 char *output;
350 int ret;
351
352 f = popen(buffer, "r");
353 if (!f) {
354 SYSERROR("popen failed");
355 return -1;
356 }
357
358 output = malloc(LXC_LOG_BUFFER_SIZE);
359 if (!output) {
360 ERROR("failed to allocate memory for script output");
361 pclose(f);
362 return -1;
363 }
364
365 while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
366 DEBUG("script output: %s", output);
367
368 free(output);
369
370 ret = pclose(f);
371 if (ret == -1) {
372 SYSERROR("Script exited on error");
373 return -1;
374 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
375 ERROR("Script exited with status %d", WEXITSTATUS(ret));
376 return -1;
377 } else if (WIFSIGNALED(ret)) {
378 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
379 strsignal(WTERMSIG(ret)));
380 return -1;
381 }
382
383 return 0;
384 }
385
/*
 * run_script_argv: build the command line
 *   "<script> <name> <section> <hook> [args...]"
 * and execute it with run_buffer().
 *
 * @name:    container name, passed as first argument of the script
 * @section: configuration section, passed as second argument
 * @script:  command to execute
 * @hook:    hook name, passed as third argument
 * @lxcpath: currently unused by this function
 * @argsin:  optional NULL-terminated array of extra arguments (may be NULL)
 *
 * Returns the result of run_buffer(), or -1 when the command line cannot
 * be built.
 *
 * NOTE(review): the arguments are concatenated without any quoting and
 * the result is interpreted by a shell (popen), so every string must come
 * from a trusted source.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	int ret, i, rc = -1;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two more separators and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	/*
	 * The size is only bounded by INT_MAX, so use the heap: alloca()
	 * cannot report failure and may overflow the stack.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t len = size - ret;
		int n;

		n = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (n < 0 || (size_t)n >= len) {
			ERROR("Script args too long");
			goto out;
		}
		ret += n;
	}

	rc = run_buffer(buffer);

out:
	free(buffer);
	return rc;
}
435
/*
 * run_script: build the command line "<script> <name> <section> [args...]"
 * and execute it with run_buffer().
 *
 * @name:    container name, passed as first argument of the script
 * @section: configuration section, passed as second argument
 * @script:  command to execute
 * @...:     extra `char *` arguments; the list MUST end with a NULL pointer
 *
 * Returns the result of run_buffer(), or -1 when the command line cannot
 * be built.
 *
 * NOTE(review): the arguments are concatenated without any quoting and
 * the result is interpreted by a shell (popen), so every string must come
 * from a trusted source.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret, rc = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass over the variadic arguments: compute the needed size */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two separators and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	/*
	 * The size is only bounded by INT_MAX, so use the heap: alloca()
	 * cannot report failure and may overflow the stack.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append every argument */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - ret;
		int n;

		n = snprintf(buffer + ret, len, " %s", p);
		if (n < 0 || (size_t)n >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out;
		}
		ret += n;
	}
	va_end(ap);

	rc = run_buffer(buffer);

out:
	free(buffer);
	return rc;
}
487
/*
 * find_fstype_cb: per-line callback used while scanning a filesystem
 * list. @buffer holds one line; @data points to the rootfs/target/mntopt
 * triple prepared by mount_unknow_fs().
 *
 * Lines containing "nodev" are skipped. Otherwise the line, stripped via
 * lxc_char_left_gc()/lxc_char_right_gc(), is used as filesystem type for
 * a mount attempt.
 *
 * Returns 1 when the mount succeeded, 0 when this line did not lead to a
 * successful mount.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *arg = data;
	char *type = buffer;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	type += lxc_char_left_gc(type, strlen(type));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      arg->rootfs, arg->target, type);

	if (mount(arg->rootfs, arg->target, type, arg->mntopt, NULL) != 0) {
		DEBUG("mount failed with error: %s", strerror(errno));
		return 0;
	}

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     arg->rootfs, arg->target, type);

	return 1;
}
519
520 static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
521 {
522 int i;
523
524 struct cbarg {
525 const char *rootfs;
526 const char *target;
527 int mntopt;
528 } cbarg = {
529 .rootfs = rootfs,
530 .target = target,
531 .mntopt = mntopt,
532 };
533
534 /*
535 * find the filesystem type with brute force:
536 * first we check with /etc/filesystems, in case the modules
537 * are auto-loaded and fall back to the supported kernel fs
538 */
539 char *fsfile[] = {
540 "/etc/filesystems",
541 "/proc/filesystems",
542 };
543
544 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
545
546 int ret;
547
548 if (access(fsfile[i], F_OK))
549 continue;
550
551 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
552 if (ret < 0) {
553 ERROR("failed to parse '%s'", fsfile[i]);
554 return -1;
555 }
556
557 if (ret)
558 return 0;
559 }
560
561 ERROR("failed to determine fs type for '%s'", rootfs);
562 return -1;
563 }
564
/* Recursively bind-mount the directory @rootfs onto @target. */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
569
/*
 * setup_lodev: attach the file @rootfs to the loop device open on @fd.
 *
 * @rootfs: path of the backing file, opened read-write
 * @fd:     open descriptor of the loop device
 * @loinfo: caller-provided status block; it is zeroed and set up with
 *          LO_FLAGS_AUTOCLEAR before being applied to the device
 *
 * Returns 0 on success, -1 on failure. When the status cannot be applied
 * after the file was attached, the file is detached again so that the
 * loop device is not left bound.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/*
		 * AUTOCLEAR was not applied, so the association would
		 * survive the close of the device: undo it explicitly.
		 */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	/* the loop device keeps its own reference to the backing file */
	close(rfd);

	return ret;
}
601
/*
 * mount_rootfs_file: mount the filesystem image @rootfs on @target
 * through the first free loop device found in /dev.
 *
 * A /dev entry whose name starts with "loop" is considered free when
 * LOOP_GET_STATUS64 fails with ENXIO. Only the first free device is
 * tried.
 *
 * Returns 0 on success, -1 when /dev cannot be read, no free loop device
 * exists, or attaching/mounting fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	int found = 0;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/* readdir() is sufficient: the stream is private to this call */
	while ((direntp = readdir(dir)) != NULL) {

		/* this also filters out "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);
		found = 1;

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknow_fs(path, target, 0);
		close(fd);

		break;
	}

	if (!found)
		ERROR("no free loop device found for '%s'", rootfs);

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
665
/* Mount the block device @rootfs on @target, probing the filesystem type. */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int mntopt = 0;

	return mount_unknow_fs(rootfs, target, mntopt);
}
670
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}.hold for writing for the
 * duration of the container run, to prevent the container from marking the
 * underlying fs readonly on shutdown.
 * return -1 on error (including failure to open the hold file).
 * return -2 if nothing needed to be pinned (no rootfs, path that cannot be
 * resolved, or rootfs that is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs)) {
		INFO("failed to get real path for '%s', not pinning", rootfs);
		return -2;
	}

	if (access(absrootfs, F_OK)) {
		SYSERROR("'%s' is not accessible", absrootfs);
		return -1;
	}

	if (stat(absrootfs, &s)) {
		SYSERROR("failed to stat '%s'", absrootfs);
		return -1;
	}

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
	if (ret < 0 || ret >= MAXPATHLEN) {
		/* not an errno condition, so no SYSERROR here */
		ERROR("pathname too long for rootfs hold file");
		return -1;
	}

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0) {
		SYSERROR("failed to open '%s'", absrootfspin);
		return -1;
	}

	INFO("opened %s as fd %d\n", absrootfspin, fd);
	return fd;
}
718
719 static int mount_rootfs(const char *rootfs, const char *target)
720 {
721 char absrootfs[MAXPATHLEN];
722 struct stat s;
723 int i;
724
725 typedef int (*rootfs_cb)(const char *, const char *);
726
727 struct rootfs_type {
728 int type;
729 rootfs_cb cb;
730 } rtfs_type[] = {
731 { S_IFDIR, mount_rootfs_dir },
732 { S_IFBLK, mount_rootfs_block },
733 { S_IFREG, mount_rootfs_file },
734 };
735
736 if (!realpath(rootfs, absrootfs)) {
737 SYSERROR("failed to get real path for '%s'", rootfs);
738 return -1;
739 }
740
741 if (access(absrootfs, F_OK)) {
742 SYSERROR("'%s' is not accessible", absrootfs);
743 return -1;
744 }
745
746 if (stat(absrootfs, &s)) {
747 SYSERROR("failed to stat '%s'", absrootfs);
748 return -1;
749 }
750
751 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
752
753 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
754 continue;
755
756 return rtfs_type[i].cb(absrootfs, target);
757 }
758
759 ERROR("unsupported rootfs type for '%s'", absrootfs);
760 return -1;
761 }
762
/*
 * setup_utsname: set the hostname to utsname->nodename.
 * A NULL @utsname means there is nothing to configure.
 * Returns 0 on success (or nothing to do), -1 when sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
777
778 static int setup_tty(const struct lxc_rootfs *rootfs,
779 const struct lxc_tty_info *tty_info, char *ttydir)
780 {
781 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
782 int i, ret;
783
784 if (!rootfs->path)
785 return 0;
786
787 for (i = 0; i < tty_info->nbtty; i++) {
788
789 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
790
791 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
792 rootfs->mount, i + 1);
793 if (ret >= sizeof(path)) {
794 ERROR("pathname too long for ttys");
795 return -1;
796 }
797 if (ttydir) {
798 /* create dev/lxc/tty%d" */
799 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
800 rootfs->mount, ttydir, i + 1);
801 if (ret >= sizeof(lxcpath)) {
802 ERROR("pathname too long for ttys");
803 return -1;
804 }
805 ret = creat(lxcpath, 0660);
806 if (ret==-1 && errno != EEXIST) {
807 SYSERROR("error creating %s\n", lxcpath);
808 return -1;
809 }
810 if (ret >= 0)
811 close(ret);
812 ret = unlink(path);
813 if (ret && errno != ENOENT) {
814 SYSERROR("error unlinking %s\n", path);
815 return -1;
816 }
817
818 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
819 WARN("failed to mount '%s'->'%s'",
820 pty_info->name, path);
821 continue;
822 }
823
824 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
825 if (ret >= sizeof(lxcpath)) {
826 ERROR("tty pathname too long");
827 return -1;
828 }
829 ret = symlink(lxcpath, path);
830 if (ret) {
831 SYSERROR("failed to create symlink for tty %d\n", i+1);
832 return -1;
833 }
834 } else {
835 /* If we populated /dev, then we need to create /dev/ttyN */
836 if (access(path, F_OK)) {
837 ret = creat(path, 0660);
838 if (ret==-1) {
839 SYSERROR("error creating %s\n", path);
840 /* this isn't fatal, continue */
841 } else
842 close(ret);
843 }
844 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
845 WARN("failed to mount '%s'->'%s'",
846 pty_info->name, path);
847 continue;
848 }
849 }
850 }
851
852 INFO("%d tty(s) has been setup", tty_info->nbtty);
853
854 return 0;
855 }
856
857 static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
858 {
859 struct lxc_list *mountlist, *listentry, *iterator;
860 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
861 int found;
862 void **cbparm;
863
864 mountentry = buffer;
865 cbparm = (void **)data;
866
867 mountlist = cbparm[0];
868 pivotdir = cbparm[1];
869
870 /* parse entry, first field is mountname, ignore */
871 mountpoint = strtok_r(mountentry, " ", &saveptr);
872 if (!mountpoint)
873 return -1;
874
875 /* second field is mountpoint */
876 mountpoint = strtok_r(NULL, " ", &saveptr);
877 if (!mountpoint)
878 return -1;
879
880 /* only consider mountpoints below old root fs */
881 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
882 return 0;
883
884 /* filter duplicate mountpoints */
885 found = 0;
886 lxc_list_for_each(iterator, mountlist) {
887 if (!strcmp(iterator->elem, mountpoint)) {
888 found = 1;
889 break;
890 }
891 }
892 if (found)
893 return 0;
894
895 /* add entry to list */
896 listentry = malloc(sizeof(*listentry));
897 if (!listentry) {
898 SYSERROR("malloc for mountpoint listentry failed");
899 return -1;
900 }
901
902 listentry->elem = strdup(mountpoint);
903 if (!listentry->elem) {
904 SYSERROR("strdup failed");
905 free(listentry);
906 return -1;
907 }
908 lxc_list_add_tail(mountlist, listentry);
909
910 return 0;
911 }
912
913 static int umount_oldrootfs(const char *oldrootfs)
914 {
915 char path[MAXPATHLEN];
916 void *cbparm[2];
917 struct lxc_list mountlist, *iterator, *next;
918 int ok, still_mounted, last_still_mounted;
919 int rc;
920
921 /* read and parse /proc/mounts in old root fs */
922 lxc_list_init(&mountlist);
923
924 /* oldrootfs is on the top tree directory now */
925 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
926 if (rc >= sizeof(path)) {
927 ERROR("rootfs name too long");
928 return -1;
929 }
930 cbparm[0] = &mountlist;
931
932 cbparm[1] = strdup(path);
933 if (!cbparm[1]) {
934 SYSERROR("strdup failed");
935 return -1;
936 }
937
938 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
939 if (rc >= sizeof(path)) {
940 ERROR("container proc/mounts name too long");
941 return -1;
942 }
943
944 ok = lxc_file_for_each_line(path,
945 setup_rootfs_pivot_root_cb, &cbparm);
946 if (ok < 0) {
947 SYSERROR("failed to read or parse mount list '%s'", path);
948 return -1;
949 }
950
951 /* umount filesystems until none left or list no longer shrinks */
952 still_mounted = 0;
953 do {
954 last_still_mounted = still_mounted;
955 still_mounted = 0;
956
957 lxc_list_for_each_safe(iterator, &mountlist, next) {
958
959 /* umount normally */
960 if (!umount(iterator->elem)) {
961 DEBUG("umounted '%s'", (char *)iterator->elem);
962 lxc_list_del(iterator);
963 continue;
964 }
965
966 still_mounted++;
967 }
968
969 } while (still_mounted > 0 && still_mounted != last_still_mounted);
970
971
972 lxc_list_for_each(iterator, &mountlist) {
973
974 /* let's try a lazy umount */
975 if (!umount2(iterator->elem, MNT_DETACH)) {
976 INFO("lazy unmount of '%s'", (char *)iterator->elem);
977 continue;
978 }
979
980 /* be more brutal (nfs) */
981 if (!umount2(iterator->elem, MNT_FORCE)) {
982 INFO("forced unmount of '%s'", (char *)iterator->elem);
983 continue;
984 }
985
986 WARN("failed to unmount '%s'", (char *)iterator->elem);
987 }
988
989 return 0;
990 }
991
/*
 * setup_rootfs_pivot_root: make @rootfs the new root with pivot_root(),
 * parking the old root on <rootfs>/<pivotdir> ("lxc_putold" when
 * @pivotdir is NULL), then unmount the old root.
 *
 * The pivot directory is created when missing and, in that case only,
 * removed again afterwards (a failure to remove it is just a warning).
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created_pivotdir = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		created_pivotdir = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* we switch from absolute path to relative path */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* remove temporary mount point, we don't consider the removing
	 * as fatal */
	if (created_pivotdir) {
		if (rmdir(pivotdir) != 0)
			WARN("can't remove mountpoint '%s': %m", pivotdir);
	}

	return 0;
}
1051
/*
 * mount_autodev: mount a tmpfs (size=100000) on <root>/dev and create
 * the pts directory inside it.
 *
 * Returns 0 on success, -1 when a path does not fit in MAXPATHLEN, the
 * mount fails or the directory cannot be created.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 */
static int mount_autodev(char *root)
{
	int ret;
	char path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);
	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	/* ret == MAXPATHLEN already means the path was truncated */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mount("none", path, "tmpfs", 0, "size=100000");
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}
	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	if (ret) {
		SYSERROR("Failed to create /dev/pts in container");
		return -1;
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
1082
1083 struct lxc_devs {
1084 char *name;
1085 mode_t mode;
1086 int maj;
1087 int min;
1088 };
1089
1090 struct lxc_devs lxc_devs[] = {
1091 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
1092 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
1093 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
1094 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1095 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1096 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
1097 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
1098 };
1099
1100 static int setup_autodev(char *root)
1101 {
1102 int ret;
1103 struct lxc_devs *d;
1104 char path[MAXPATHLEN];
1105 int i;
1106 mode_t cmask;
1107
1108 INFO("Creating initial consoles under %s/dev\n", root);
1109
1110 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
1111 if (ret < 0 || ret >= MAXPATHLEN) {
1112 ERROR("Error calculating container /dev location");
1113 return -1;
1114 }
1115
1116 INFO("Populating /dev under %s\n", root);
1117 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
1118 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
1119 d = &lxc_devs[i];
1120 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1121 if (ret < 0 || ret >= MAXPATHLEN)
1122 return -1;
1123 ret = mknod(path, d->mode, makedev(d->maj, d->min));
1124 if (ret && errno != EEXIST) {
1125 SYSERROR("Error creating %s\n", d->name);
1126 return -1;
1127 }
1128 }
1129 umask(cmask);
1130
1131 INFO("Populated /dev under %s\n", root);
1132 return 0;
1133 }
1134
/*
 * Detect whether / is mounted MS_SHARED, by reading
 * /proc/self/mountinfo.
 *
 * Only '/' is checked: a container rootfs or mount location that is
 * MS_SHARED without '/' being shared is not detected.
 *
 * For each line, the fifth space-separated field is compared with "/";
 * on a match the text following the sixth field is searched for
 * "shared:".
 *
 * Returns 1 when such a line is found, 0 otherwise (also when the file
 * cannot be opened).
 */
#define LINELEN 4096
int detect_shared_rootfs(void)
{
	char buf[LINELEN];
	FILE *f;

	f = fopen("/proc/self/mountinfo", "r");
	if (f == NULL)
		return 0;

	while (fgets(buf, LINELEN, f) != NULL) {
		char *field = buf;
		char *end, *rest;
		int skipped;

		INFO("looking at .%s.", buf);

		/* move to the space that precedes the fifth field */
		for (skipped = 0; field != NULL && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (field == NULL)
			continue;

		/* terminate the fifth field in place */
		end = strchr(field + 1, ' ');
		if (end == NULL)
			continue;
		*end = '\0';

		INFO("now p is .%s.", field);
		if (strcmp(field + 1, "/") != 0)
			continue;

		// this is '/'. is it shared?
		rest = strchr(end + 1, ' ');
		if (rest != NULL && strstr(rest, "shared:") != NULL) {
			fclose(f);
			return 1;
		}
	}

	fclose(f);
	return 0;
}
1176
1177 /*
1178 * I'll forgive you for asking whether all of this is needed :) The
1179 * answer is yes.
1180 * pivot_root will fail if the new root, the put_old dir, or the parent
1181 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1182 * or may not be current->fs_root - if we assumed it always was, we could
1183 * just mount --make-rslave /). So,
1184 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1185 * 2. make that MS_SLAVE
1186 * 3. make a 'root' directory under that
1187 * 4. mount --rbind / under the $tinyroot/root.
1188 * 5. make that rslave
1189 * 6. chdir and chroot into $tinyroot/root
1190 * 7. $tinyroot will be unmounted by our parent in start.c
1191 */
1192 static int chroot_into_slave(struct lxc_conf *conf)
1193 {
1194 char path[MAXPATHLEN];
1195 const char *destpath = conf->rootfs.mount;
1196 int ret;
1197
1198 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1199 SYSERROR("failed to mount %s bind", destpath);
1200 return -1;
1201 }
1202 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1203 SYSERROR("failed to make %s slave", destpath);
1204 return -1;
1205 }
1206 if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
1207 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1208 return -1;
1209 }
1210 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1211 if (ret < 0 || ret >= MAXPATHLEN) {
1212 ERROR("out of memory making root path");
1213 return -1;
1214 }
1215 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1216 SYSERROR("Failed to create /dev/pts in container");
1217 return -1;
1218 }
1219 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1220 SYSERROR("Failed to rbind mount / to %s", path);
1221 return -1;
1222 }
1223 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1224 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1225 return -1;
1226 }
1227 if (chdir(path)) {
1228 SYSERROR("Failed to chdir into tmp-/");
1229 return -1;
1230 }
1231 if (chroot(path)) {
1232 SYSERROR("Failed to chroot into tmp-/");
1233 return -1;
1234 }
1235 INFO("Chrooted into tmp-/ at %s\n", path);
1236 return 0;
1237 }
1238
1239 static int setup_rootfs(struct lxc_conf *conf)
1240 {
1241 const struct lxc_rootfs *rootfs = &conf->rootfs;
1242
1243 if (!rootfs->path) {
1244 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1245 SYSERROR("Failed to make / rslave");
1246 return -1;
1247 }
1248 return 0;
1249 }
1250
1251 if (access(rootfs->mount, F_OK)) {
1252 SYSERROR("failed to access to '%s', check it is present",
1253 rootfs->mount);
1254 return -1;
1255 }
1256
1257 if (detect_shared_rootfs()) {
1258 if (chroot_into_slave(conf)) {
1259 ERROR("Failed to chroot into slave /");
1260 return -1;
1261 }
1262 }
1263
1264 // First try mounting rootfs using a bdev
1265 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1266 if (bdev && bdev->ops->mount(bdev) == 0) {
1267 bdev_put(bdev);
1268 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1269 return 0;
1270 }
1271 if (bdev)
1272 bdev_put(bdev);
1273 if (mount_rootfs(rootfs->path, rootfs->mount)) {
1274 ERROR("failed to mount rootfs");
1275 return -1;
1276 }
1277
1278 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1279
1280 return 0;
1281 }
1282
1283 int setup_pivot_root(const struct lxc_rootfs *rootfs)
1284 {
1285 if (!rootfs->path)
1286 return 0;
1287
1288 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
1289 ERROR("failed to setup pivot root");
1290 return -1;
1291 }
1292
1293 return 0;
1294 }
1295
1296 static int setup_pts(int pts)
1297 {
1298 char target[PATH_MAX];
1299
1300 if (!pts)
1301 return 0;
1302
1303 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
1304 SYSERROR("failed to umount 'dev/pts'");
1305 return -1;
1306 }
1307
1308 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
1309 "newinstance,ptmxmode=0666")) {
1310 SYSERROR("failed to mount a new instance of '/dev/pts'");
1311 return -1;
1312 }
1313
1314 if (access("/dev/ptmx", F_OK)) {
1315 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1316 goto out;
1317 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
1318 return -1;
1319 }
1320
1321 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1322 goto out;
1323
1324 /* fallback here, /dev/pts/ptmx exists just mount bind */
1325 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
1326 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
1327 return -1;
1328 }
1329
1330 INFO("created new pts instance");
1331
1332 out:
1333 return 0;
1334 }
1335
/*
 * Apply the requested execution domain with personality().
 *
 * persona: value handed to personality(); -1 means "leave unchanged".
 * Returns 0 on success (or when built without HAVE_SYS_PERSONALITY_H),
 * -1 when personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1352
1353 static int setup_dev_console(const struct lxc_rootfs *rootfs,
1354 const struct lxc_console *console)
1355 {
1356 char path[MAXPATHLEN];
1357 struct stat s;
1358 int ret;
1359
1360 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1361 if (ret >= sizeof(path)) {
1362 ERROR("console path too long\n");
1363 return -1;
1364 }
1365
1366 if (access(path, F_OK)) {
1367 WARN("rootfs specified but no console found at '%s'", path);
1368 return 0;
1369 }
1370
1371 if (console->master < 0) {
1372 INFO("no console");
1373 return 0;
1374 }
1375
1376 if (stat(path, &s)) {
1377 SYSERROR("failed to stat '%s'", path);
1378 return -1;
1379 }
1380
1381 if (chmod(console->name, s.st_mode)) {
1382 SYSERROR("failed to set mode '0%o' to '%s'",
1383 s.st_mode, console->name);
1384 return -1;
1385 }
1386
1387 if (mount(console->name, path, "none", MS_BIND, 0)) {
1388 ERROR("failed to mount '%s' on '%s'", console->name, path);
1389 return -1;
1390 }
1391
1392 INFO("console has been setup");
1393 return 0;
1394 }
1395
1396 static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1397 const struct lxc_console *console,
1398 char *ttydir)
1399 {
1400 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1401 int ret;
1402
1403 /* create rootfs/dev/<ttydir> directory */
1404 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1405 ttydir);
1406 if (ret >= sizeof(path))
1407 return -1;
1408 ret = mkdir(path, 0755);
1409 if (ret && errno != EEXIST) {
1410 SYSERROR("failed with errno %d to create %s\n", errno, path);
1411 return -1;
1412 }
1413 INFO("created %s\n", path);
1414
1415 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1416 rootfs->mount, ttydir);
1417 if (ret >= sizeof(lxcpath)) {
1418 ERROR("console path too long\n");
1419 return -1;
1420 }
1421
1422 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1423 ret = unlink(path);
1424 if (ret && errno != ENOENT) {
1425 SYSERROR("error unlinking %s\n", path);
1426 return -1;
1427 }
1428
1429 ret = creat(lxcpath, 0660);
1430 if (ret==-1 && errno != EEXIST) {
1431 SYSERROR("error %d creating %s\n", errno, lxcpath);
1432 return -1;
1433 }
1434 if (ret >= 0)
1435 close(ret);
1436
1437 if (console->master < 0) {
1438 INFO("no console");
1439 return 0;
1440 }
1441
1442 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1443 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1444 return -1;
1445 }
1446
1447 /* create symlink from rootfs/dev/console to 'lxc/console' */
1448 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1449 if (ret >= sizeof(lxcpath)) {
1450 ERROR("lxc/console path too long");
1451 return -1;
1452 }
1453 ret = symlink(lxcpath, path);
1454 if (ret) {
1455 SYSERROR("failed to create symlink for console");
1456 return -1;
1457 }
1458
1459 INFO("console has been setup on %s", lxcpath);
1460
1461 return 0;
1462 }
1463
1464 static int setup_console(const struct lxc_rootfs *rootfs,
1465 const struct lxc_console *console,
1466 char *ttydir)
1467 {
1468 /* We don't have a rootfs, /dev/console will be shared */
1469 if (!rootfs->path)
1470 return 0;
1471 if (!ttydir)
1472 return setup_dev_console(rootfs, console);
1473
1474 return setup_ttydir_console(rootfs, console, ttydir);
1475 }
1476
1477 static int setup_kmsg(const struct lxc_rootfs *rootfs,
1478 const struct lxc_console *console)
1479 {
1480 char kpath[MAXPATHLEN];
1481 int ret;
1482
1483 if (!rootfs->path)
1484 return 0;
1485 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1486 if (ret < 0 || ret >= sizeof(kpath))
1487 return -1;
1488
1489 ret = unlink(kpath);
1490 if (ret && errno != ENOENT) {
1491 SYSERROR("error unlinking %s\n", kpath);
1492 return -1;
1493 }
1494
1495 ret = symlink("console", kpath);
1496 if (ret) {
1497 SYSERROR("failed to create symlink for kmsg");
1498 return -1;
1499 }
1500
1501 return 0;
1502 }
1503
1504 static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1505 {
1506 struct mount_opt *mo;
1507
1508 /* If opt is found in mount_opt, set or clear flags.
1509 * Otherwise append it to data. */
1510
1511 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1512 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1513 if (mo->clear)
1514 *flags &= ~mo->flag;
1515 else
1516 *flags |= mo->flag;
1517 return;
1518 }
1519 }
1520
1521 if (strlen(*data))
1522 strcat(*data, ",");
1523 strcat(*data, opt);
1524 }
1525
/*
 * Split a comma separated mount option string into mount flags and a
 * data string.
 *
 * mntopts:  option string, may be NULL (then nothing is parsed).
 * mntflags: receives the accumulated flags (reset to 0 first).
 * mntdata:  receives a malloc'ed string with the options that are not
 *           flags, or NULL when there are none; the caller frees it.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1564
/*
 * Perform one mount.
 *
 * The first mount() call is made without MS_REMOUNT.  When the flags
 * contain MS_REMOUNT or MS_BIND a second mount() call with MS_REMOUNT
 * added follows, so the remaining options are applied to the mount.
 *
 * Returns 0 on success, -1 when either mount() call fails.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	int needs_remount = (mountflags & (MS_REMOUNT | MS_BIND)) != 0;

	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data)) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (needs_remount) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data)) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1591
/*
 * Mount an fstab entry exactly as written (no rootfs prefix).
 *
 * A failing mount is forgiven when the entry carries the "optional"
 * option.  Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *data;
	int rc, optional;

	if (parse_mntopts(mntent->mnt_opts, &flags, &data) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, data);

	optional = hasmntopt(mntent, "optional") != NULL;

	free(data);

	return optional ? 0 : rc;
}
1613
1614 static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
1615 const struct lxc_rootfs *rootfs,
1616 const char *lxc_name)
1617 {
1618 char *aux;
1619 char path[MAXPATHLEN];
1620 unsigned long mntflags;
1621 char *mntdata;
1622 int r, ret = 0, offset;
1623 const char *lxcpath;
1624
1625 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1626 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1627 return -1;
1628 }
1629
1630 lxcpath = default_lxc_path();
1631 if (!lxcpath) {
1632 ERROR("Out of memory");
1633 return -1;
1634 }
1635
1636 /* if rootfs->path is a blockdev path, allow container fstab to
1637 * use $lxcpath/CN/rootfs as the target prefix */
1638 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
1639 if (r < 0 || r >= MAXPATHLEN)
1640 goto skipvarlib;
1641
1642 aux = strstr(mntent->mnt_dir, path);
1643 if (aux) {
1644 offset = strlen(path);
1645 goto skipabs;
1646 }
1647
1648 skipvarlib:
1649 aux = strstr(mntent->mnt_dir, rootfs->path);
1650 if (!aux) {
1651 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1652 goto out;
1653 }
1654 offset = strlen(rootfs->path);
1655
1656 skipabs:
1657
1658 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
1659 aux + offset);
1660 if (r < 0 || r >= MAXPATHLEN) {
1661 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1662 ret = -1;
1663 goto out;
1664 }
1665
1666
1667 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1668 mntflags, mntdata);
1669
1670 if (hasmntopt(mntent, "optional") != NULL)
1671 ret = 0;
1672
1673 out:
1674 free(mntdata);
1675 return ret;
1676 }
1677
/*
 * Mount an fstab entry whose target is relative to the container root.
 *
 * rootfs: directory the relative target is appended to.
 * A failing mount is forgiven when the entry carries the "optional"
 * option.  Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* leave through 'out' so mntdata is released */
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

out:
	free(mntdata);

	return ret;
}
1708
1709 static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1710 const char *lxc_name)
1711 {
1712 struct mntent *mntent;
1713 int ret = -1;
1714
1715 while ((mntent = getmntent(file))) {
1716
1717 if (!rootfs->path) {
1718 if (mount_entry_on_systemfs(mntent))
1719 goto out;
1720 continue;
1721 }
1722
1723 /* We have a separate root, mounts are relative to it */
1724 if (mntent->mnt_dir[0] != '/') {
1725 if (mount_entry_on_relative_rootfs(mntent,
1726 rootfs->mount))
1727 goto out;
1728 continue;
1729 }
1730
1731 if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
1732 goto out;
1733 }
1734
1735 ret = 0;
1736
1737 INFO("mount points have been setup");
1738 out:
1739 return ret;
1740 }
1741
/*
 * Mount all entries of the fstab file named by fstab.
 *
 * A NULL fstab means there is nothing to mount.
 * Returns 0 on success, -1 when the file cannot be opened or an entry
 * fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *fp;
	int rc;

	if (!fstab)
		return 0;

	fp = setmntent(fstab, "r");
	if (fp == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fp, lxc_name);

	endmntent(fp);
	return rc;
}
1762
1763 static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
1764 const char *lxc_name)
1765 {
1766 FILE *file;
1767 struct lxc_list *iterator;
1768 char *mount_entry;
1769 int ret;
1770
1771 file = tmpfile();
1772 if (!file) {
1773 ERROR("tmpfile error: %m");
1774 return -1;
1775 }
1776
1777 lxc_list_for_each(iterator, mount) {
1778 mount_entry = iterator->elem;
1779 fprintf(file, "%s\n", mount_entry);
1780 }
1781
1782 rewind(file);
1783
1784 ret = mount_file_entries(rootfs, file, lxc_name);
1785
1786 fclose(file);
1787 return ret;
1788 }
1789
1790 static int setup_caps(struct lxc_list *caps)
1791 {
1792 struct lxc_list *iterator;
1793 char *drop_entry;
1794 char *ptr;
1795 int i, capid;
1796
1797 lxc_list_for_each(iterator, caps) {
1798
1799 drop_entry = iterator->elem;
1800
1801 capid = -1;
1802
1803 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
1804
1805 if (strcmp(drop_entry, caps_opt[i].name))
1806 continue;
1807
1808 capid = caps_opt[i].value;
1809 break;
1810 }
1811
1812 if (capid < 0) {
1813 /* try to see if it's numeric, so the user may specify
1814 * capabilities that the running kernel knows about but
1815 * we don't */
1816 capid = strtol(drop_entry, &ptr, 10);
1817 if (!ptr || *ptr != '\0' ||
1818 capid == LONG_MIN || capid == LONG_MAX)
1819 /* not a valid number */
1820 capid = -1;
1821 else if (capid > lxc_caps_last_cap())
1822 /* we have a number but it's not a valid
1823 * capability */
1824 capid = -1;
1825 }
1826
1827 if (capid < 0) {
1828 ERROR("unknown capability %s", drop_entry);
1829 return -1;
1830 }
1831
1832 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
1833
1834 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
1835 SYSERROR("failed to remove %s capability", drop_entry);
1836 return -1;
1837 }
1838
1839 }
1840
1841 DEBUG("capabilities have been setup");
1842
1843 return 0;
1844 }
1845
1846 static int dropcaps_except(struct lxc_list *caps)
1847 {
1848 struct lxc_list *iterator;
1849 char *keep_entry;
1850 char *ptr;
1851 int i, capid;
1852 int numcaps = lxc_caps_last_cap() + 1;
1853 INFO("found %d capabilities\n", numcaps);
1854
1855 // caplist[i] is 1 if we keep capability i
1856 int *caplist = alloca(numcaps * sizeof(int));
1857 memset(caplist, 0, numcaps * sizeof(int));
1858
1859 lxc_list_for_each(iterator, caps) {
1860
1861 keep_entry = iterator->elem;
1862
1863 capid = -1;
1864
1865 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
1866
1867 if (strcmp(keep_entry, caps_opt[i].name))
1868 continue;
1869
1870 capid = caps_opt[i].value;
1871 break;
1872 }
1873
1874 if (capid < 0) {
1875 /* try to see if it's numeric, so the user may specify
1876 * capabilities that the running kernel knows about but
1877 * we don't */
1878 capid = strtol(keep_entry, &ptr, 10);
1879 if (!ptr || *ptr != '\0' ||
1880 capid == LONG_MIN || capid == LONG_MAX)
1881 /* not a valid number */
1882 capid = -1;
1883 else if (capid > lxc_caps_last_cap())
1884 /* we have a number but it's not a valid
1885 * capability */
1886 capid = -1;
1887 }
1888
1889 if (capid < 0) {
1890 ERROR("unknown capability %s", keep_entry);
1891 return -1;
1892 }
1893
1894 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
1895
1896 caplist[capid] = 1;
1897 }
1898 for (i=0; i<numcaps; i++) {
1899 if (caplist[i])
1900 continue;
1901 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
1902 SYSERROR("failed to remove capability %d", i);
1903 return -1;
1904 }
1905 }
1906
1907 DEBUG("capabilities have been setup");
1908
1909 return 0;
1910 }
1911
1912 static int setup_hw_addr(char *hwaddr, const char *ifname)
1913 {
1914 struct sockaddr sockaddr;
1915 struct ifreq ifr;
1916 int ret, fd;
1917
1918 ret = lxc_convert_mac(hwaddr, &sockaddr);
1919 if (ret) {
1920 ERROR("mac address '%s' conversion failed : %s",
1921 hwaddr, strerror(-ret));
1922 return -1;
1923 }
1924
1925 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
1926 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
1927
1928 fd = socket(AF_INET, SOCK_DGRAM, 0);
1929 if (fd < 0) {
1930 ERROR("socket failure : %s", strerror(errno));
1931 return -1;
1932 }
1933
1934 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
1935 close(fd);
1936 if (ret)
1937 ERROR("ioctl failure : %s", strerror(errno));
1938
1939 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifname);
1940
1941 return ret;
1942 }
1943
1944 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
1945 {
1946 struct lxc_list *iterator;
1947 struct lxc_inetdev *inetdev;
1948 int err;
1949
1950 lxc_list_for_each(iterator, ip) {
1951
1952 inetdev = iterator->elem;
1953
1954 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
1955 &inetdev->bcast, inetdev->prefix);
1956 if (err) {
1957 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1958 ifindex, strerror(-err));
1959 return -1;
1960 }
1961 }
1962
1963 return 0;
1964 }
1965
1966 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
1967 {
1968 struct lxc_list *iterator;
1969 struct lxc_inet6dev *inet6dev;
1970 int err;
1971
1972 lxc_list_for_each(iterator, ip) {
1973
1974 inet6dev = iterator->elem;
1975
1976 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
1977 &inet6dev->mcast, &inet6dev->acast,
1978 inet6dev->prefix);
1979 if (err) {
1980 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1981 ifindex, strerror(-err));
1982 return -1;
1983 }
1984 }
1985
1986 return 0;
1987 }
1988
1989 static int setup_netdev(struct lxc_netdev *netdev)
1990 {
1991 char ifname[IFNAMSIZ];
1992 char *current_ifname = ifname;
1993 int err;
1994
1995 /* empty network namespace */
1996 if (!netdev->ifindex) {
1997 if (netdev->flags & IFF_UP) {
1998 err = lxc_netdev_up("lo");
1999 if (err) {
2000 ERROR("failed to set the loopback up : %s",
2001 strerror(-err));
2002 return -1;
2003 }
2004 }
2005 return 0;
2006 }
2007
2008 /* retrieve the name of the interface */
2009 if (!if_indextoname(netdev->ifindex, current_ifname)) {
2010 ERROR("no interface corresponding to index '%d'",
2011 netdev->ifindex);
2012 return -1;
2013 }
2014
2015 /* default: let the system to choose one interface name */
2016 if (!netdev->name)
2017 netdev->name = netdev->type == LXC_NET_PHYS ?
2018 netdev->link : "eth%d";
2019
2020 /* rename the interface name */
2021 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2022 if (err) {
2023 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2024 strerror(-err));
2025 return -1;
2026 }
2027
2028 /* Re-read the name of the interface because its name has changed
2029 * and would be automatically allocated by the system
2030 */
2031 if (!if_indextoname(netdev->ifindex, current_ifname)) {
2032 ERROR("no interface corresponding to index '%d'",
2033 netdev->ifindex);
2034 return -1;
2035 }
2036
2037 /* set a mac address */
2038 if (netdev->hwaddr) {
2039 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
2040 ERROR("failed to setup hw address for '%s'",
2041 current_ifname);
2042 return -1;
2043 }
2044 }
2045
2046 /* setup ipv4 addresses on the interface */
2047 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
2048 ERROR("failed to setup ip addresses for '%s'",
2049 ifname);
2050 return -1;
2051 }
2052
2053 /* setup ipv6 addresses on the interface */
2054 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
2055 ERROR("failed to setup ipv6 addresses for '%s'",
2056 ifname);
2057 return -1;
2058 }
2059
2060 /* set the network device up */
2061 if (netdev->flags & IFF_UP) {
2062 int err;
2063
2064 err = lxc_netdev_up(current_ifname);
2065 if (err) {
2066 ERROR("failed to set '%s' up : %s", current_ifname,
2067 strerror(-err));
2068 return -1;
2069 }
2070
2071 /* the network is up, make the loopback up too */
2072 err = lxc_netdev_up("lo");
2073 if (err) {
2074 ERROR("failed to set the loopback up : %s",
2075 strerror(-err));
2076 return -1;
2077 }
2078 }
2079
2080 /* We can only set up the default routes after bringing
2081 * up the interface, sine bringing up the interface adds
2082 * the link-local routes and we can't add a default
2083 * route if the gateway is not reachable. */
2084
2085 /* setup ipv4 gateway on the interface */
2086 if (netdev->ipv4_gateway) {
2087 if (!(netdev->flags & IFF_UP)) {
2088 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2089 return -1;
2090 }
2091
2092 if (lxc_list_empty(&netdev->ipv4)) {
2093 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2094 return -1;
2095 }
2096
2097 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2098 if (err) {
2099 ERROR("failed to setup ipv4 gateway for '%s': %s",
2100 ifname, strerror(-err));
2101 if (netdev->ipv4_gateway_auto) {
2102 char buf[INET_ADDRSTRLEN];
2103 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2104 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2105 }
2106 return -1;
2107 }
2108 }
2109
2110 /* setup ipv6 gateway on the interface */
2111 if (netdev->ipv6_gateway) {
2112 if (!(netdev->flags & IFF_UP)) {
2113 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2114 return -1;
2115 }
2116
2117 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2118 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2119 return -1;
2120 }
2121
2122 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2123 if (err) {
2124 ERROR("failed to setup ipv6 gateway for '%s': %s",
2125 ifname, strerror(-err));
2126 if (netdev->ipv6_gateway_auto) {
2127 char buf[INET6_ADDRSTRLEN];
2128 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2129 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2130 }
2131 return -1;
2132 }
2133 }
2134
2135 DEBUG("'%s' has been setup", current_ifname);
2136
2137 return 0;
2138 }
2139
2140 static int setup_network(struct lxc_list *network)
2141 {
2142 struct lxc_list *iterator;
2143 struct lxc_netdev *netdev;
2144
2145 lxc_list_for_each(iterator, network) {
2146
2147 netdev = iterator->elem;
2148
2149 if (setup_netdev(netdev)) {
2150 ERROR("failed to setup netdev");
2151 return -1;
2152 }
2153 }
2154
2155 if (!lxc_list_empty(network))
2156 INFO("network has been setup");
2157
2158 return 0;
2159 }
2160
2161 void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2162 {
2163 int i;
2164
2165 INFO("running to reset %d nic names", conf->num_savednics);
2166 for (i=0; i<conf->num_savednics; i++) {
2167 struct saved_nic *s = &conf->saved_nics[i];
2168 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2169 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2170 free(s->orig_name);
2171 }
2172 conf->num_savednics = 0;
2173 free(conf->saved_nics);
2174 }
2175
2176 static int setup_private_host_hw_addr(char *veth1)
2177 {
2178 struct ifreq ifr;
2179 int err;
2180 int sockfd;
2181
2182 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
2183 if (sockfd < 0)
2184 return -errno;
2185
2186 snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2187 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2188 if (err < 0) {
2189 close(sockfd);
2190 return -errno;
2191 }
2192
2193 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2194 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
2195 close(sockfd);
2196 if (err < 0)
2197 return -errno;
2198
2199 DEBUG("mac address of host interface '%s' changed to private "
2200 "%02x:%02x:%02x:%02x:%02x:%02x", veth1,
2201 ifr.ifr_hwaddr.sa_data[0] & 0xff,
2202 ifr.ifr_hwaddr.sa_data[1] & 0xff,
2203 ifr.ifr_hwaddr.sa_data[2] & 0xff,
2204 ifr.ifr_hwaddr.sa_data[3] & 0xff,
2205 ifr.ifr_hwaddr.sa_data[4] & 0xff,
2206 ifr.ifr_hwaddr.sa_data[5] & 0xff);
2207
2208 return 0;
2209 }
2210
2211 static char *default_rootfs_mount = LXCROOTFSMOUNT;
2212
2213 struct lxc_conf *lxc_conf_init(void)
2214 {
2215 struct lxc_conf *new;
2216 int i;
2217
2218 new = malloc(sizeof(*new));
2219 if (!new) {
2220 ERROR("lxc_conf_init : %m");
2221 return NULL;
2222 }
2223 memset(new, 0, sizeof(*new));
2224
2225 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
2226 new->personality = -1;
2227 new->console.log_path = NULL;
2228 new->console.log_fd = -1;
2229 new->console.path = NULL;
2230 new->console.peer = -1;
2231 new->console.peerpty.busy = -1;
2232 new->console.peerpty.master = -1;
2233 new->console.peerpty.slave = -1;
2234 new->console.master = -1;
2235 new->console.slave = -1;
2236 new->console.name[0] = '\0';
2237 new->maincmd_fd = -1;
2238 new->rootfs.mount = strdup(default_rootfs_mount);
2239 if (!new->rootfs.mount) {
2240 ERROR("lxc_conf_init : %m");
2241 free(new);
2242 return NULL;
2243 }
2244 new->kmsg = 1;
2245 lxc_list_init(&new->cgroup);
2246 lxc_list_init(&new->network);
2247 lxc_list_init(&new->mount_list);
2248 lxc_list_init(&new->caps);
2249 lxc_list_init(&new->keepcaps);
2250 lxc_list_init(&new->id_map);
2251 for (i=0; i<NUM_LXC_HOOKS; i++)
2252 lxc_list_init(&new->hooks[i]);
2253 #if HAVE_APPARMOR
2254 new->aa_profile = NULL;
2255 #endif
2256 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2257 new->lsm_umount_proc = 0;
2258 #endif
2259
2260 return new;
2261 }
2262
2263 static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2264 {
2265 char veth1buf[IFNAMSIZ], *veth1;
2266 char veth2buf[IFNAMSIZ], *veth2;
2267 int err;
2268
2269 if (netdev->priv.veth_attr.pair)
2270 veth1 = netdev->priv.veth_attr.pair;
2271 else {
2272 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2273 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2274 ERROR("veth1 name too long");
2275 return -1;
2276 }
2277 veth1 = mkifname(veth1buf);
2278 /* store away for deconf */
2279 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
2280 }
2281
2282 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
2283 veth2 = mkifname(veth2buf);
2284
2285 if (!strlen(veth1) || !strlen(veth2)) {
2286 ERROR("failed to allocate a temporary name");
2287 return -1;
2288 }
2289
2290 err = lxc_veth_create(veth1, veth2);
2291 if (err) {
2292 ERROR("failed to create %s-%s : %s", veth1, veth2,
2293 strerror(-err));
2294 return -1;
2295 }
2296
2297 /* changing the high byte of the mac address to 0xfe, the bridge interface
2298 * will always keep the host's mac address and not take the mac address
2299 * of a container */
2300 err = setup_private_host_hw_addr(veth1);
2301 if (err) {
2302 ERROR("failed to change mac address of host interface '%s' : %s",
2303 veth1, strerror(-err));
2304 goto out_delete;
2305 }
2306
2307 if (netdev->mtu) {
2308 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
2309 if (!err)
2310 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
2311 if (err) {
2312 ERROR("failed to set mtu '%s' for %s-%s : %s",
2313 netdev->mtu, veth1, veth2, strerror(-err));
2314 goto out_delete;
2315 }
2316 }
2317
2318 if (netdev->link) {
2319 err = lxc_bridge_attach(netdev->link, veth1);
2320 if (err) {
2321 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2322 veth1, netdev->link, strerror(-err));
2323 goto out_delete;
2324 }
2325 }
2326
2327 netdev->ifindex = if_nametoindex(veth2);
2328 if (!netdev->ifindex) {
2329 ERROR("failed to retrieve the index for %s", veth2);
2330 goto out_delete;
2331 }
2332
2333 err = lxc_netdev_up(veth1);
2334 if (err) {
2335 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2336 goto out_delete;
2337 }
2338
2339 if (netdev->upscript) {
2340 err = run_script(handler->name, "net", netdev->upscript, "up",
2341 "veth", veth1, (char*) NULL);
2342 if (err)
2343 goto out_delete;
2344 }
2345
2346 DEBUG("instanciated veth '%s/%s', index is '%d'",
2347 veth1, veth2, netdev->ifindex);
2348
2349 return 0;
2350
2351 out_delete:
2352 lxc_netdev_delete_by_name(veth1);
2353 return -1;
2354 }
2355
2356 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2357 {
2358 char *veth1;
2359 int err;
2360
2361 if (netdev->priv.veth_attr.pair)
2362 veth1 = netdev->priv.veth_attr.pair;
2363 else
2364 veth1 = netdev->priv.veth_attr.veth1;
2365
2366 if (netdev->downscript) {
2367 err = run_script(handler->name, "net", netdev->downscript,
2368 "down", "veth", veth1, (char*) NULL);
2369 if (err)
2370 return -1;
2371 }
2372 return 0;
2373 }
2374
2375 static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2376 {
2377 char peerbuf[IFNAMSIZ], *peer;
2378 int err;
2379
2380 if (!netdev->link) {
2381 ERROR("no link specified for macvlan netdev");
2382 return -1;
2383 }
2384
2385 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2386 if (err >= sizeof(peerbuf))
2387 return -1;
2388
2389 peer = mkifname(peerbuf);
2390 if (!strlen(peer)) {
2391 ERROR("failed to make a temporary name");
2392 return -1;
2393 }
2394
2395 err = lxc_macvlan_create(netdev->link, peer,
2396 netdev->priv.macvlan_attr.mode);
2397 if (err) {
2398 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2399 peer, netdev->link, strerror(-err));
2400 return -1;
2401 }
2402
2403 netdev->ifindex = if_nametoindex(peer);
2404 if (!netdev->ifindex) {
2405 ERROR("failed to retrieve the index for %s", peer);
2406 lxc_netdev_delete_by_name(peer);
2407 return -1;
2408 }
2409
2410 if (netdev->upscript) {
2411 err = run_script(handler->name, "net", netdev->upscript, "up",
2412 "macvlan", netdev->link, (char*) NULL);
2413 if (err)
2414 return -1;
2415 }
2416
2417 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2418 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
2419
2420 return 0;
2421 }
2422
2423 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2424 {
2425 int err;
2426
2427 if (netdev->downscript) {
2428 err = run_script(handler->name, "net", netdev->downscript,
2429 "down", "macvlan", netdev->link,
2430 (char*) NULL);
2431 if (err)
2432 return -1;
2433 }
2434 return 0;
2435 }
2436
2437 /* XXX: merge with instanciate_macvlan */
2438 static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2439 {
2440 char peer[IFNAMSIZ];
2441 int err;
2442
2443 if (!netdev->link) {
2444 ERROR("no link specified for vlan netdev");
2445 return -1;
2446 }
2447
2448 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2449 if (err >= sizeof(peer)) {
2450 ERROR("peer name too long");
2451 return -1;
2452 }
2453
2454 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2455 if (err) {
2456 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2457 peer, netdev->link, strerror(-err));
2458 return -1;
2459 }
2460
2461 netdev->ifindex = if_nametoindex(peer);
2462 if (!netdev->ifindex) {
2463 ERROR("failed to retrieve the ifindex for %s", peer);
2464 lxc_netdev_delete_by_name(peer);
2465 return -1;
2466 }
2467
2468 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2469 netdev->ifindex);
2470
2471 return 0;
2472 }
2473
/*
 * Shutdown hook for vlan netdevs: nothing to do, always returns 0.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* neither argument is needed */
	(void)handler;
	(void)netdev;

	return 0;
}
2478
2479 static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2480 {
2481 if (!netdev->link) {
2482 ERROR("no link specified for the physical interface");
2483 return -1;
2484 }
2485
2486 netdev->ifindex = if_nametoindex(netdev->link);
2487 if (!netdev->ifindex) {
2488 ERROR("failed to retrieve the index for %s", netdev->link);
2489 return -1;
2490 }
2491
2492 if (netdev->upscript) {
2493 int err;
2494 err = run_script(handler->name, "net", netdev->upscript,
2495 "up", "phys", netdev->link, (char*) NULL);
2496 if (err)
2497 return -1;
2498 }
2499
2500 return 0;
2501 }
2502
2503 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2504 {
2505 int err;
2506
2507 if (netdev->downscript) {
2508 err = run_script(handler->name, "net", netdev->downscript,
2509 "down", "phys", netdev->link, (char*) NULL);
2510 if (err)
2511 return -1;
2512 }
2513 return 0;
2514 }
2515
2516 static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2517 {
2518 netdev->ifindex = 0;
2519 if (netdev->upscript) {
2520 int err;
2521 err = run_script(handler->name, "net", netdev->upscript,
2522 "up", "empty", (char*) NULL);
2523 if (err)
2524 return -1;
2525 }
2526 return 0;
2527 }
2528
2529 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2530 {
2531 int err;
2532
2533 if (netdev->downscript) {
2534 err = run_script(handler->name, "net", netdev->downscript,
2535 "down", "empty", (char*) NULL);
2536 if (err)
2537 return -1;
2538 }
2539 return 0;
2540 }
2541
2542 int lxc_create_network(struct lxc_handler *handler)
2543 {
2544 struct lxc_list *network = &handler->conf->network;
2545 struct lxc_list *iterator;
2546 struct lxc_netdev *netdev;
2547
2548 lxc_list_for_each(iterator, network) {
2549
2550 netdev = iterator->elem;
2551
2552 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
2553 ERROR("invalid network configuration type '%d'",
2554 netdev->type);
2555 return -1;
2556 }
2557
2558 if (netdev_conf[netdev->type](handler, netdev)) {
2559 ERROR("failed to create netdev");
2560 return -1;
2561 }
2562
2563 }
2564
2565 return 0;
2566 }
2567
2568 void lxc_delete_network(struct lxc_handler *handler)
2569 {
2570 struct lxc_list *network = &handler->conf->network;
2571 struct lxc_list *iterator;
2572 struct lxc_netdev *netdev;
2573
2574 lxc_list_for_each(iterator, network) {
2575 netdev = iterator->elem;
2576
2577 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
2578 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2579 WARN("failed to rename to the initial name the " \
2580 "netdev '%s'", netdev->link);
2581 continue;
2582 }
2583
2584 if (netdev_deconf[netdev->type](handler, netdev)) {
2585 WARN("failed to destroy netdev");
2586 }
2587
2588 /* Recent kernel remove the virtual interfaces when the network
2589 * namespace is destroyed but in case we did not moved the
2590 * interface to the network namespace, we have to destroy it
2591 */
2592 if (netdev->ifindex != 0 &&
2593 lxc_netdev_delete_by_index(netdev->ifindex))
2594 WARN("failed to remove interface '%s'", netdev->name);
2595 }
2596 }
2597
2598 int lxc_assign_network(struct lxc_list *network, pid_t pid)
2599 {
2600 struct lxc_list *iterator;
2601 struct lxc_netdev *netdev;
2602 int err;
2603
2604 lxc_list_for_each(iterator, network) {
2605
2606 netdev = iterator->elem;
2607
2608 /* empty network namespace, nothing to move */
2609 if (!netdev->ifindex)
2610 continue;
2611
2612 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
2613 if (err) {
2614 ERROR("failed to move '%s' to the container : %s",
2615 netdev->link, strerror(-err));
2616 return -1;
2617 }
2618
2619 DEBUG("move '%s' to '%d'", netdev->name, pid);
2620 }
2621
2622 return 0;
2623 }
2624
2625 static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
2626 size_t buf_size)
2627 {
2628 char path[PATH_MAX];
2629 int ret, closeret;
2630 FILE *f;
2631
2632 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
2633 if (ret < 0 || ret >= PATH_MAX) {
2634 fprintf(stderr, "%s: path name too long", __func__);
2635 return -E2BIG;
2636 }
2637 f = fopen(path, "w");
2638 if (!f) {
2639 perror("open");
2640 return -EINVAL;
2641 }
2642 ret = fwrite(buf, buf_size, 1, f);
2643 if (ret < 0)
2644 SYSERROR("writing id mapping");
2645 closeret = fclose(f);
2646 if (closeret)
2647 SYSERROR("writing id mapping");
2648 return ret < 0 ? ret : closeret;
2649 }
2650
2651 int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
2652 {
2653 struct lxc_list *iterator;
2654 struct id_map *map;
2655 int ret = 0;
2656 enum idtype type;
2657 char *buf = NULL, *pos;
2658
2659 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
2660 int left, fill;
2661
2662 pos = buf;
2663 lxc_list_for_each(iterator, idmap) {
2664 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
2665 if (!buf)
2666 buf = pos = malloc(4096);
2667 if (!buf)
2668 return -ENOMEM;
2669
2670 map = iterator->elem;
2671 if (map->idtype == type) {
2672 left = 4096 - (pos - buf);
2673 fill = snprintf(pos, left, "%lu %lu %lu\n",
2674 map->nsid, map->hostid, map->range);
2675 if (fill <= 0 || fill >= left)
2676 SYSERROR("snprintf failed, too many mappings");
2677 pos += fill;
2678 }
2679 }
2680 if (pos == buf) // no mappings were found
2681 continue;
2682 ret = write_id_mapping(type, pid, buf, pos-buf);
2683 if (ret)
2684 break;
2685 }
2686
2687 if (buf)
2688 free(buf);
2689 return ret;
2690 }
2691
2692 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2693 {
2694 struct lxc_list *network = &handler->conf->network;
2695 struct lxc_list *iterator;
2696 struct lxc_netdev *netdev;
2697 int link_index;
2698
2699 lxc_list_for_each(iterator, network) {
2700 netdev = iterator->elem;
2701
2702 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2703 continue;
2704
2705 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2706 ERROR("gateway = auto only supported for "
2707 "veth and macvlan");
2708 return -1;
2709 }
2710
2711 if (!netdev->link) {
2712 ERROR("gateway = auto needs a link interface");
2713 return -1;
2714 }
2715
2716 link_index = if_nametoindex(netdev->link);
2717 if (!link_index)
2718 return -EINVAL;
2719
2720 if (netdev->ipv4_gateway_auto) {
2721 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2722 ERROR("failed to automatically find ipv4 gateway "
2723 "address from link interface '%s'", netdev->link);
2724 return -1;
2725 }
2726 }
2727
2728 if (netdev->ipv6_gateway_auto) {
2729 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2730 ERROR("failed to automatically find ipv6 gateway "
2731 "address from link interface '%s'", netdev->link);
2732 return -1;
2733 }
2734 }
2735 }
2736
2737 return 0;
2738 }
2739
2740 int lxc_create_tty(const char *name, struct lxc_conf *conf)
2741 {
2742 struct lxc_tty_info *tty_info = &conf->tty_info;
2743 int i;
2744
2745 /* no tty in the configuration */
2746 if (!conf->tty)
2747 return 0;
2748
2749 tty_info->pty_info =
2750 malloc(sizeof(*tty_info->pty_info)*conf->tty);
2751 if (!tty_info->pty_info) {
2752 SYSERROR("failed to allocate pty_info");
2753 return -1;
2754 }
2755
2756 for (i = 0; i < conf->tty; i++) {
2757
2758 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2759
2760 if (openpty(&pty_info->master, &pty_info->slave,
2761 pty_info->name, NULL, NULL)) {
2762 SYSERROR("failed to create pty #%d", i);
2763 tty_info->nbtty = i;
2764 lxc_delete_tty(tty_info);
2765 return -1;
2766 }
2767
2768 DEBUG("allocated pty '%s' (%d/%d)",
2769 pty_info->name, pty_info->master, pty_info->slave);
2770
2771 /* Prevent leaking the file descriptors to the container */
2772 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
2773 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
2774
2775 pty_info->busy = 0;
2776 }
2777
2778 tty_info->nbtty = conf->tty;
2779
2780 INFO("tty's configured");
2781
2782 return 0;
2783 }
2784
2785 void lxc_delete_tty(struct lxc_tty_info *tty_info)
2786 {
2787 int i;
2788
2789 for (i = 0; i < tty_info->nbtty; i++) {
2790 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2791
2792 close(pty_info->master);
2793 close(pty_info->slave);
2794 }
2795
2796 free(tty_info->pty_info);
2797 tty_info->nbtty = 0;
2798 }
2799
2800 /*
2801 * given a host uid, return the ns uid if it is mapped.
2802 * if it is not mapped, return the original host id.
2803 */
2804 static int shiftid(struct lxc_conf *c, int uid, enum idtype w)
2805 {
2806 struct lxc_list *iterator;
2807 struct id_map *map;
2808 int low, high;
2809
2810 lxc_list_for_each(iterator, &c->id_map) {
2811 map = iterator->elem;
2812 if (map->idtype != w)
2813 continue;
2814
2815 low = map->nsid;
2816 high = map->nsid + map->range;
2817 if (uid < low || uid >= high)
2818 continue;
2819
2820 return uid - low + map->hostid;
2821 }
2822
2823 return uid;
2824 }
2825
2826 /*
2827 * Take a pathname for a file created on the host, and map the uid and gid
2828 * into the container if needed. (Used for ttys)
2829 */
2830 static int uid_shift_file(char *path, struct lxc_conf *c)
2831 {
2832 struct stat statbuf;
2833 int newuid, newgid;
2834
2835 if (stat(path, &statbuf)) {
2836 SYSERROR("stat(%s)", path);
2837 return -1;
2838 }
2839
2840 newuid = shiftid(c, statbuf.st_uid, ID_TYPE_UID);
2841 newgid = shiftid(c, statbuf.st_gid, ID_TYPE_GID);
2842 if (newuid != statbuf.st_uid || newgid != statbuf.st_gid) {
2843 DEBUG("chowning %s from %d:%d to %d:%d\n", path, (int)statbuf.st_uid, (int)statbuf.st_gid, newuid, newgid);
2844 if (chown(path, newuid, newgid)) {
2845 SYSERROR("chown(%s)", path);
2846 return -1;
2847 }
2848 }
2849 return 0;
2850 }
2851
2852 int uid_shift_ttys(int pid, struct lxc_conf *conf)
2853 {
2854 int i, ret;
2855 struct lxc_tty_info *tty_info = &conf->tty_info;
2856 char path[MAXPATHLEN];
2857 char *ttydir = conf->ttydir;
2858
2859 if (!conf->rootfs.path)
2860 return 0;
2861 /* first the console */
2862 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/console", pid, ttydir ? ttydir : "");
2863 if (ret < 0 || ret >= sizeof(path)) {
2864 ERROR("console path too long\n");
2865 return -1;
2866 }
2867 if (uid_shift_file(path, conf)) {
2868 DEBUG("Failed to chown the console %s.\n", path);
2869 return -1;
2870 }
2871 for (i=0; i< tty_info->nbtty; i++) {
2872 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/tty%d",
2873 pid, ttydir ? ttydir : "", i + 1);
2874 if (ret < 0 || ret >= sizeof(path)) {
2875 ERROR("pathname too long for ttys");
2876 return -1;
2877 }
2878 if (uid_shift_file(path, conf)) {
2879 DEBUG("Failed to chown pty %s.\n", path);
2880 return -1;
2881 }
2882 }
2883
2884 return 0;
2885 }
2886
2887 int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath)
2888 {
2889 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2890 int mounted;
2891 #endif
2892
2893 if (setup_utsname(lxc_conf->utsname)) {
2894 ERROR("failed to setup the utsname for '%s'", name);
2895 return -1;
2896 }
2897
2898 if (setup_network(&lxc_conf->network)) {
2899 ERROR("failed to setup the network for '%s'", name);
2900 return -1;
2901 }
2902
2903 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
2904 ERROR("failed to run pre-mount hooks for container '%s'.", name);
2905 return -1;
2906 }
2907
2908 if (setup_rootfs(lxc_conf)) {
2909 ERROR("failed to setup rootfs for '%s'", name);
2910 return -1;
2911 }
2912
2913 if (lxc_conf->autodev) {
2914 if (mount_autodev(lxc_conf->rootfs.mount)) {
2915 ERROR("failed to mount /dev in the container");
2916 return -1;
2917 }
2918 }
2919
2920 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
2921 ERROR("failed to setup the mounts for '%s'", name);
2922 return -1;
2923 }
2924
2925 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
2926 ERROR("failed to setup the mount entries for '%s'", name);
2927 return -1;
2928 }
2929
2930 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
2931 ERROR("failed to run mount hooks for container '%s'.", name);
2932 return -1;
2933 }
2934
2935 if (lxc_conf->autodev) {
2936 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
2937 ERROR("failed to run autodev hooks for container '%s'.", name);
2938 return -1;
2939 }
2940 if (setup_autodev(lxc_conf->rootfs.mount)) {
2941 ERROR("failed to populate /dev in the container");
2942 return -1;
2943 }
2944 }
2945
2946 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
2947 ERROR("failed to setup the console for '%s'", name);
2948 return -1;
2949 }
2950
2951 if (lxc_conf->kmsg) {
2952 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
2953 ERROR("failed to setup kmsg for '%s'", name);
2954 }
2955
2956 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
2957 ERROR("failed to setup the ttys for '%s'", name);
2958 return -1;
2959 }
2960
2961 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2962 INFO("rootfs path is .%s., mount is .%s.", lxc_conf->rootfs.path,
2963 lxc_conf->rootfs.mount);
2964 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
2965 if (mount("proc", "/proc", "proc", 0, NULL)) {
2966 SYSERROR("Failed mounting /proc, proceeding");
2967 mounted = 0;
2968 } else
2969 mounted = 1;
2970 } else
2971 mounted = lsm_mount_proc_if_needed(lxc_conf->rootfs.path, lxc_conf->rootfs.mount);
2972 if (mounted == -1) {
2973 SYSERROR("failed to mount /proc in the container.");
2974 return -1;
2975 } else if (mounted == 1) {
2976 lxc_conf->lsm_umount_proc = 1;
2977 }
2978 #endif
2979
2980 if (setup_pivot_root(&lxc_conf->rootfs)) {
2981 ERROR("failed to set rootfs for '%s'", name);
2982 return -1;
2983 }
2984
2985 if (setup_pts(lxc_conf->pts)) {
2986 ERROR("failed to setup the new pts instance");
2987 return -1;
2988 }
2989
2990 if (setup_personality(lxc_conf->personality)) {
2991 ERROR("failed to setup personality");
2992 return -1;
2993 }
2994
2995 if (lxc_list_empty(&lxc_conf->id_map)) {
2996 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
2997 if (!lxc_list_empty(&lxc_conf->caps)) {
2998 ERROR("Simultaneously requested dropping and keeping caps");
2999 return -1;
3000 }
3001 if (dropcaps_except(&lxc_conf->keepcaps)) {
3002 ERROR("failed to keep requested caps\n");
3003 return -1;
3004 }
3005 } else if (setup_caps(&lxc_conf->caps)) {
3006 ERROR("failed to drop capabilities");
3007 return -1;
3008 }
3009 }
3010
3011 NOTICE("'%s' is setup.", name);
3012
3013 return 0;
3014 }
3015
3016 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3017 const char *lxcpath, char *argv[])
3018 {
3019 int which = -1;
3020 struct lxc_list *it;
3021
3022 if (strcmp(hook, "pre-start") == 0)
3023 which = LXCHOOK_PRESTART;
3024 else if (strcmp(hook, "pre-mount") == 0)
3025 which = LXCHOOK_PREMOUNT;
3026 else if (strcmp(hook, "mount") == 0)
3027 which = LXCHOOK_MOUNT;
3028 else if (strcmp(hook, "autodev") == 0)
3029 which = LXCHOOK_AUTODEV;
3030 else if (strcmp(hook, "start") == 0)
3031 which = LXCHOOK_START;
3032 else if (strcmp(hook, "post-stop") == 0)
3033 which = LXCHOOK_POSTSTOP;
3034 else if (strcmp(hook, "clone") == 0)
3035 which = LXCHOOK_CLONE;
3036 else
3037 return -1;
3038 lxc_list_for_each(it, &conf->hooks[which]) {
3039 int ret;
3040 char *hookname = it->elem;
3041 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
3042 if (ret)
3043 return ret;
3044 }
3045 return 0;
3046 }
3047
3048 static void lxc_remove_nic(struct lxc_list *it)
3049 {
3050 struct lxc_netdev *netdev = it->elem;
3051 struct lxc_list *it2,*next;
3052
3053 lxc_list_del(it);
3054
3055 if (netdev->link)
3056 free(netdev->link);
3057 if (netdev->name)
3058 free(netdev->name);
3059 if (netdev->upscript)
3060 free(netdev->upscript);
3061 if (netdev->hwaddr)
3062 free(netdev->hwaddr);
3063 if (netdev->mtu)
3064 free(netdev->mtu);
3065 if (netdev->ipv4_gateway)
3066 free(netdev->ipv4_gateway);
3067 if (netdev->ipv6_gateway)
3068 free(netdev->ipv6_gateway);
3069 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
3070 lxc_list_del(it2);
3071 free(it2->elem);
3072 free(it2);
3073 }
3074 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
3075 lxc_list_del(it2);
3076 free(it2->elem);
3077 free(it2);
3078 }
3079 free(netdev);
3080 free(it);
3081 }
3082
3083 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
3084 int lxc_clear_nic(struct lxc_conf *c, const char *key)
3085 {
3086 char *p1;
3087 int ret, idx, i;
3088 struct lxc_list *it;
3089 struct lxc_netdev *netdev;
3090
3091 p1 = index(key, '.');
3092 if (!p1 || *(p1+1) == '\0')
3093 p1 = NULL;
3094
3095 ret = sscanf(key, "%d", &idx);
3096 if (ret != 1) return -1;
3097 if (idx < 0)
3098 return -1;
3099
3100 i = 0;
3101 lxc_list_for_each(it, &c->network) {
3102 if (i == idx)
3103 break;
3104 i++;
3105 }
3106 if (i < idx) // we don't have that many nics defined
3107 return -1;
3108
3109 if (!it || !it->elem)
3110 return -1;
3111
3112 netdev = it->elem;
3113
3114 if (!p1) {
3115 lxc_remove_nic(it);
3116 } else if (strcmp(p1, "ipv4") == 0) {
3117 struct lxc_list *it2,*next;
3118 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
3119 lxc_list_del(it2);
3120 free(it2->elem);
3121 free(it2);
3122 }
3123 } else if (strcmp(p1, "ipv6") == 0) {
3124 struct lxc_list *it2,*next;
3125 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
3126 lxc_list_del(it2);
3127 free(it2->elem);
3128 free(it2);
3129 }
3130 } else if (strcmp(p1, "link") == 0) {
3131 if (netdev->link) {
3132 free(netdev->link);
3133 netdev->link = NULL;
3134 }
3135 } else if (strcmp(p1, "name") == 0) {
3136 if (netdev->name) {
3137 free(netdev->name);
3138 netdev->name = NULL;
3139 }
3140 } else if (strcmp(p1, "script.up") == 0) {
3141 if (netdev->upscript) {
3142 free(netdev->upscript);
3143 netdev->upscript = NULL;
3144 }
3145 } else if (strcmp(p1, "hwaddr") == 0) {
3146 if (netdev->hwaddr) {
3147 free(netdev->hwaddr);
3148 netdev->hwaddr = NULL;
3149 }
3150 } else if (strcmp(p1, "mtu") == 0) {
3151 if (netdev->mtu) {
3152 free(netdev->mtu);
3153 netdev->mtu = NULL;
3154 }
3155 } else if (strcmp(p1, "ipv4_gateway") == 0) {
3156 if (netdev->ipv4_gateway) {
3157 free(netdev->ipv4_gateway);
3158 netdev->ipv4_gateway = NULL;
3159 }
3160 } else if (strcmp(p1, "ipv6_gateway") == 0) {
3161 if (netdev->ipv6_gateway) {
3162 free(netdev->ipv6_gateway);
3163 netdev->ipv6_gateway = NULL;
3164 }
3165 }
3166 else return -1;
3167
3168 return 0;
3169 }
3170
3171 int lxc_clear_config_network(struct lxc_conf *c)
3172 {
3173 struct lxc_list *it,*next;
3174 lxc_list_for_each_safe(it, &c->network, next) {
3175 lxc_remove_nic(it);
3176 }
3177 return 0;
3178 }
3179
3180 int lxc_clear_config_caps(struct lxc_conf *c)
3181 {
3182 struct lxc_list *it,*next;
3183
3184 lxc_list_for_each_safe(it, &c->caps, next) {
3185 lxc_list_del(it);
3186 free(it->elem);
3187 free(it);
3188 }
3189 return 0;
3190 }
3191
3192 int lxc_clear_idmaps(struct lxc_conf *c)
3193 {
3194 struct lxc_list *it, *next;
3195
3196 lxc_list_for_each_safe(it, &c->id_map, next) {
3197 lxc_list_del(it);
3198 free(it->elem);
3199 free(it);
3200 }
3201 return 0;
3202 }
3203
3204 int lxc_clear_config_keepcaps(struct lxc_conf *c)
3205 {
3206 struct lxc_list *it,*next;
3207
3208 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3209 lxc_list_del(it);
3210 free(it->elem);
3211 free(it);
3212 }
3213 return 0;
3214 }
3215
3216 int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
3217 {
3218 struct lxc_list *it,*next;
3219 bool all = false;
3220 const char *k = key + 11;
3221
3222 if (strcmp(key, "lxc.cgroup") == 0)
3223 all = true;
3224
3225 lxc_list_for_each_safe(it, &c->cgroup, next) {
3226 struct lxc_cgroup *cg = it->elem;
3227 if (!all && strcmp(cg->subsystem, k) != 0)
3228 continue;
3229 lxc_list_del(it);
3230 free(cg->subsystem);
3231 free(cg->value);
3232 free(cg);
3233 free(it);
3234 }
3235 return 0;
3236 }
3237
3238 int lxc_clear_mount_entries(struct lxc_conf *c)
3239 {
3240 struct lxc_list *it,*next;
3241
3242 lxc_list_for_each_safe(it, &c->mount_list, next) {
3243 lxc_list_del(it);
3244 free(it->elem);
3245 free(it);
3246 }
3247 return 0;
3248 }
3249
3250 int lxc_clear_hooks(struct lxc_conf *c, const char *key)
3251 {
3252 struct lxc_list *it,*next;
3253 bool all = false, done = false;
3254 const char *k = key + 9;
3255 int i;
3256
3257 if (strcmp(key, "lxc.hook") == 0)
3258 all = true;
3259
3260 for (i=0; i<NUM_LXC_HOOKS; i++) {
3261 if (all || strcmp(k, lxchook_names[i]) == 0) {
3262 lxc_list_for_each_safe(it, &c->hooks[i], next) {
3263 lxc_list_del(it);
3264 free(it->elem);
3265 free(it);
3266 }
3267 done = true;
3268 }
3269 }
3270
3271 if (!done) {
3272 ERROR("Invalid hook key: %s", key);
3273 return -1;
3274 }
3275 return 0;
3276 }
3277
3278 void lxc_clear_saved_nics(struct lxc_conf *conf)
3279 {
3280 int i;
3281
3282 if (!conf->num_savednics)
3283 return;
3284 for (i=0; i < conf->num_savednics; i++)
3285 free(conf->saved_nics[i].orig_name);
3286 conf->saved_nics = 0;
3287 free(conf->saved_nics);
3288 }
3289
3290 void lxc_conf_free(struct lxc_conf *conf)
3291 {
3292 if (!conf)
3293 return;
3294 if (conf->console.path)
3295 free(conf->console.path);
3296 if (conf->rootfs.mount)
3297 free(conf->rootfs.mount);
3298 if (conf->rootfs.path)
3299 free(conf->rootfs.path);
3300 if (conf->utsname)
3301 free(conf->utsname);
3302 if (conf->ttydir)
3303 free(conf->ttydir);
3304 if (conf->fstab)
3305 free(conf->fstab);
3306 if (conf->rcfile)
3307 free(conf->rcfile);
3308 lxc_clear_config_network(conf);
3309 #if HAVE_APPARMOR
3310 if (conf->aa_profile)
3311 free(conf->aa_profile);
3312 #endif
3313 lxc_seccomp_free(conf);
3314 lxc_clear_config_caps(conf);
3315 lxc_clear_config_keepcaps(conf);
3316 lxc_clear_cgroups(conf, "lxc.cgroup");
3317 lxc_clear_hooks(conf, "lxc.hook");
3318 lxc_clear_mount_entries(conf);
3319 lxc_clear_saved_nics(conf);
3320 lxc_clear_idmaps(conf);
3321 free(conf);
3322 }