]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.c
Use LXCPATH and LOCALSTATEDIR instead of hardcoded /var
[mirror_lxc.git] / src / lxc / conf.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23 #define _GNU_SOURCE
24 #include <stdio.h>
25 #undef _GNU_SOURCE
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <errno.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <mntent.h>
32 #include <unistd.h>
33 #include <sys/wait.h>
34 #include <pty.h>
35
36 #include <linux/loop.h>
37
38 #include <sys/types.h>
39 #include <sys/utsname.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 #include <sys/socket.h>
43 #include <sys/mount.h>
44 #include <sys/mman.h>
45 #include <sys/prctl.h>
46 #include <sys/capability.h>
47 #include <sys/personality.h>
48
49 #include <arpa/inet.h>
50 #include <fcntl.h>
51 #include <netinet/in.h>
52 #include <net/if.h>
53 #include <libgen.h>
54
55 #include "network.h"
56 #include "error.h"
57 #include "parse.h"
58 #include "config.h"
59 #include "utils.h"
60 #include "conf.h"
61 #include "log.h"
62 #include "lxc.h" /* for lxc_cgroup_set() */
63 #include "caps.h" /* for lxc_caps_last_cap() */
64
65 #if HAVE_APPARMOR
66 #include <apparmor.h>
67 #endif
68
/* Logging category for this file; set up through the project's
 * lxc_log_define() helper. */
lxc_log_define(lxc_conf, lxc);

/* Fixed length limits; their users lie outside this part of the file. */
#define MAXHWLEN 18
#define MAXINDEXLEN 20
#define MAXMTULEN 16
#define MAXLINELEN 128

/*
 * Fallback values: each constant below is defined here only when the
 * headers included above did not already provide it.
 */
#if !defined(MS_DIRSYNC)
#define MS_DIRSYNC 128
#endif

#if !defined(MS_REC)
#define MS_REC 16384
#endif

#if !defined(MNT_DETACH)
#define MNT_DETACH 2
#endif

#if !defined(MS_RELATIME)
#define MS_RELATIME (1 << 21)
#endif

#if !defined(MS_STRICTATIME)
#define MS_STRICTATIME (1 << 24)
#endif

#if !defined(CAP_SETFCAP)
#define CAP_SETFCAP 31
#endif

#if !defined(CAP_MAC_OVERRIDE)
#define CAP_MAC_OVERRIDE 32
#endif

#if !defined(CAP_MAC_ADMIN)
#define CAP_MAC_ADMIN 33
#endif

#if !defined(PR_CAPBSET_DROP)
#define PR_CAPBSET_DROP 24
#endif
111
112 char *lxchook_names[NUM_LXC_HOOKS] = {
113 "pre-start", "pre-mount", "mount", "start", "post-stop" };
114
115 extern int pivot_root(const char * new_root, const char * put_old);
116
117 typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
118
119 struct mount_opt {
120 char *name;
121 int clear;
122 int flag;
123 };
124
125 struct caps_opt {
126 char *name;
127 int value;
128 };
129
130 static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
131 static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
132 static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
133 static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
134 static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
135
136 static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
137 [LXC_NET_VETH] = instanciate_veth,
138 [LXC_NET_MACVLAN] = instanciate_macvlan,
139 [LXC_NET_VLAN] = instanciate_vlan,
140 [LXC_NET_PHYS] = instanciate_phys,
141 [LXC_NET_EMPTY] = instanciate_empty,
142 };
143
144 static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
145 static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
146 static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
147 static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
148 static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
149
150 static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
151 [LXC_NET_VETH] = shutdown_veth,
152 [LXC_NET_MACVLAN] = shutdown_macvlan,
153 [LXC_NET_VLAN] = shutdown_vlan,
154 [LXC_NET_PHYS] = shutdown_phys,
155 [LXC_NET_EMPTY] = shutdown_empty,
156 };
157
158 static struct mount_opt mount_opt[] = {
159 { "defaults", 0, 0 },
160 { "ro", 0, MS_RDONLY },
161 { "rw", 1, MS_RDONLY },
162 { "suid", 1, MS_NOSUID },
163 { "nosuid", 0, MS_NOSUID },
164 { "dev", 1, MS_NODEV },
165 { "nodev", 0, MS_NODEV },
166 { "exec", 1, MS_NOEXEC },
167 { "noexec", 0, MS_NOEXEC },
168 { "sync", 0, MS_SYNCHRONOUS },
169 { "async", 1, MS_SYNCHRONOUS },
170 { "dirsync", 0, MS_DIRSYNC },
171 { "remount", 0, MS_REMOUNT },
172 { "mand", 0, MS_MANDLOCK },
173 { "nomand", 1, MS_MANDLOCK },
174 { "atime", 1, MS_NOATIME },
175 { "noatime", 0, MS_NOATIME },
176 { "diratime", 1, MS_NODIRATIME },
177 { "nodiratime", 0, MS_NODIRATIME },
178 { "bind", 0, MS_BIND },
179 { "rbind", 0, MS_BIND|MS_REC },
180 { "relatime", 0, MS_RELATIME },
181 { "norelatime", 1, MS_RELATIME },
182 { "strictatime", 0, MS_STRICTATIME },
183 { "nostrictatime", 1, MS_STRICTATIME },
184 { NULL, 0, 0 },
185 };
186
187 static struct caps_opt caps_opt[] = {
188 { "chown", CAP_CHOWN },
189 { "dac_override", CAP_DAC_OVERRIDE },
190 { "dac_read_search", CAP_DAC_READ_SEARCH },
191 { "fowner", CAP_FOWNER },
192 { "fsetid", CAP_FSETID },
193 { "kill", CAP_KILL },
194 { "setgid", CAP_SETGID },
195 { "setuid", CAP_SETUID },
196 { "setpcap", CAP_SETPCAP },
197 { "linux_immutable", CAP_LINUX_IMMUTABLE },
198 { "net_bind_service", CAP_NET_BIND_SERVICE },
199 { "net_broadcast", CAP_NET_BROADCAST },
200 { "net_admin", CAP_NET_ADMIN },
201 { "net_raw", CAP_NET_RAW },
202 { "ipc_lock", CAP_IPC_LOCK },
203 { "ipc_owner", CAP_IPC_OWNER },
204 { "sys_module", CAP_SYS_MODULE },
205 { "sys_rawio", CAP_SYS_RAWIO },
206 { "sys_chroot", CAP_SYS_CHROOT },
207 { "sys_ptrace", CAP_SYS_PTRACE },
208 { "sys_pacct", CAP_SYS_PACCT },
209 { "sys_admin", CAP_SYS_ADMIN },
210 { "sys_boot", CAP_SYS_BOOT },
211 { "sys_nice", CAP_SYS_NICE },
212 { "sys_resource", CAP_SYS_RESOURCE },
213 { "sys_time", CAP_SYS_TIME },
214 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
215 { "mknod", CAP_MKNOD },
216 { "lease", CAP_LEASE },
217 #ifdef CAP_AUDIT_WRITE
218 { "audit_write", CAP_AUDIT_WRITE },
219 #endif
220 #ifdef CAP_AUDIT_CONTROL
221 { "audit_control", CAP_AUDIT_CONTROL },
222 #endif
223 { "setfcap", CAP_SETFCAP },
224 { "mac_override", CAP_MAC_OVERRIDE },
225 { "mac_admin", CAP_MAC_ADMIN },
226 #ifdef CAP_SYSLOG
227 { "syslog", CAP_SYSLOG },
228 #endif
229 #ifdef CAP_WAKE_ALARM
230 { "wake_alarm", CAP_WAKE_ALARM },
231 #endif
232 };
233
234 static int run_buffer(char *buffer)
235 {
236 FILE *f;
237 char *output;
238
239 f = popen(buffer, "r");
240 if (!f) {
241 SYSERROR("popen failed");
242 return -1;
243 }
244
245 output = malloc(LXC_LOG_BUFFER_SIZE);
246 if (!output) {
247 ERROR("failed to allocate memory for script output");
248 return -1;
249 }
250
251 while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
252 DEBUG("script output: %s", output);
253
254 free(output);
255
256 if (pclose(f) == -1) {
257 SYSERROR("Script exited on error");
258 return -1;
259 }
260
261 return 0;
262 }
263
/*
 * Build the command line "<script> <name> <section> [extra args...]" and
 * execute it with run_buffer().
 *
 * The variadic arguments are char * strings and the list MUST be
 * terminated by a NULL pointer.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be assembled.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument needs its own length plus a separating space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two separating spaces and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	/*
	 * Heap allocation: the error paths below release the buffer with
	 * free(), which is only valid for malloc'ed memory.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);

	return ret;
}
317
/*
 * Per-line callback used by mount_unknow_fs(): treat the line as a
 * filesystem type name and try to mount cbarg->rootfs on cbarg->target
 * with it.
 *
 * Lines containing "nodev" are skipped. Returns 1 when the mount worked
 * and 0 otherwise (a failed mount is only logged at DEBUG level).
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *arg = data;
	char *type;

	/* 'nodev' entries are never tried */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	/* trim the line on both sides using the project helpers */
	type = buffer + lxc_char_left_gc(buffer, strlen(buffer));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      arg->rootfs, arg->target, type);

	if (mount(arg->rootfs, arg->target, type, arg->mntopt, NULL) != 0) {
		DEBUG("mount failed with error: %s", strerror(errno));
		return 0;
	}

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     arg->rootfs, arg->target, type);

	return 1;
}
349
350 static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
351 {
352 int i;
353
354 struct cbarg {
355 const char *rootfs;
356 const char *target;
357 int mntopt;
358 } cbarg = {
359 .rootfs = rootfs,
360 .target = target,
361 .mntopt = mntopt,
362 };
363
364 /*
365 * find the filesystem type with brute force:
366 * first we check with /etc/filesystems, in case the modules
367 * are auto-loaded and fall back to the supported kernel fs
368 */
369 char *fsfile[] = {
370 "/etc/filesystems",
371 "/proc/filesystems",
372 };
373
374 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
375
376 int ret;
377
378 if (access(fsfile[i], F_OK))
379 continue;
380
381 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
382 if (ret < 0) {
383 ERROR("failed to parse '%s'", fsfile[i]);
384 return -1;
385 }
386
387 if (ret)
388 return 0;
389 }
390
391 ERROR("failed to determine fs type for '%s'", rootfs);
392 return -1;
393 }
394
/*
 * Directory rootfs: recursively bind-mount 'rootfs' onto 'target'.
 * Returns the result of mount() unchanged.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
399
/*
 * Attach the file 'rootfs' to the loop device open on 'fd'.
 *
 * '*loinfo' is zeroed, given the LO_FLAGS_AUTOCLEAR flag and applied with
 * LOOP_SET_STATUS64 after LOOP_SET_FD succeeded. The descriptor opened on
 * 'rootfs' is closed again before returning in every case.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int status = -1;
	int backing;

	backing = open(rootfs, O_RDWR);
	if (backing < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));
	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, backing) != 0) {
		SYSERROR("failed to LOOP_SET_FD");
	} else if (ioctl(fd, LOOP_SET_STATUS64, loinfo) != 0) {
		SYSERROR("failed to LOOP_SET_STATUS64");
	} else {
		status = 0;
	}

	close(backing);

	return status;
}
431
/*
 * Regular-file rootfs: find a free loop device under /dev, attach the
 * image to it and mount it on 'target' with mount_unknow_fs().
 *
 * A /dev/loop* node counts as free when LOOP_GET_STATUS64 fails with
 * ENXIO. Only the first free device is tried.
 *
 * Returns 0 on success, -1 on failure or when no free device was found.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent dirent, *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while (!readdir_r(dir, &dirent, &direntp)) {

		if (!direntp)
			break;

		if (!strcmp(direntp->d_name, "."))
			continue;

		if (!strcmp(direntp->d_name, ".."))
			continue;

		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			/* log first: %m reads errno, which close() may change */
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			/* do not leak the descriptor when skipping the device */
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknow_fs(path, target, 0);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
494
/*
 * Block-device rootfs: mount it on 'target', probing for the filesystem
 * type with mount_unknow_fs() and no extra mount flags.
 */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	int rc = mount_unknow_fs(rootfs, target, 0);

	return rc;
}
499
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}.hold for writing for the
 * duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown.
 *
 * return -1 on error (including a failed open of the hold file).
 * return -2 if nothing needed to be pinned (no rootfs given, or rootfs is
 *           not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	/*
	 * Nothing to pin. This must not return 0: 0 is a valid file
	 * descriptor and would be taken for a pinned file by the caller.
	 */
	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs)) {
		SYSERROR("failed to get real path for '%s'", rootfs);
		return -1;
	}

	if (access(absrootfs, F_OK)) {
		SYSERROR("'%s' is not accessible", absrootfs);
		return -1;
	}

	if (stat(absrootfs, &s)) {
		SYSERROR("failed to stat '%s'", absrootfs);
		return -1;
	}

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
	if (ret < 0 || ret >= MAXPATHLEN) {
		/* not a system call failure, so errno is not reported */
		ERROR("pathname too long for rootfs hold file");
		return -1;
	}

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	INFO("opened %s as fd %d\n", absrootfspin, fd);
	return fd;
}
547
548 static int mount_rootfs(const char *rootfs, const char *target)
549 {
550 char absrootfs[MAXPATHLEN];
551 struct stat s;
552 int i;
553
554 typedef int (*rootfs_cb)(const char *, const char *);
555
556 struct rootfs_type {
557 int type;
558 rootfs_cb cb;
559 } rtfs_type[] = {
560 { S_IFDIR, mount_rootfs_dir },
561 { S_IFBLK, mount_rootfs_block },
562 { S_IFREG, mount_rootfs_file },
563 };
564
565 if (!realpath(rootfs, absrootfs)) {
566 SYSERROR("failed to get real path for '%s'", rootfs);
567 return -1;
568 }
569
570 if (access(absrootfs, F_OK)) {
571 SYSERROR("'%s' is not accessible", absrootfs);
572 return -1;
573 }
574
575 if (stat(absrootfs, &s)) {
576 SYSERROR("failed to stat '%s'", absrootfs);
577 return -1;
578 }
579
580 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
581
582 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
583 continue;
584
585 return rtfs_type[i].cb(absrootfs, target);
586 }
587
588 ERROR("unsupported rootfs type for '%s'", absrootfs);
589 return -1;
590 }
591
/*
 * Set the host name from utsname->nodename.
 * A NULL 'utsname' means there is nothing to do.
 * Returns 0 on success, -1 if sethostname() failed.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
606
607 static int setup_tty(const struct lxc_rootfs *rootfs,
608 const struct lxc_tty_info *tty_info, char *ttydir)
609 {
610 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
611 int i, ret;
612
613 if (!rootfs->path)
614 return 0;
615
616 for (i = 0; i < tty_info->nbtty; i++) {
617
618 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
619
620 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
621 rootfs->mount, i + 1);
622 if (ret >= sizeof(path)) {
623 ERROR("pathname too long for ttys");
624 return -1;
625 }
626 if (ttydir) {
627 /* create dev/lxc/tty%d" */
628 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
629 rootfs->mount, ttydir, i + 1);
630 if (ret >= sizeof(lxcpath)) {
631 ERROR("pathname too long for ttys");
632 return -1;
633 }
634 ret = creat(lxcpath, 0660);
635 if (ret==-1 && errno != EEXIST) {
636 SYSERROR("error creating %s\n", lxcpath);
637 return -1;
638 }
639 close(ret);
640 ret = unlink(path);
641 if (ret && errno != ENOENT) {
642 SYSERROR("error unlinking %s\n", path);
643 return -1;
644 }
645
646 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
647 WARN("failed to mount '%s'->'%s'",
648 pty_info->name, path);
649 continue;
650 }
651
652 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
653 if (ret >= sizeof(lxcpath)) {
654 ERROR("tty pathname too long");
655 return -1;
656 }
657 ret = symlink(lxcpath, path);
658 if (ret) {
659 SYSERROR("failed to create symlink for tty %d\n", i+1);
660 return -1;
661 }
662 } else {
663 /* If we populated /dev, then we need to create /dev/ttyN */
664 if (access(path, F_OK)) {
665 ret = creat(path, 0660);
666 if (ret==-1) {
667 SYSERROR("error creating %s\n", path);
668 /* this isn't fatal, continue */
669 } else
670 close(ret);
671 }
672 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
673 WARN("failed to mount '%s'->'%s'",
674 pty_info->name, path);
675 continue;
676 }
677 }
678 }
679
680 INFO("%d tty(s) has been setup", tty_info->nbtty);
681
682 return 0;
683 }
684
685 static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
686 {
687 struct lxc_list *mountlist, *listentry, *iterator;
688 char *pivotdir, *mountpoint, *mountentry;
689 int found;
690 void **cbparm;
691
692 mountentry = buffer;
693 cbparm = (void **)data;
694
695 mountlist = cbparm[0];
696 pivotdir = cbparm[1];
697
698 /* parse entry, first field is mountname, ignore */
699 mountpoint = strtok(mountentry, " ");
700 if (!mountpoint)
701 return -1;
702
703 /* second field is mountpoint */
704 mountpoint = strtok(NULL, " ");
705 if (!mountpoint)
706 return -1;
707
708 /* only consider mountpoints below old root fs */
709 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
710 return 0;
711
712 /* filter duplicate mountpoints */
713 found = 0;
714 lxc_list_for_each(iterator, mountlist) {
715 if (!strcmp(iterator->elem, mountpoint)) {
716 found = 1;
717 break;
718 }
719 }
720 if (found)
721 return 0;
722
723 /* add entry to list */
724 listentry = malloc(sizeof(*listentry));
725 if (!listentry) {
726 SYSERROR("malloc for mountpoint listentry failed");
727 return -1;
728 }
729
730 listentry->elem = strdup(mountpoint);
731 if (!listentry->elem) {
732 SYSERROR("strdup failed");
733 return -1;
734 }
735 lxc_list_add_tail(mountlist, listentry);
736
737 return 0;
738 }
739
740 static int umount_oldrootfs(const char *oldrootfs)
741 {
742 char path[MAXPATHLEN];
743 void *cbparm[2];
744 struct lxc_list mountlist, *iterator, *next;
745 int ok, still_mounted, last_still_mounted;
746 int rc;
747
748 /* read and parse /proc/mounts in old root fs */
749 lxc_list_init(&mountlist);
750
751 /* oldrootfs is on the top tree directory now */
752 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
753 if (rc >= sizeof(path)) {
754 ERROR("rootfs name too long");
755 return -1;
756 }
757 cbparm[0] = &mountlist;
758
759 cbparm[1] = strdup(path);
760 if (!cbparm[1]) {
761 SYSERROR("strdup failed");
762 return -1;
763 }
764
765 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
766 if (rc >= sizeof(path)) {
767 ERROR("container proc/mounts name too long");
768 return -1;
769 }
770
771 ok = lxc_file_for_each_line(path,
772 setup_rootfs_pivot_root_cb, &cbparm);
773 if (ok < 0) {
774 SYSERROR("failed to read or parse mount list '%s'", path);
775 return -1;
776 }
777
778 /* umount filesystems until none left or list no longer shrinks */
779 still_mounted = 0;
780 do {
781 last_still_mounted = still_mounted;
782 still_mounted = 0;
783
784 lxc_list_for_each_safe(iterator, &mountlist, next) {
785
786 /* umount normally */
787 if (!umount(iterator->elem)) {
788 DEBUG("umounted '%s'", (char *)iterator->elem);
789 lxc_list_del(iterator);
790 continue;
791 }
792
793 still_mounted++;
794 }
795
796 } while (still_mounted > 0 && still_mounted != last_still_mounted);
797
798
799 lxc_list_for_each(iterator, &mountlist) {
800
801 /* let's try a lazy umount */
802 if (!umount2(iterator->elem, MNT_DETACH)) {
803 INFO("lazy unmount of '%s'", (char *)iterator->elem);
804 continue;
805 }
806
807 /* be more brutal (nfs) */
808 if (!umount2(iterator->elem, MNT_FORCE)) {
809 INFO("forced unmount of '%s'", (char *)iterator->elem);
810 continue;
811 }
812
813 WARN("failed to unmount '%s'", (char *)iterator->elem);
814 }
815
816 return 0;
817 }
818
/*
 * Switch the root filesystem to 'rootfs' with pivot_root().
 *
 * The old root is parked on <rootfs>/<pivotdir> ("lxc_putold" when
 * 'pivotdir' is NULL); the directory is created when missing. After the
 * pivot everything below the old root is unmounted, and the directory is
 * removed again if this function created it (a failed removal only
 * produces a warning).
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created_here = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if (len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		created_here = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* from here on the old root is addressed by its relative name */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* removing the temporary mount point is best effort */
	if (created_here && rmdir(pivotdir) != 0)
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
878
/*
 * Mount a tmpfs (size=100000) on <root>/dev and create <root>/dev/pts in
 * it.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 *
 * Returns 0 on success, -1 on error.
 */
static int mount_autodev(char *root)
{
	int ret;
	char path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);
	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	/* ret == MAXPATHLEN already means the path was truncated */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mount("none", path, "tmpfs", 0, "size=100000");
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}
	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	if (ret) {
		SYSERROR("Failed to create /dev/pts in container");
		return -1;
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
909
/*
 * Try to run "MAKEDEV console" inside 'devpath'. Failures are only
 * logged, as they should not be detrimental to the container; the point
 * is to make sure that things like /dev/vcs* exist.
 * (devpath is passed in to reduce stack usage.)
 *
 * The working directory is changed to 'devpath' for the command and
 * restored afterwards through a descriptor opened on ".".
 */
static void run_makedev(char *devpath)
{
	int saved_cwd;

	saved_cwd = open(".", O_RDONLY);
	if (saved_cwd < 0)
		return;

	if (chdir(devpath) != 0) {
		close(saved_cwd);
		return;
	}

	if (run_buffer("/sbin/MAKEDEV console") != 0)
		INFO("Error running MAKEDEV console in %s", devpath);

	if (fchdir(saved_cwd) != 0)
		INFO("Error returning to original directory: expect breakage");

	close(saved_cwd);
}
936
937 struct lxc_devs {
938 char *name;
939 mode_t mode;
940 int maj;
941 int min;
942 };
943
944 struct lxc_devs lxc_devs[] = {
945 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
946 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
947 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
948 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
949 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
950 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
951 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
952 };
953
954 static int setup_autodev(char *root)
955 {
956 int ret;
957 struct lxc_devs *d;
958 char path[MAXPATHLEN];
959 int i;
960
961 INFO("Creating initial consoles under %s/dev\n", root);
962
963 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
964 if (ret < 0 || ret >= MAXPATHLEN) {
965 ERROR("Error calculating container /dev location");
966 return -1;
967 } else
968 run_makedev(path);
969
970 INFO("Populating /dev under %s\n", root);
971 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
972 d = &lxc_devs[i];
973 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
974 if (ret < 0 || ret >= MAXPATHLEN)
975 return -1;
976 ret = mknod(path, d->mode, makedev(d->maj, d->min));
977 if (ret && errno != EEXIST) {
978 SYSERROR("Error creating %s\n", d->name);
979 return -1;
980 }
981 }
982
983 INFO("Populated /dev under %s\n", root);
984 return 0;
985 }
986
987 static int setup_rootfs(const struct lxc_rootfs *rootfs)
988 {
989 if (!rootfs->path)
990 return 0;
991
992 if (access(rootfs->mount, F_OK)) {
993 SYSERROR("failed to access to '%s', check it is present",
994 rootfs->mount);
995 return -1;
996 }
997
998 if (mount_rootfs(rootfs->path, rootfs->mount)) {
999 ERROR("failed to mount rootfs");
1000 return -1;
1001 }
1002
1003 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1004
1005 return 0;
1006 }
1007
1008 int setup_pivot_root(const struct lxc_rootfs *rootfs)
1009 {
1010 if (!rootfs->path)
1011 return 0;
1012
1013 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
1014 ERROR("failed to setup pivot root");
1015 return -1;
1016 }
1017
1018 return 0;
1019 }
1020
1021 static int setup_pts(int pts)
1022 {
1023 char target[PATH_MAX];
1024
1025 if (!pts)
1026 return 0;
1027
1028 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
1029 SYSERROR("failed to umount 'dev/pts'");
1030 return -1;
1031 }
1032
1033 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
1034 "newinstance,ptmxmode=0666")) {
1035 SYSERROR("failed to mount a new instance of '/dev/pts'");
1036 return -1;
1037 }
1038
1039 if (access("/dev/ptmx", F_OK)) {
1040 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1041 goto out;
1042 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
1043 return -1;
1044 }
1045
1046 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1047 goto out;
1048
1049 /* fallback here, /dev/pts/ptmx exists just mount bind */
1050 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
1051 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
1052 return -1;
1053 }
1054
1055 INFO("created new pts instance");
1056
1057 out:
1058 return 0;
1059 }
1060
/*
 * Apply the execution domain 'persona' with personality().
 * A value of -1 means "leave unchanged".
 * Returns 0 on success, -1 if personality() failed.
 */
static int setup_personality(int persona)
{
	int rc;

	if (persona == -1)
		return 0;

	rc = personality(persona);
	if (rc < 0) {
		SYSERROR("failed to set personality to '0x%x'", persona);
		return -1;
	}

	INFO("set personality to '0x%x'", persona);

	return 0;
}
1075
1076 static int setup_dev_console(const struct lxc_rootfs *rootfs,
1077 const struct lxc_console *console)
1078 {
1079 char path[MAXPATHLEN];
1080 struct stat s;
1081 int ret;
1082
1083 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1084 if (ret >= sizeof(path)) {
1085 ERROR("console path too long\n");
1086 return -1;
1087 }
1088
1089 if (access(path, F_OK)) {
1090 WARN("rootfs specified but no console found at '%s'", path);
1091 return 0;
1092 }
1093
1094 if (console->peer == -1) {
1095 INFO("no console output required");
1096 return 0;
1097 }
1098
1099 if (stat(path, &s)) {
1100 SYSERROR("failed to stat '%s'", path);
1101 return -1;
1102 }
1103
1104 if (chmod(console->name, s.st_mode)) {
1105 SYSERROR("failed to set mode '0%o' to '%s'",
1106 s.st_mode, console->name);
1107 return -1;
1108 }
1109
1110 if (mount(console->name, path, "none", MS_BIND, 0)) {
1111 ERROR("failed to mount '%s' on '%s'", console->name, path);
1112 return -1;
1113 }
1114
1115 INFO("console has been setup");
1116 return 0;
1117 }
1118
1119 static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1120 const struct lxc_console *console,
1121 char *ttydir)
1122 {
1123 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1124 int ret;
1125
1126 /* create rootfs/dev/<ttydir> directory */
1127 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1128 ttydir);
1129 if (ret >= sizeof(path))
1130 return -1;
1131 ret = mkdir(path, 0755);
1132 if (ret && errno != EEXIST) {
1133 SYSERROR("failed with errno %d to create %s\n", errno, path);
1134 return -1;
1135 }
1136 INFO("created %s\n", path);
1137
1138 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1139 rootfs->mount, ttydir);
1140 if (ret >= sizeof(lxcpath)) {
1141 ERROR("console path too long\n");
1142 return -1;
1143 }
1144
1145 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1146 ret = unlink(path);
1147 if (ret && errno != ENOENT) {
1148 SYSERROR("error unlinking %s\n", path);
1149 return -1;
1150 }
1151
1152 ret = creat(lxcpath, 0660);
1153 if (ret==-1 && errno != EEXIST) {
1154 SYSERROR("error %d creating %s\n", errno, lxcpath);
1155 return -1;
1156 }
1157 close(ret);
1158
1159 if (console->peer == -1) {
1160 INFO("no console output required");
1161 return 0;
1162 }
1163
1164 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1165 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1166 return -1;
1167 }
1168
1169 /* create symlink from rootfs/dev/console to 'lxc/console' */
1170 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1171 if (ret >= sizeof(lxcpath)) {
1172 ERROR("lxc/console path too long");
1173 return -1;
1174 }
1175 ret = symlink(lxcpath, path);
1176 if (ret) {
1177 SYSERROR("failed to create symlink for console");
1178 return -1;
1179 }
1180
1181 INFO("console has been setup on %s", lxcpath);
1182
1183 return 0;
1184 }
1185
1186 static int setup_console(const struct lxc_rootfs *rootfs,
1187 const struct lxc_console *console,
1188 char *ttydir)
1189 {
1190 /* We don't have a rootfs, /dev/console will be shared */
1191 if (!rootfs->path)
1192 return 0;
1193 if (!ttydir)
1194 return setup_dev_console(rootfs, console);
1195
1196 return setup_ttydir_console(rootfs, console, ttydir);
1197 }
1198
1199 static int setup_kmsg(const struct lxc_rootfs *rootfs,
1200 const struct lxc_console *console)
1201 {
1202 char kpath[MAXPATHLEN];
1203 int ret;
1204
1205 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1206 if (ret < 0 || ret >= sizeof(kpath))
1207 return -1;
1208
1209 ret = unlink(kpath);
1210 if (ret && errno != ENOENT) {
1211 SYSERROR("error unlinking %s\n", kpath);
1212 return -1;
1213 }
1214
1215 ret = symlink("console", kpath);
1216 if (ret) {
1217 SYSERROR("failed to create symlink for kmsg");
1218 return -1;
1219 }
1220
1221 return 0;
1222 }
1223
1224 static int setup_cgroup(const char *name, struct lxc_list *cgroups)
1225 {
1226 struct lxc_list *iterator;
1227 struct lxc_cgroup *cg;
1228 int ret = -1;
1229
1230 if (lxc_list_empty(cgroups))
1231 return 0;
1232
1233 lxc_list_for_each(iterator, cgroups) {
1234
1235 cg = iterator->elem;
1236
1237 if (lxc_cgroup_set(name, cg->subsystem, cg->value))
1238 goto out;
1239
1240 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
1241 }
1242
1243 ret = 0;
1244 INFO("cgroup has been setup");
1245 out:
1246 return ret;
1247 }
1248
1249 static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1250 {
1251 struct mount_opt *mo;
1252
1253 /* If opt is found in mount_opt, set or clear flags.
1254 * Otherwise append it to data. */
1255
1256 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1257 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1258 if (mo->clear)
1259 *flags &= ~mo->flag;
1260 else
1261 *flags |= mo->flag;
1262 return;
1263 }
1264 }
1265
1266 if (strlen(*data))
1267 strcat(*data, ",");
1268 strcat(*data, opt);
1269 }
1270
/*
 * Split a comma separated mount option string into mount flags and a
 * data string.
 *
 * *mntflags receives the flags accumulated by parse_mntopt(); *mntdata
 * receives a newly allocated string holding the remaining options, or
 * NULL when there are none.  The caller owns *mntdata and must free it.
 * A NULL mntopts yields flags 0 and NULL data.
 *
 * Returns 0 on success, -1 when memory cannot be allocated.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() writes into its input, so tokenize a private copy. */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* The leftover options can never be longer than the input. */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1309
/*
 * Mount fsname on target.
 *
 * The first mount(2) call is made with MS_REMOUNT masked out.  When the
 * requested flags contain MS_REMOUNT or MS_BIND, a second call with
 * MS_REMOUNT added is made afterwards.
 *
 * Returns 0 on success, -1 if either mount call fails.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long initial_flags = mountflags & ~MS_REMOUNT;
	const int second_pass = (mountflags & MS_REMOUNT) ||
				(mountflags & MS_BIND);

	if (mount(fsname, target, fstype, initial_flags, data)) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (second_pass) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data)) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1336
/*
 * Mount an fstab entry exactly where it says, without prefixing the
 * target directory with anything.
 *
 * Returns the result of mount_entry(), or -1 if the options cannot be
 * parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *extra;
	int rc;

	if (parse_mntopts(mntent->mnt_opts, &flags, &extra) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, extra);
	free(extra);

	return rc;
}
1355
1356 static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
1357 const struct lxc_rootfs *rootfs,
1358 const char *lxc_name)
1359 {
1360 char *aux;
1361 char path[MAXPATHLEN];
1362 unsigned long mntflags;
1363 char *mntdata;
1364 int r, ret = 0, offset;
1365
1366 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1367 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1368 return -1;
1369 }
1370
1371 /* if rootfs->path is a blockdev path, allow container fstab to
1372 * use $LXCPATH/CN/rootfs as the target prefix */
1373 r = snprintf(path, MAXPATHLEN, LXCPATH "/%s/rootfs", lxc_name);
1374 if (r < 0 || r >= MAXPATHLEN)
1375 goto skipvarlib;
1376
1377 aux = strstr(mntent->mnt_dir, path);
1378 if (aux) {
1379 offset = strlen(path);
1380 goto skipabs;
1381 }
1382
1383 skipvarlib:
1384 aux = strstr(mntent->mnt_dir, rootfs->path);
1385 if (!aux) {
1386 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1387 goto out;
1388 }
1389 offset = strlen(rootfs->path);
1390
1391 skipabs:
1392
1393 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
1394 aux + offset);
1395 if (r < 0 || r >= MAXPATHLEN) {
1396 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1397 ret = -1;
1398 goto out;
1399 }
1400
1401
1402 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1403 mntflags, mntdata);
1404
1405 out:
1406 free(mntdata);
1407 return ret;
1408 }
1409
/*
 * Mount an fstab entry whose target is relative to the container root.
 *
 * The mount point is built as "<rootfs>/<mnt_dir>".
 *
 * Returns the result of mount_entry(), or -1 if the options cannot be
 * parsed or the resulting path does not fit in MAXPATHLEN bytes.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || ret >= sizeof(path)) {
		ERROR("path name too long");
		/* parse_mntopts() may have allocated mntdata; don't leak it */
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	free(mntdata);

	return ret;
}
1437
1438 static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1439 const char *lxc_name)
1440 {
1441 struct mntent *mntent;
1442 int ret = -1;
1443
1444 while ((mntent = getmntent(file))) {
1445
1446 if (!rootfs->path) {
1447 if (mount_entry_on_systemfs(mntent))
1448 goto out;
1449 continue;
1450 }
1451
1452 /* We have a separate root, mounts are relative to it */
1453 if (mntent->mnt_dir[0] != '/') {
1454 if (mount_entry_on_relative_rootfs(mntent,
1455 rootfs->mount))
1456 goto out;
1457 continue;
1458 }
1459
1460 if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
1461 goto out;
1462 }
1463
1464 ret = 0;
1465
1466 INFO("mount points have been setup");
1467 out:
1468 return ret;
1469 }
1470
/*
 * Mount the entries listed in the fstab file of the container.
 *
 * A NULL fstab means there is nothing to mount and is reported as
 * success.
 *
 * Returns 0 on success, -1 if the file cannot be opened or a mount fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *stream;
	int rc;

	if (fstab == NULL)
		return 0;

	stream = setmntent(fstab, "r");
	if (stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, stream, lxc_name);
	endmntent(stream);

	return rc;
}
1491
1492 static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
1493 const char *lxc_name)
1494 {
1495 FILE *file;
1496 struct lxc_list *iterator;
1497 char *mount_entry;
1498 int ret;
1499
1500 file = tmpfile();
1501 if (!file) {
1502 ERROR("tmpfile error: %m");
1503 return -1;
1504 }
1505
1506 lxc_list_for_each(iterator, mount) {
1507 mount_entry = iterator->elem;
1508 fprintf(file, "%s\n", mount_entry);
1509 }
1510
1511 rewind(file);
1512
1513 ret = mount_file_entries(rootfs, file, lxc_name);
1514
1515 fclose(file);
1516 return ret;
1517 }
1518
1519 static int setup_caps(struct lxc_list *caps)
1520 {
1521 struct lxc_list *iterator;
1522 char *drop_entry;
1523 char *ptr;
1524 int i, capid;
1525
1526 lxc_list_for_each(iterator, caps) {
1527
1528 drop_entry = iterator->elem;
1529
1530 capid = -1;
1531
1532 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
1533
1534 if (strcmp(drop_entry, caps_opt[i].name))
1535 continue;
1536
1537 capid = caps_opt[i].value;
1538 break;
1539 }
1540
1541 if (capid < 0) {
1542 /* try to see if it's numeric, so the user may specify
1543 * capabilities that the running kernel knows about but
1544 * we don't */
1545 capid = strtol(drop_entry, &ptr, 10);
1546 if (!ptr || *ptr != '\0' ||
1547 capid == LONG_MIN || capid == LONG_MAX)
1548 /* not a valid number */
1549 capid = -1;
1550 else if (capid > lxc_caps_last_cap())
1551 /* we have a number but it's not a valid
1552 * capability */
1553 capid = -1;
1554 }
1555
1556 if (capid < 0) {
1557 ERROR("unknown capability %s", drop_entry);
1558 return -1;
1559 }
1560
1561 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
1562
1563 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
1564 SYSERROR("failed to remove %s capability", drop_entry);
1565 return -1;
1566 }
1567
1568 }
1569
1570 DEBUG("capabilities has been setup");
1571
1572 return 0;
1573 }
1574
1575 static int setup_hw_addr(char *hwaddr, const char *ifname)
1576 {
1577 struct sockaddr sockaddr;
1578 struct ifreq ifr;
1579 int ret, fd;
1580
1581 ret = lxc_convert_mac(hwaddr, &sockaddr);
1582 if (ret) {
1583 ERROR("mac address '%s' conversion failed : %s",
1584 hwaddr, strerror(-ret));
1585 return -1;
1586 }
1587
1588 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
1589 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
1590
1591 fd = socket(AF_INET, SOCK_DGRAM, 0);
1592 if (fd < 0) {
1593 ERROR("socket failure : %s", strerror(errno));
1594 return -1;
1595 }
1596
1597 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
1598 close(fd);
1599 if (ret)
1600 ERROR("ioctl failure : %s", strerror(errno));
1601
1602 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifname);
1603
1604 return ret;
1605 }
1606
1607 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
1608 {
1609 struct lxc_list *iterator;
1610 struct lxc_inetdev *inetdev;
1611 int err;
1612
1613 lxc_list_for_each(iterator, ip) {
1614
1615 inetdev = iterator->elem;
1616
1617 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
1618 &inetdev->bcast, inetdev->prefix);
1619 if (err) {
1620 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1621 ifindex, strerror(-err));
1622 return -1;
1623 }
1624 }
1625
1626 return 0;
1627 }
1628
1629 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
1630 {
1631 struct lxc_list *iterator;
1632 struct lxc_inet6dev *inet6dev;
1633 int err;
1634
1635 lxc_list_for_each(iterator, ip) {
1636
1637 inet6dev = iterator->elem;
1638
1639 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
1640 &inet6dev->mcast, &inet6dev->acast,
1641 inet6dev->prefix);
1642 if (err) {
1643 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1644 ifindex, strerror(-err));
1645 return -1;
1646 }
1647 }
1648
1649 return 0;
1650 }
1651
1652 static int setup_netdev(struct lxc_netdev *netdev)
1653 {
1654 char ifname[IFNAMSIZ];
1655 char *current_ifname = ifname;
1656 int err;
1657
1658 /* empty network namespace */
1659 if (!netdev->ifindex) {
1660 if (netdev->flags & IFF_UP) {
1661 err = lxc_netdev_up("lo");
1662 if (err) {
1663 ERROR("failed to set the loopback up : %s",
1664 strerror(-err));
1665 return -1;
1666 }
1667 }
1668 return 0;
1669 }
1670
1671 /* retrieve the name of the interface */
1672 if (!if_indextoname(netdev->ifindex, current_ifname)) {
1673 ERROR("no interface corresponding to index '%d'",
1674 netdev->ifindex);
1675 return -1;
1676 }
1677
1678 /* default: let the system to choose one interface name */
1679 if (!netdev->name)
1680 netdev->name = netdev->type == LXC_NET_PHYS ?
1681 netdev->link : "eth%d";
1682
1683 /* rename the interface name */
1684 err = lxc_netdev_rename_by_name(ifname, netdev->name);
1685 if (err) {
1686 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
1687 strerror(-err));
1688 return -1;
1689 }
1690
1691 /* Re-read the name of the interface because its name has changed
1692 * and would be automatically allocated by the system
1693 */
1694 if (!if_indextoname(netdev->ifindex, current_ifname)) {
1695 ERROR("no interface corresponding to index '%d'",
1696 netdev->ifindex);
1697 return -1;
1698 }
1699
1700 /* set a mac address */
1701 if (netdev->hwaddr) {
1702 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
1703 ERROR("failed to setup hw address for '%s'",
1704 current_ifname);
1705 return -1;
1706 }
1707 }
1708
1709 /* setup ipv4 addresses on the interface */
1710 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
1711 ERROR("failed to setup ip addresses for '%s'",
1712 ifname);
1713 return -1;
1714 }
1715
1716 /* setup ipv6 addresses on the interface */
1717 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
1718 ERROR("failed to setup ipv6 addresses for '%s'",
1719 ifname);
1720 return -1;
1721 }
1722
1723 /* set the network device up */
1724 if (netdev->flags & IFF_UP) {
1725 int err;
1726
1727 err = lxc_netdev_up(current_ifname);
1728 if (err) {
1729 ERROR("failed to set '%s' up : %s", current_ifname,
1730 strerror(-err));
1731 return -1;
1732 }
1733
1734 /* the network is up, make the loopback up too */
1735 err = lxc_netdev_up("lo");
1736 if (err) {
1737 ERROR("failed to set the loopback up : %s",
1738 strerror(-err));
1739 return -1;
1740 }
1741 }
1742
1743 /* We can only set up the default routes after bringing
1744 * up the interface, sine bringing up the interface adds
1745 * the link-local routes and we can't add a default
1746 * route if the gateway is not reachable. */
1747
1748 /* setup ipv4 gateway on the interface */
1749 if (netdev->ipv4_gateway) {
1750 if (!(netdev->flags & IFF_UP)) {
1751 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
1752 return -1;
1753 }
1754
1755 if (lxc_list_empty(&netdev->ipv4)) {
1756 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
1757 return -1;
1758 }
1759
1760 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
1761 if (err) {
1762 ERROR("failed to setup ipv4 gateway for '%s': %s",
1763 ifname, strerror(-err));
1764 if (netdev->ipv4_gateway_auto) {
1765 char buf[INET_ADDRSTRLEN];
1766 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
1767 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
1768 }
1769 return -1;
1770 }
1771 }
1772
1773 /* setup ipv6 gateway on the interface */
1774 if (netdev->ipv6_gateway) {
1775 if (!(netdev->flags & IFF_UP)) {
1776 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
1777 return -1;
1778 }
1779
1780 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
1781 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
1782 return -1;
1783 }
1784
1785 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
1786 if (err) {
1787 ERROR("failed to setup ipv6 gateway for '%s': %s",
1788 ifname, strerror(-err));
1789 if (netdev->ipv6_gateway_auto) {
1790 char buf[INET6_ADDRSTRLEN];
1791 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
1792 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
1793 }
1794 return -1;
1795 }
1796 }
1797
1798 DEBUG("'%s' has been setup", current_ifname);
1799
1800 return 0;
1801 }
1802
1803 static int setup_network(struct lxc_list *network)
1804 {
1805 struct lxc_list *iterator;
1806 struct lxc_netdev *netdev;
1807
1808 lxc_list_for_each(iterator, network) {
1809
1810 netdev = iterator->elem;
1811
1812 if (setup_netdev(netdev)) {
1813 ERROR("failed to setup netdev");
1814 return -1;
1815 }
1816 }
1817
1818 if (!lxc_list_empty(network))
1819 INFO("network has been setup");
1820
1821 return 0;
1822 }
1823
1824 void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
1825 {
1826 int i;
1827
1828 INFO("running to reset %d nic names", conf->num_savednics);
1829 for (i=0; i<conf->num_savednics; i++) {
1830 struct saved_nic *s = &conf->saved_nics[i];
1831 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
1832 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
1833 free(s->orig_name);
1834 }
1835 conf->num_savednics = 0;
1836 free(conf->saved_nics);
1837 }
1838
1839 static int setup_private_host_hw_addr(char *veth1)
1840 {
1841 struct ifreq ifr;
1842 int err;
1843 int sockfd;
1844
1845 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
1846 if (sockfd < 0)
1847 return -errno;
1848
1849 snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
1850 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
1851 if (err < 0) {
1852 close(sockfd);
1853 return -errno;
1854 }
1855
1856 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
1857 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
1858 close(sockfd);
1859 if (err < 0)
1860 return -errno;
1861
1862 DEBUG("mac address of host interface '%s' changed to private "
1863 "%02x:%02x:%02x:%02x:%02x:%02x", veth1,
1864 ifr.ifr_hwaddr.sa_data[0] & 0xff,
1865 ifr.ifr_hwaddr.sa_data[1] & 0xff,
1866 ifr.ifr_hwaddr.sa_data[2] & 0xff,
1867 ifr.ifr_hwaddr.sa_data[3] & 0xff,
1868 ifr.ifr_hwaddr.sa_data[4] & 0xff,
1869 ifr.ifr_hwaddr.sa_data[5] & 0xff);
1870
1871 return 0;
1872 }
1873
1874 static char *default_rootfs_mount = LXCROOTFSMOUNT;
1875
1876 struct lxc_conf *lxc_conf_init(void)
1877 {
1878 struct lxc_conf *new;
1879 int i;
1880
1881 new = malloc(sizeof(*new));
1882 if (!new) {
1883 ERROR("lxc_conf_init : %m");
1884 return NULL;
1885 }
1886 memset(new, 0, sizeof(*new));
1887
1888 new->personality = -1;
1889 new->console.path = NULL;
1890 new->console.peer = -1;
1891 new->console.master = -1;
1892 new->console.slave = -1;
1893 new->console.name[0] = '\0';
1894 new->rootfs.mount = default_rootfs_mount;
1895 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
1896 lxc_list_init(&new->cgroup);
1897 lxc_list_init(&new->network);
1898 lxc_list_init(&new->mount_list);
1899 lxc_list_init(&new->caps);
1900 for (i=0; i<NUM_LXC_HOOKS; i++)
1901 lxc_list_init(&new->hooks[i]);
1902 #if HAVE_APPARMOR
1903 new->aa_profile = NULL;
1904 #endif
1905 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
1906 new->lsm_umount_proc = 0;
1907 #endif
1908
1909 return new;
1910 }
1911
1912 static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1913 {
1914 char veth1buf[IFNAMSIZ], *veth1;
1915 char veth2buf[IFNAMSIZ], *veth2;
1916 int err;
1917
1918 if (netdev->priv.veth_attr.pair)
1919 veth1 = netdev->priv.veth_attr.pair;
1920 else {
1921 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
1922 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
1923 ERROR("veth1 name too long");
1924 return -1;
1925 }
1926 veth1 = mktemp(veth1buf);
1927 /* store away for deconf */
1928 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
1929 }
1930
1931 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
1932 veth2 = mktemp(veth2buf);
1933
1934 if (!strlen(veth1) || !strlen(veth2)) {
1935 ERROR("failed to allocate a temporary name");
1936 return -1;
1937 }
1938
1939 err = lxc_veth_create(veth1, veth2);
1940 if (err) {
1941 ERROR("failed to create %s-%s : %s", veth1, veth2,
1942 strerror(-err));
1943 return -1;
1944 }
1945
1946 /* changing the high byte of the mac address to 0xfe, the bridge interface
1947 * will always keep the host's mac address and not take the mac address
1948 * of a container */
1949 err = setup_private_host_hw_addr(veth1);
1950 if (err) {
1951 ERROR("failed to change mac address of host interface '%s' : %s",
1952 veth1, strerror(-err));
1953 goto out_delete;
1954 }
1955
1956 if (netdev->mtu) {
1957 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
1958 if (!err)
1959 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
1960 if (err) {
1961 ERROR("failed to set mtu '%s' for %s-%s : %s",
1962 netdev->mtu, veth1, veth2, strerror(-err));
1963 goto out_delete;
1964 }
1965 }
1966
1967 if (netdev->link) {
1968 err = lxc_bridge_attach(netdev->link, veth1);
1969 if (err) {
1970 ERROR("failed to attach '%s' to the bridge '%s' : %s",
1971 veth1, netdev->link, strerror(-err));
1972 goto out_delete;
1973 }
1974 }
1975
1976 netdev->ifindex = if_nametoindex(veth2);
1977 if (!netdev->ifindex) {
1978 ERROR("failed to retrieve the index for %s", veth2);
1979 goto out_delete;
1980 }
1981
1982 err = lxc_netdev_up(veth1);
1983 if (err) {
1984 ERROR("failed to set %s up : %s", veth1, strerror(-err));
1985 goto out_delete;
1986 }
1987
1988 if (netdev->upscript) {
1989 err = run_script(handler->name, "net", netdev->upscript, "up",
1990 "veth", veth1, (char*) NULL);
1991 if (err)
1992 goto out_delete;
1993 }
1994
1995 DEBUG("instanciated veth '%s/%s', index is '%d'",
1996 veth1, veth2, netdev->ifindex);
1997
1998 return 0;
1999
2000 out_delete:
2001 lxc_netdev_delete_by_name(veth1);
2002 return -1;
2003 }
2004
2005 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2006 {
2007 char *veth1;
2008 int err;
2009
2010 if (netdev->priv.veth_attr.pair)
2011 veth1 = netdev->priv.veth_attr.pair;
2012 else
2013 veth1 = netdev->priv.veth_attr.veth1;
2014
2015 if (netdev->downscript) {
2016 err = run_script(handler->name, "net", netdev->downscript,
2017 "down", "veth", veth1, (char*) NULL);
2018 if (err)
2019 return -1;
2020 }
2021 return 0;
2022 }
2023
2024 static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2025 {
2026 char peerbuf[IFNAMSIZ], *peer;
2027 int err;
2028
2029 if (!netdev->link) {
2030 ERROR("no link specified for macvlan netdev");
2031 return -1;
2032 }
2033
2034 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2035 if (err >= sizeof(peerbuf))
2036 return -1;
2037
2038 peer = mktemp(peerbuf);
2039 if (!strlen(peer)) {
2040 ERROR("failed to make a temporary name");
2041 return -1;
2042 }
2043
2044 err = lxc_macvlan_create(netdev->link, peer,
2045 netdev->priv.macvlan_attr.mode);
2046 if (err) {
2047 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2048 peer, netdev->link, strerror(-err));
2049 return -1;
2050 }
2051
2052 netdev->ifindex = if_nametoindex(peer);
2053 if (!netdev->ifindex) {
2054 ERROR("failed to retrieve the index for %s", peer);
2055 lxc_netdev_delete_by_name(peer);
2056 return -1;
2057 }
2058
2059 if (netdev->upscript) {
2060 err = run_script(handler->name, "net", netdev->upscript, "up",
2061 "macvlan", netdev->link, (char*) NULL);
2062 if (err)
2063 return -1;
2064 }
2065
2066 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2067 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
2068
2069 return 0;
2070 }
2071
2072 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2073 {
2074 int err;
2075
2076 if (netdev->downscript) {
2077 err = run_script(handler->name, "net", netdev->downscript,
2078 "down", "macvlan", netdev->link,
2079 (char*) NULL);
2080 if (err)
2081 return -1;
2082 }
2083 return 0;
2084 }
2085
2086 /* XXX: merge with instanciate_macvlan */
2087 static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2088 {
2089 char peer[IFNAMSIZ];
2090 int err;
2091
2092 if (!netdev->link) {
2093 ERROR("no link specified for vlan netdev");
2094 return -1;
2095 }
2096
2097 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2098 if (err >= sizeof(peer)) {
2099 ERROR("peer name too long");
2100 return -1;
2101 }
2102
2103 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2104 if (err) {
2105 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2106 peer, netdev->link, strerror(-err));
2107 return -1;
2108 }
2109
2110 netdev->ifindex = if_nametoindex(peer);
2111 if (!netdev->ifindex) {
2112 ERROR("failed to retrieve the ifindex for %s", peer);
2113 lxc_netdev_delete_by_name(peer);
2114 return -1;
2115 }
2116
2117 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2118 netdev->ifindex);
2119
2120 return 0;
2121 }
2122
/*
 * Shutdown counterpart of instanciate_vlan().
 *
 * There is nothing to do for a vlan device: both arguments are ignored
 * and 0 is always returned.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2127
2128 static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2129 {
2130 if (!netdev->link) {
2131 ERROR("no link specified for the physical interface");
2132 return -1;
2133 }
2134
2135 netdev->ifindex = if_nametoindex(netdev->link);
2136 if (!netdev->ifindex) {
2137 ERROR("failed to retrieve the index for %s", netdev->link);
2138 return -1;
2139 }
2140
2141 if (netdev->upscript) {
2142 int err;
2143 err = run_script(handler->name, "net", netdev->upscript,
2144 "up", "phys", netdev->link, (char*) NULL);
2145 if (err)
2146 return -1;
2147 }
2148
2149 return 0;
2150 }
2151
2152 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2153 {
2154 int err;
2155
2156 if (netdev->downscript) {
2157 err = run_script(handler->name, "net", netdev->downscript,
2158 "down", "phys", netdev->link, (char*) NULL);
2159 if (err)
2160 return -1;
2161 }
2162 return 0;
2163 }
2164
2165 static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2166 {
2167 netdev->ifindex = 0;
2168 if (netdev->upscript) {
2169 int err;
2170 err = run_script(handler->name, "net", netdev->upscript,
2171 "up", "empty", (char*) NULL);
2172 if (err)
2173 return -1;
2174 }
2175 return 0;
2176 }
2177
2178 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2179 {
2180 int err;
2181
2182 if (netdev->downscript) {
2183 err = run_script(handler->name, "net", netdev->downscript,
2184 "down", "empty", (char*) NULL);
2185 if (err)
2186 return -1;
2187 }
2188 return 0;
2189 }
2190
2191 int lxc_create_network(struct lxc_handler *handler)
2192 {
2193 struct lxc_list *network = &handler->conf->network;
2194 struct lxc_list *iterator;
2195 struct lxc_netdev *netdev;
2196
2197 lxc_list_for_each(iterator, network) {
2198
2199 netdev = iterator->elem;
2200
2201 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
2202 ERROR("invalid network configuration type '%d'",
2203 netdev->type);
2204 return -1;
2205 }
2206
2207 if (netdev_conf[netdev->type](handler, netdev)) {
2208 ERROR("failed to create netdev");
2209 return -1;
2210 }
2211
2212 }
2213
2214 return 0;
2215 }
2216
2217 void lxc_delete_network(struct lxc_handler *handler)
2218 {
2219 struct lxc_list *network = &handler->conf->network;
2220 struct lxc_list *iterator;
2221 struct lxc_netdev *netdev;
2222
2223 lxc_list_for_each(iterator, network) {
2224 netdev = iterator->elem;
2225
2226 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
2227 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2228 WARN("failed to rename to the initial name the " \
2229 "netdev '%s'", netdev->link);
2230 continue;
2231 }
2232
2233 if (netdev_deconf[netdev->type](handler, netdev)) {
2234 WARN("failed to destroy netdev");
2235 }
2236
2237 /* Recent kernel remove the virtual interfaces when the network
2238 * namespace is destroyed but in case we did not moved the
2239 * interface to the network namespace, we have to destroy it
2240 */
2241 if (netdev->ifindex != 0 &&
2242 lxc_netdev_delete_by_index(netdev->ifindex))
2243 WARN("failed to remove interface '%s'", netdev->name);
2244 }
2245 }
2246
2247 int lxc_assign_network(struct lxc_list *network, pid_t pid)
2248 {
2249 struct lxc_list *iterator;
2250 struct lxc_netdev *netdev;
2251 int err;
2252
2253 lxc_list_for_each(iterator, network) {
2254
2255 netdev = iterator->elem;
2256
2257 /* empty network namespace, nothing to move */
2258 if (!netdev->ifindex)
2259 continue;
2260
2261 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
2262 if (err) {
2263 ERROR("failed to move '%s' to the container : %s",
2264 netdev->link, strerror(-err));
2265 return -1;
2266 }
2267
2268 DEBUG("move '%s' to '%d'", netdev->name, pid);
2269 }
2270
2271 return 0;
2272 }
2273
2274 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2275 {
2276 struct lxc_list *network = &handler->conf->network;
2277 struct lxc_list *iterator;
2278 struct lxc_netdev *netdev;
2279 int link_index;
2280
2281 lxc_list_for_each(iterator, network) {
2282 netdev = iterator->elem;
2283
2284 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2285 continue;
2286
2287 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2288 ERROR("gateway = auto only supported for "
2289 "veth and macvlan");
2290 return -1;
2291 }
2292
2293 if (!netdev->link) {
2294 ERROR("gateway = auto needs a link interface");
2295 return -1;
2296 }
2297
2298 link_index = if_nametoindex(netdev->link);
2299 if (!link_index)
2300 return -EINVAL;
2301
2302 if (netdev->ipv4_gateway_auto) {
2303 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2304 ERROR("failed to automatically find ipv4 gateway "
2305 "address from link interface '%s'", netdev->link);
2306 return -1;
2307 }
2308 }
2309
2310 if (netdev->ipv6_gateway_auto) {
2311 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2312 ERROR("failed to automatically find ipv6 gateway "
2313 "address from link interface '%s'", netdev->link);
2314 return -1;
2315 }
2316 }
2317 }
2318
2319 return 0;
2320 }
2321
2322 int lxc_create_tty(const char *name, struct lxc_conf *conf)
2323 {
2324 struct lxc_tty_info *tty_info = &conf->tty_info;
2325 int i;
2326
2327 /* no tty in the configuration */
2328 if (!conf->tty)
2329 return 0;
2330
2331 tty_info->pty_info =
2332 malloc(sizeof(*tty_info->pty_info)*conf->tty);
2333 if (!tty_info->pty_info) {
2334 SYSERROR("failed to allocate pty_info");
2335 return -1;
2336 }
2337
2338 for (i = 0; i < conf->tty; i++) {
2339
2340 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2341
2342 if (openpty(&pty_info->master, &pty_info->slave,
2343 pty_info->name, NULL, NULL)) {
2344 SYSERROR("failed to create pty #%d", i);
2345 tty_info->nbtty = i;
2346 lxc_delete_tty(tty_info);
2347 return -1;
2348 }
2349
2350 DEBUG("allocated pty '%s' (%d/%d)",
2351 pty_info->name, pty_info->master, pty_info->slave);
2352
2353 /* Prevent leaking the file descriptors to the container */
2354 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
2355 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
2356
2357 pty_info->busy = 0;
2358 }
2359
2360 tty_info->nbtty = conf->tty;
2361
2362 INFO("tty's configured");
2363
2364 return 0;
2365 }
2366
2367 void lxc_delete_tty(struct lxc_tty_info *tty_info)
2368 {
2369 int i;
2370
2371 for (i = 0; i < tty_info->nbtty; i++) {
2372 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2373
2374 close(pty_info->master);
2375 close(pty_info->slave);
2376 }
2377
2378 free(tty_info->pty_info);
2379 tty_info->nbtty = 0;
2380 }
2381
2382 int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
2383 {
2384 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2385 int mounted;
2386 #endif
2387
2388 if (setup_utsname(lxc_conf->utsname)) {
2389 ERROR("failed to setup the utsname for '%s'", name);
2390 return -1;
2391 }
2392
2393 if (setup_network(&lxc_conf->network)) {
2394 ERROR("failed to setup the network for '%s'", name);
2395 return -1;
2396 }
2397
2398 if (run_lxc_hooks(name, "pre-mount", lxc_conf)) {
2399 ERROR("failed to run pre-mount hooks for container '%s'.", name);
2400 return -1;
2401 }
2402
2403 if (setup_rootfs(&lxc_conf->rootfs)) {
2404 ERROR("failed to setup rootfs for '%s'", name);
2405 return -1;
2406 }
2407
2408 if (lxc_conf->autodev) {
2409 if (mount_autodev(lxc_conf->rootfs.mount)) {
2410 ERROR("failed to mount /dev in the container");
2411 return -1;
2412 }
2413 }
2414
2415 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
2416 ERROR("failed to setup the mounts for '%s'", name);
2417 return -1;
2418 }
2419
2420 if (setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
2421 ERROR("failed to setup the mount entries for '%s'", name);
2422 return -1;
2423 }
2424
2425 if (run_lxc_hooks(name, "mount", lxc_conf)) {
2426 ERROR("failed to run mount hooks for container '%s'.", name);
2427 return -1;
2428 }
2429
2430 if (lxc_conf->autodev) {
2431 if (setup_autodev(lxc_conf->rootfs.mount)) {
2432 ERROR("failed to populate /dev in the container");
2433 return -1;
2434 }
2435 }
2436
2437 if (setup_cgroup(name, &lxc_conf->cgroup)) {
2438 ERROR("failed to setup the cgroups for '%s'", name);
2439 return -1;
2440 }
2441
2442 if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
2443 ERROR("failed to setup the console for '%s'", name);
2444 return -1;
2445 }
2446
2447 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
2448 ERROR("failed to setup kmsg for '%s'", name);
2449
2450 if (setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
2451 ERROR("failed to setup the ttys for '%s'", name);
2452 return -1;
2453 }
2454
2455 #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2456 INFO("rootfs path is .%s., mount is .%s.", lxc_conf->rootfs.path,
2457 lxc_conf->rootfs.mount);
2458 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0)
2459 mounted = 0;
2460 else
2461 mounted = lsm_mount_proc_if_needed(lxc_conf->rootfs.path, lxc_conf->rootfs.mount);
2462 if (mounted == -1) {
2463 SYSERROR("failed to mount /proc in the container.");
2464 return -1;
2465 } else if (mounted == 1) {
2466 lxc_conf->lsm_umount_proc = 1;
2467 }
2468 #endif
2469
2470 if (setup_pivot_root(&lxc_conf->rootfs)) {
2471 ERROR("failed to set rootfs for '%s'", name);
2472 return -1;
2473 }
2474
2475 if (setup_pts(lxc_conf->pts)) {
2476 ERROR("failed to setup the new pts instance");
2477 return -1;
2478 }
2479
2480 if (setup_personality(lxc_conf->personality)) {
2481 ERROR("failed to setup personality");
2482 return -1;
2483 }
2484
2485 if (setup_caps(&lxc_conf->caps)) {
2486 ERROR("failed to drop capabilities");
2487 return -1;
2488 }
2489
2490 NOTICE("'%s' is setup.", name);
2491
2492 return 0;
2493 }
2494
2495 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf)
2496 {
2497 int which = -1;
2498 struct lxc_list *it;
2499
2500 if (strcmp(hook, "pre-start") == 0)
2501 which = LXCHOOK_PRESTART;
2502 else if (strcmp(hook, "pre-mount") == 0)
2503 which = LXCHOOK_PREMOUNT;
2504 else if (strcmp(hook, "mount") == 0)
2505 which = LXCHOOK_MOUNT;
2506 else if (strcmp(hook, "start") == 0)
2507 which = LXCHOOK_START;
2508 else if (strcmp(hook, "post-stop") == 0)
2509 which = LXCHOOK_POSTSTOP;
2510 else
2511 return -1;
2512 lxc_list_for_each(it, &conf->hooks[which]) {
2513 int ret;
2514 char *hookname = it->elem;
2515 ret = run_script(name, "lxc", hookname, hook, NULL);
2516 if (ret)
2517 return ret;
2518 }
2519 return 0;
2520 }
2521
2522 static void lxc_remove_nic(struct lxc_list *it)
2523 {
2524 struct lxc_netdev *netdev = it->elem;
2525 struct lxc_list *it2,*next;
2526
2527 lxc_list_del(it);
2528
2529 if (netdev->link)
2530 free(netdev->link);
2531 if (netdev->name)
2532 free(netdev->name);
2533 if (netdev->upscript)
2534 free(netdev->upscript);
2535 if (netdev->hwaddr)
2536 free(netdev->hwaddr);
2537 if (netdev->mtu)
2538 free(netdev->mtu);
2539 if (netdev->ipv4_gateway)
2540 free(netdev->ipv4_gateway);
2541 if (netdev->ipv6_gateway)
2542 free(netdev->ipv6_gateway);
2543 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
2544 lxc_list_del(it2);
2545 free(it2->elem);
2546 free(it2);
2547 }
2548 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
2549 lxc_list_del(it2);
2550 free(it2->elem);
2551 free(it2);
2552 }
2553 free(netdev);
2554 free(it);
2555 }
2556
2557 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
2558 int lxc_clear_nic(struct lxc_conf *c, const char *key)
2559 {
2560 char *p1;
2561 int ret, idx, i;
2562 struct lxc_list *it;
2563 struct lxc_netdev *netdev;
2564
2565 p1 = index(key, '.');
2566 if (!p1 || *(p1+1) == '\0')
2567 p1 = NULL;
2568
2569 ret = sscanf(key, "%d", &idx);
2570 if (ret != 1) return -1;
2571 if (idx < 0)
2572 return -1;
2573
2574 i = 0;
2575 lxc_list_for_each(it, &c->network) {
2576 if (i == idx)
2577 break;
2578 i++;
2579 }
2580 if (i < idx) // we don't have that many nics defined
2581 return -1;
2582
2583 if (!it || !it->elem)
2584 return -1;
2585
2586 netdev = it->elem;
2587
2588 if (!p1) {
2589 lxc_remove_nic(it);
2590 } else if (strcmp(p1, "ipv4") == 0) {
2591 struct lxc_list *it2,*next;
2592 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
2593 lxc_list_del(it2);
2594 free(it2->elem);
2595 free(it2);
2596 }
2597 } else if (strcmp(p1, "ipv6") == 0) {
2598 struct lxc_list *it2,*next;
2599 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
2600 lxc_list_del(it2);
2601 free(it2->elem);
2602 free(it2);
2603 }
2604 } else if (strcmp(p1, "link") == 0) {
2605 if (netdev->link) {
2606 free(netdev->link);
2607 netdev->link = NULL;
2608 }
2609 } else if (strcmp(p1, "name") == 0) {
2610 if (netdev->name) {
2611 free(netdev->name);
2612 netdev->name = NULL;
2613 }
2614 } else if (strcmp(p1, "script.up") == 0) {
2615 if (netdev->upscript) {
2616 free(netdev->upscript);
2617 netdev->upscript = NULL;
2618 }
2619 } else if (strcmp(p1, "hwaddr") == 0) {
2620 if (netdev->hwaddr) {
2621 free(netdev->hwaddr);
2622 netdev->hwaddr = NULL;
2623 }
2624 } else if (strcmp(p1, "mtu") == 0) {
2625 if (netdev->mtu) {
2626 free(netdev->mtu);
2627 netdev->mtu = NULL;
2628 }
2629 } else if (strcmp(p1, "ipv4_gateway") == 0) {
2630 if (netdev->ipv4_gateway) {
2631 free(netdev->ipv4_gateway);
2632 netdev->ipv4_gateway = NULL;
2633 }
2634 } else if (strcmp(p1, "ipv6_gateway") == 0) {
2635 if (netdev->ipv6_gateway) {
2636 free(netdev->ipv6_gateway);
2637 netdev->ipv6_gateway = NULL;
2638 }
2639 }
2640 else return -1;
2641
2642 return 0;
2643 }
2644
2645 int lxc_clear_config_network(struct lxc_conf *c)
2646 {
2647 struct lxc_list *it,*next;
2648 lxc_list_for_each_safe(it, &c->network, next) {
2649 lxc_remove_nic(it);
2650 }
2651 return 0;
2652 }
2653
2654 int lxc_clear_config_caps(struct lxc_conf *c)
2655 {
2656 struct lxc_list *it,*next;
2657
2658 lxc_list_for_each_safe(it, &c->caps, next) {
2659 lxc_list_del(it);
2660 free(it->elem);
2661 free(it);
2662 }
2663 return 0;
2664 }
2665
2666 int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
2667 {
2668 struct lxc_list *it,*next;
2669 bool all = false;
2670 const char *k = key + 11;
2671
2672 if (strcmp(key, "lxc.cgroup") == 0)
2673 all = true;
2674
2675 lxc_list_for_each_safe(it, &c->cgroup, next) {
2676 struct lxc_cgroup *cg = it->elem;
2677 if (!all && strcmp(cg->subsystem, k) != 0)
2678 continue;
2679 lxc_list_del(it);
2680 free(cg->subsystem);
2681 free(cg->value);
2682 free(cg);
2683 free(it);
2684 }
2685 return 0;
2686 }
2687
2688 int lxc_clear_mount_entries(struct lxc_conf *c)
2689 {
2690 struct lxc_list *it,*next;
2691
2692 lxc_list_for_each_safe(it, &c->mount_list, next) {
2693 lxc_list_del(it);
2694 free(it->elem);
2695 free(it);
2696 }
2697 return 0;
2698 }
2699
2700 int lxc_clear_hooks(struct lxc_conf *c, const char *key)
2701 {
2702 struct lxc_list *it,*next;
2703 bool all = false, done = false;
2704 const char *k = key + 9;
2705 int i;
2706
2707 if (strcmp(key, "lxc.hook") == 0)
2708 all = true;
2709
2710 for (i=0; i<NUM_LXC_HOOKS; i++) {
2711 if (all || strcmp(k, lxchook_names[i]) == 0) {
2712 lxc_list_for_each_safe(it, &c->hooks[i], next) {
2713 lxc_list_del(it);
2714 free(it->elem);
2715 free(it);
2716 }
2717 done = true;
2718 }
2719 }
2720
2721 if (!done) {
2722 ERROR("Invalid hook key: %s", key);
2723 return -1;
2724 }
2725 return 0;
2726 }
2727
2728 void lxc_clear_saved_nics(struct lxc_conf *conf)
2729 {
2730 int i;
2731
2732 if (!conf->num_savednics)
2733 return;
2734 for (i=0; i < conf->num_savednics; i++)
2735 free(conf->saved_nics[i].orig_name);
2736 conf->saved_nics = 0;
2737 free(conf->saved_nics);
2738 }
2739
2740 void lxc_conf_free(struct lxc_conf *conf)
2741 {
2742 if (!conf)
2743 return;
2744 if (conf->console.path)
2745 free(conf->console.path);
2746 if (conf->rootfs.mount != default_rootfs_mount)
2747 free(conf->rootfs.mount);
2748 if (conf->rootfs.path)
2749 free(conf->rootfs.path);
2750 if (conf->utsname)
2751 free(conf->utsname);
2752 if (conf->ttydir)
2753 free(conf->ttydir);
2754 if (conf->fstab)
2755 free(conf->fstab);
2756 if (conf->logfile)
2757 free(conf->logfile);
2758 lxc_clear_config_network(conf);
2759 #if HAVE_APPARMOR
2760 if (conf->aa_profile)
2761 free(conf->aa_profile);
2762 #endif
2763 lxc_clear_config_caps(conf);
2764 lxc_clear_cgroups(conf, "lxc.cgroup");
2765 lxc_clear_hooks(conf, "lxc.hook");
2766 lxc_clear_mount_entries(conf);
2767 lxc_clear_saved_nics(conf);
2768 free(conf);
2769 }