]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.c
fix network devices cleanup on error
[mirror_lxc.git] / src / lxc / conf.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <dlezcano at fr.ibm.com>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23 #define _GNU_SOURCE
24 #include <stdio.h>
25 #undef _GNU_SOURCE
26 #include <stdlib.h>
27 #include <errno.h>
28 #include <string.h>
29 #include <dirent.h>
30 #include <mntent.h>
31 #include <unistd.h>
32 #include <pty.h>
33
34 #include <sys/types.h>
35 #include <sys/utsname.h>
36 #include <sys/param.h>
37 #include <sys/stat.h>
38 #include <sys/socket.h>
39 #include <sys/mount.h>
40 #include <sys/mman.h>
41 #include <sys/prctl.h>
42 #include <sys/capability.h>
43
44 #include <arpa/inet.h>
45 #include <fcntl.h>
46 #include <netinet/in.h>
47 #include <net/if.h>
48 #include <libgen.h>
49
50 #include "network.h"
51 #include "error.h"
52 #include "parse.h"
53 #include "config.h"
54
55 #include <lxc/conf.h>
56 #include <lxc/log.h>
57 #include <lxc/lxc.h> /* for lxc_cgroup_set() */
58
59 lxc_log_define(lxc_conf, lxc);
60
/* Size limits for textual values. */
#define MAXHWLEN	18
#define MAXINDEXLEN	20
#define MAXMTULEN	16
#define MAXLINELEN	128

/*
 * Fallback values, used only when the headers included above did not
 * already provide the corresponding constant.
 */

/* mount(2) / umount2(2) flags */
#ifndef MS_REC
#define MS_REC 16384
#endif

#ifndef MNT_DETACH
#define MNT_DETACH 2
#endif

/* capability numbers */
#ifndef CAP_SETFCAP
#define CAP_SETFCAP 31
#endif

#ifndef CAP_MAC_OVERRIDE
#define CAP_MAC_OVERRIDE 32
#endif

#ifndef CAP_MAC_ADMIN
#define CAP_MAC_ADMIN 33
#endif

/* prctl(2) option used to drop a capability from the bounding set */
#ifndef PR_CAPBSET_DROP
#define PR_CAPBSET_DROP 24
#endif
89
/* Declared here by hand; none of the headers included above is relied on for it. */
extern int pivot_root(const char *new_root, const char *put_old);
91
/*
 * Signature shared by the per-type network device creation handlers
 * stored in the netdev_conf[] dispatch table.
 */
typedef int (*instanciate_cb)(struct lxc_netdev *netdev);
93
/*
 * Describes one textual mount option and its effect on the mount flags.
 * The member order must not change: the option table is filled with
 * positional initializers.
 */
struct mount_opt {
	char *name;	/* option keyword */
	int clear;	/* non-zero: 'flag' is cleared, zero: 'flag' is set */
	int flag;	/* MS_* bit(s) the option acts on */
};
99
/*
 * Maps a capability name, as written in the configuration, to its
 * numerical value. Member order matters: the table uses positional
 * initializers.
 */
struct caps_opt {
	char *name;	/* capability name */
	int value;	/* matching CAP_* constant */
};
104
/* Forward declarations of the handlers referenced by netdev_conf[]. */
static int instanciate_veth(struct lxc_netdev *netdev);
static int instanciate_macvlan(struct lxc_netdev *netdev);
static int instanciate_vlan(struct lxc_netdev *netdev);
static int instanciate_phys(struct lxc_netdev *netdev);
static int instanciate_empty(struct lxc_netdev *netdev);
110
111 static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
112 [LXC_NET_VETH] = instanciate_veth,
113 [LXC_NET_MACVLAN] = instanciate_macvlan,
114 [LXC_NET_VLAN] = instanciate_vlan,
115 [LXC_NET_PHYS] = instanciate_phys,
116 [LXC_NET_EMPTY] = instanciate_empty,
117 };
118
119 static struct mount_opt mount_opt[] = {
120 { "defaults", 0, 0 },
121 { "ro", 0, MS_RDONLY },
122 { "rw", 1, MS_RDONLY },
123 { "suid", 1, MS_NOSUID },
124 { "nosuid", 0, MS_NOSUID },
125 { "dev", 1, MS_NODEV },
126 { "nodev", 0, MS_NODEV },
127 { "exec", 1, MS_NOEXEC },
128 { "noexec", 0, MS_NOEXEC },
129 { "sync", 0, MS_SYNCHRONOUS },
130 { "async", 1, MS_SYNCHRONOUS },
131 { "remount", 0, MS_REMOUNT },
132 { "mand", 0, MS_MANDLOCK },
133 { "nomand", 1, MS_MANDLOCK },
134 { "atime", 1, MS_NOATIME },
135 { "noatime", 0, MS_NOATIME },
136 { "diratime", 1, MS_NODIRATIME },
137 { "nodiratime", 0, MS_NODIRATIME },
138 { "bind", 0, MS_BIND },
139 { "rbind", 0, MS_BIND|MS_REC },
140 { NULL, 0, 0 },
141 };
142
143 static struct caps_opt caps_opt[] = {
144 { "chown", CAP_CHOWN },
145 { "dac_override", CAP_DAC_OVERRIDE },
146 { "dac_read_search", CAP_DAC_READ_SEARCH },
147 { "fowner", CAP_FOWNER },
148 { "fsetid", CAP_FSETID },
149 { "kill", CAP_KILL },
150 { "setgid", CAP_SETGID },
151 { "setuid", CAP_SETUID },
152 { "setpcap", CAP_SETPCAP },
153 { "linux_immutable", CAP_LINUX_IMMUTABLE },
154 { "net_bind_service", CAP_NET_BIND_SERVICE },
155 { "net_broadcast", CAP_NET_BROADCAST },
156 { "net_admin", CAP_NET_ADMIN },
157 { "net_raw", CAP_NET_RAW },
158 { "ipc_lock", CAP_IPC_LOCK },
159 { "ipc_owner", CAP_IPC_OWNER },
160 { "sys_module", CAP_SYS_MODULE },
161 { "sys_rawio", CAP_SYS_RAWIO },
162 { "sys_chroot", CAP_SYS_CHROOT },
163 { "sys_ptrace", CAP_SYS_PTRACE },
164 { "sys_pacct", CAP_SYS_PACCT },
165 { "sys_admin", CAP_SYS_ADMIN },
166 { "sys_boot", CAP_SYS_BOOT },
167 { "sys_nice", CAP_SYS_NICE },
168 { "sys_resource", CAP_SYS_RESOURCE },
169 { "sys_time", CAP_SYS_TIME },
170 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
171 { "mknod", CAP_MKNOD },
172 { "lease", CAP_LEASE },
173 { "audit_write", CAP_AUDIT_WRITE },
174 { "audit_control", CAP_AUDIT_CONTROL },
175 { "setfcap", CAP_SETFCAP },
176 { "mac_override", CAP_MAC_OVERRIDE },
177 { "mac_admin", CAP_MAC_ADMIN },
178 };
179
180
/*
 * Per-line callback used while probing the filesystem type of a rootfs.
 *
 * 'buffer' is one line of a filesystem list, 'data' points to the probing
 * context (source, scratch mount point, result buffer, mount flags); the
 * context layout must stay in sync with the one in configure_find_fstype().
 *
 * Returns 1 when mounting with the type named on the line succeeded (the
 * type is copied to the result buffer), 0 when the line is skipped or the
 * mount attempt failed.
 */
static int configure_find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *testdir;
		char *fstype;
		int mntopt;
	} *probe = data;
	char *candidate = buffer;

	/* we don't try 'nodev' entries */
	if (strstr(candidate, "nodev") != NULL)
		return 0;

	/* strip what the lxc_char_*_gc helpers report on both ends
	 * (presumably surrounding whitespace -- helpers not visible here) */
	candidate += lxc_char_left_gc(candidate, strlen(candidate));
	candidate[lxc_char_right_gc(candidate, strlen(candidate))] = '\0';

	if (mount(probe->rootfs, probe->testdir, candidate,
		  probe->mntopt, NULL) != 0)
		return 0;

	/* found ! the mount was only a test, undo it and keep the type */
	umount(probe->testdir);
	strcpy(probe->fstype, candidate);

	return 1;
}
209
210 /* find the filesystem type with brute force */
211 static int configure_find_fstype(const char *rootfs, char *fstype, int mntopt)
212 {
213 int i, found;
214
215 struct cbarg {
216 const char *rootfs;
217 const char *testdir;
218 char *fstype;
219 int mntopt;
220 } cbarg = {
221 .rootfs = rootfs,
222 .fstype = fstype,
223 .mntopt = mntopt,
224 };
225
226 /* first we check with /etc/filesystems, in case the modules
227 * are auto-loaded and fall back to the supported kernel fs
228 */
229 char *fsfile[] = {
230 "/etc/filesystems",
231 "/proc/filesystems",
232 };
233
234 cbarg.testdir = tempnam("/tmp", "lxc-");
235 if (!cbarg.testdir) {
236 SYSERROR("failed to build a temp name");
237 return -1;
238 }
239
240 if (mkdir(cbarg.testdir, 0755)) {
241 SYSERROR("failed to create temporary directory");
242 return -1;
243 }
244
245 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
246
247 found = lxc_file_for_each_line(fsfile[i],
248 configure_find_fstype_cb,
249 &cbarg);
250
251 if (found < 0) {
252 SYSERROR("failed to read '%s'", fsfile[i]);
253 goto out;
254 }
255
256 if (found)
257 break;
258 }
259
260 if (!found) {
261 ERROR("failed to determine fs type for '%s'", rootfs);
262 goto out;
263 }
264
265 out:
266 rmdir(cbarg.testdir);
267 return found - 1;
268 }
269
/*
 * Append the fstab line which bind mounts (recursively) the directory
 * 'absrootfs' on 'rootfs'.
 *
 * Returns what fprintf() returned: the number of characters written, or
 * a negative value on error.
 */
static int configure_rootfs_dir_cb(const char *rootfs, const char *absrootfs,
				   FILE *f)
{
	int written;

	written = fprintf(f, "%s %s none rbind 0 0\n", absrootfs, rootfs);

	return written;
}
275
/*
 * Append the fstab line which mounts the block device 'absrootfs' on
 * 'rootfs'. The filesystem type is probed with configure_find_fstype().
 *
 * Returns -1 when the type can not be determined, otherwise the value
 * returned by fprintf().
 */
static int configure_rootfs_blk_cb(const char *rootfs, const char *absrootfs,
				   FILE *f)
{
	char detected[MAXPATHLEN];
	int probe;

	probe = configure_find_fstype(absrootfs, detected, 0);
	if (probe != 0) {
		ERROR("failed to configure mount for block device '%s'",
		      absrootfs);
		return -1;
	}

	return fprintf(f, "%s %s %s defaults 0 0\n", absrootfs, rootfs,
		       detected);
}
289
290 static int configure_rootfs(const char *name, const char *rootfs)
291 {
292 char path[MAXPATHLEN];
293 char absrootfs[MAXPATHLEN];
294 char fstab[MAXPATHLEN];
295 struct stat s;
296 FILE *f;
297 int i, ret;
298
299 typedef int (*rootfs_cb)(const char *, const char *, FILE *);
300
301 struct rootfs_type {
302 int type;
303 rootfs_cb cb;
304 } rtfs_type[] = {
305 { __S_IFDIR, configure_rootfs_dir_cb },
306 { __S_IFBLK, configure_rootfs_blk_cb },
307 };
308
309 if (!realpath(rootfs, absrootfs)) {
310 SYSERROR("failed to get real path for '%s'", rootfs);
311 return -1;
312 }
313
314 snprintf(path, MAXPATHLEN, LXCPATH "/%s/rootfs", name);
315
316 if (mkdir(path, 0755)) {
317 SYSERROR("failed to create the '%s' directory", path);
318 return -1;
319 }
320
321 if (access(absrootfs, F_OK)) {
322 SYSERROR("'%s' is not accessible", absrootfs);
323 return -1;
324 }
325
326 if (stat(absrootfs, &s)) {
327 SYSERROR("failed to stat '%s'", absrootfs);
328 return -1;
329 }
330
331 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
332
333 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
334 continue;
335
336 snprintf(fstab, MAXPATHLEN, LXCPATH "/%s/fstab", name);
337
338 f = fopen(fstab, "a+");
339 if (!f) {
340 SYSERROR("failed to open fstab file");
341 return -1;
342 }
343
344 ret = rtfs_type[i].cb(path, absrootfs, f);
345
346 fclose(f);
347
348 if (ret < 0) {
349 ERROR("failed to add rootfs mount in fstab");
350 return -1;
351 }
352
353 snprintf(path, MAXPATHLEN, LXCPATH "/%s/rootfs/rootfs", name);
354
355 return symlink(absrootfs, path);
356 }
357
358 ERROR("unsupported rootfs type for '%s'", absrootfs);
359 return -1;
360 }
361
/*
 * Set the hostname from utsname->nodename.
 *
 * A NULL 'utsname' means there is nothing to do. Returns 0 on success,
 * -1 when sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
376
377 static int setup_tty(const char *rootfs, const struct lxc_tty_info *tty_info)
378 {
379 char path[MAXPATHLEN];
380 int i;
381
382 for (i = 0; i < tty_info->nbtty; i++) {
383
384 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
385
386 snprintf(path, sizeof(path), "%s/dev/tty%d",
387 rootfs ? rootfs : "", i + 1);
388
389 /* At this point I can not use the "access" function
390 * to check the file is present or not because it fails
391 * with EACCES errno and I don't know why :( */
392
393 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
394 WARN("failed to mount '%s'->'%s'",
395 pty_info->name, path);
396 continue;
397 }
398 }
399
400 INFO("%d tty(s) has been setup", tty_info->nbtty);
401
402 return 0;
403 }
404
405 static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
406 {
407 struct lxc_list *mountlist, *listentry, *iterator;
408 char *pivotdir, *mountpoint, *mountentry;
409 int found;
410 void **cbparm;
411
412 mountentry = buffer;
413 cbparm = (void **)data;
414
415 mountlist = cbparm[0];
416 pivotdir = cbparm[1];
417
418 /* parse entry, first field is mountname, ignore */
419 mountpoint = strtok(mountentry, " ");
420 if (!mountpoint)
421 return -1;
422
423 /* second field is mountpoint */
424 mountpoint = strtok(NULL, " ");
425 if (!mountpoint)
426 return -1;
427
428 /* only consider mountpoints below old root fs */
429 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
430 return 0;
431
432 /* filter duplicate mountpoints */
433 found = 0;
434 lxc_list_for_each(iterator, mountlist) {
435 if (!strcmp(iterator->elem, mountpoint)) {
436 found = 1;
437 break;
438 }
439 }
440 if (found)
441 return 0;
442
443 /* add entry to list */
444 listentry = malloc(sizeof(*listentry));
445 if (!listentry) {
446 SYSERROR("malloc for mountpoint listentry failed");
447 return -1;
448 }
449
450 listentry->elem = strdup(mountpoint);
451 if (!listentry->elem) {
452 SYSERROR("strdup failed");
453 return -1;
454 }
455 lxc_list_add_tail(mountlist, listentry);
456
457 return 0;
458 }
459
460
461 static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
462 {
463 char path[MAXPATHLEN];
464 void *cbparm[2];
465 struct lxc_list mountlist, *iterator;
466 int ok, still_mounted, last_still_mounted;
467 int pivotdir_is_temp = 0;
468
469 /* change into new root fs */
470 if (chdir(rootfs)) {
471 SYSERROR("can't chroot to new rootfs '%s'", rootfs);
472 return -1;
473 }
474
475 /* create temporary mountpoint if none specified */
476 if (!pivotdir) {
477
478 snprintf(path, sizeof(path), "./lxc-oldrootfs-XXXXXX" );
479 if (!mkdtemp(path)) {
480 SYSERROR("can't make temporary mountpoint");
481 return -1;
482 }
483
484 pivotdir = strdup(&path[1]); /* get rid of leading dot */
485 if (!pivotdir) {
486 SYSERROR("strdup failed");
487 return -1;
488 }
489
490 pivotdir_is_temp = 1;
491 }
492 else {
493 snprintf(path, sizeof(path), ".%s", pivotdir);
494 }
495
496 DEBUG("temporary mountpoint for old rootfs is '%s'", path);
497
498 /* pivot_root into our new root fs */
499
500 if (pivot_root(".", path)) {
501 SYSERROR("pivot_root syscall failed");
502 return -1;
503 }
504
505 if (chdir("/")) {
506 SYSERROR("can't chroot to / after pivot_root");
507 return -1;
508 }
509
510 DEBUG("pivot_root syscall to '%s' successful", pivotdir);
511
512 /* read and parse /proc/mounts in old root fs */
513 lxc_list_init(&mountlist);
514
515 snprintf(path, sizeof(path), "%s/", pivotdir);
516 cbparm[0] = &mountlist;
517 cbparm[1] = strdup(path);
518
519 if (!cbparm[1]) {
520 SYSERROR("strdup failed");
521 return -1;
522 }
523
524 snprintf(path, sizeof(path), "/%s/proc/mounts", pivotdir);
525 ok = lxc_file_for_each_line(path, setup_rootfs_pivot_root_cb, &cbparm);
526 if (ok < 0) {
527 SYSERROR("failed to read or parse mount list '%s'", path);
528 return -1;
529 }
530
531 /* umount filesystems until none left or list no longer shrinks */
532 still_mounted = 0;
533 do {
534 last_still_mounted = still_mounted;
535 still_mounted = 0;
536
537 lxc_list_for_each(iterator, &mountlist) {
538
539 /* umount normally */
540 if (!umount(iterator->elem)) {
541 DEBUG("umounted '%s'", (char *)iterator->elem);
542 lxc_list_del(iterator);
543 continue;
544 }
545
546 still_mounted++;
547 }
548
549 } while (still_mounted > 0 && still_mounted != last_still_mounted);
550
551
552 lxc_list_for_each(iterator, &mountlist) {
553
554 /* let's try a lazy umount */
555 if (!umount2(iterator->elem, MNT_DETACH)) {
556 INFO("lazy unmount of '%s'", (char *)iterator->elem);
557 continue;
558 }
559
560 /* be more brutal (nfs) */
561 if (!umount2(iterator->elem, MNT_FORCE)) {
562 INFO("forced unmount of '%s'", (char *)iterator->elem);
563 continue;
564 }
565
566 WARN("failed to unmount '%s'", (char *)iterator->elem);
567 }
568
569 /* umount old root fs; if some other mount points are still
570 * there, we won't be able to umount it, so we have to do
571 * that in a lazy way otherwise the container will always
572 * fail to start
573 */
574 if (umount2(pivotdir, MNT_DETACH)) {
575 SYSERROR("could not unmount old rootfs");
576 return -1;
577 }
578 DEBUG("umounted '%s'", pivotdir);
579
580 /* remove temporary mount point, we don't consider the removing
581 * as fatal */
582 if (pivotdir_is_temp && rmdir(pivotdir))
583 WARN("can't remove temporary mountpoint: %m");
584
585 INFO("pivoted to '%s'", rootfs);
586 return 0;
587 }
588
/*
 * Switch to the root filesystem 'rootfs': it is bind mounted (recursively)
 * on a temporary directory which is then handed to
 * setup_rootfs_pivot_root() together with 'pivotdir'.
 *
 * A NULL 'rootfs' means there is nothing to do.
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_rootfs(const char *rootfs, const char *pivotdir)
{
	/* mkdtemp() chooses the name and creates the directory (mode
	 * 0700) atomically; the name lives on the stack so nothing has
	 * to be freed */
	char tmpname[] = "/tmp/lxc-rootfsXXXXXX";
	int ret = -1;

	if (!rootfs)
		return 0;

	if (!mkdtemp(tmpname)) {
		SYSERROR("failed to create temporary directory");
		return -1;
	}

	if (mount(rootfs, tmpname, "none", MS_BIND|MS_REC, NULL)) {
		SYSERROR("failed to mount '%s'->'%s'", rootfs, tmpname);
		goto out;
	}

	if (setup_rootfs_pivot_root(tmpname, pivotdir)) {
		ERROR("failed to pivot_root to '%s'", rootfs);
		goto out;
	}

	ret = 0;
out:
	/* best effort, the result is deliberately not checked */
	rmdir(tmpname);
	return ret;
}
623
/*
 * Mount a new devpts instance on /dev/pts and make /dev/ptmx refer to
 * its ptmx node, either through a symlink (when /dev/ptmx does not exist)
 * or through a bind mount.
 *
 * Nothing is done when 'pts' is zero. Returns 0 on success, -1 on error.
 */
static int setup_pts(int pts)
{
	if (pts == 0)
		return 0;

	/* an instance with a ptmx node is already there, remove it first */
	if (access("/dev/pts/ptmx", F_OK) == 0 && umount("/dev/pts") != 0) {
		SYSERROR("failed to umount 'dev/pts'");
		return -1;
	}

	if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
		  "newinstance") != 0) {
		SYSERROR("failed to mount a new instance of '/dev/pts'");
		return -1;
	}

	if (chmod("/dev/pts/ptmx", 0666) != 0) {
		SYSERROR("failed to set permission for '/dev/pts/ptmx'");
		return -1;
	}

	if (access("/dev/ptmx", F_OK) != 0) {
		/* no /dev/ptmx, a symlink does the job */
		if (symlink("/dev/pts/ptmx", "/dev/ptmx") != 0) {
			SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
			return -1;
		}
		return 0;
	}

	/* fallback here, /dev/pts/ptmx exists just mount bind */
	if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0) != 0) {
		SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
		return -1;
	}

	INFO("created new pts instance");

	return 0;
}
662
663 static int setup_console(const char *rootfs, const struct lxc_console *console)
664 {
665 char path[MAXPATHLEN];
666 struct stat s;
667
668 /* We don't have a rootfs, /dev/console will be shared */
669 if (!rootfs)
670 return 0;
671
672 snprintf(path, sizeof(path), "%s/dev/console", rootfs);
673
674 if (access(path, F_OK)) {
675 WARN("rootfs specified but no console found");
676 return 0;
677 }
678
679 if (console->peer == -1)
680 INFO("no console output required");
681
682 if (stat(path, &s)) {
683 SYSERROR("failed to stat '%s'", path);
684 return -1;
685 }
686
687 if (chmod(console->name, s.st_mode)) {
688 SYSERROR("failed to set mode '0%o' to '%s'",
689 s.st_mode, console->name);
690 return -1;
691 }
692
693 if (mount(console->name, path, "none", MS_BIND, 0)) {
694 ERROR("failed to mount '%s' on '%s'", console->name, path);
695 return -1;
696 }
697
698 INFO("console has been setup");
699
700 return 0;
701 }
702
703 static int setup_cgroup(const char *name, struct lxc_list *cgroups)
704 {
705 struct lxc_list *iterator;
706 struct lxc_cgroup *cg;
707 int ret = -1;
708
709 if (lxc_list_empty(cgroups))
710 return 0;
711
712 lxc_list_for_each(iterator, cgroups) {
713
714 cg = iterator->elem;
715
716 if (lxc_cgroup_set(name, cg->subsystem, cg->value))
717 goto out;
718
719 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
720 }
721
722 ret = 0;
723 INFO("cgroup has been setup");
724 out:
725 return ret;
726 }
727
728 static void parse_mntopt(char *opt, unsigned long *flags, char **data)
729 {
730 struct mount_opt *mo;
731
732 /* If opt is found in mount_opt, set or clear flags.
733 * Otherwise append it to data. */
734
735 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
736 if (!strncmp(opt, mo->name, strlen(mo->name))) {
737 if (mo->clear)
738 *flags &= ~mo->flag;
739 else
740 *flags |= mo->flag;
741 return;
742 }
743 }
744
745 if (strlen(*data))
746 strcat(*data, ",");
747 strcat(*data, opt);
748 }
749
/*
 * Split the comma separated options of 'mntent' into mount flags and a
 * filesystem specific data string.
 *
 * mntflags : updated by the options parse_mntopt() recognizes
 * mntdata  : set to a heap-allocated string holding the remaining
 *            options when there are any, left untouched otherwise; the
 *            caller frees it
 *
 * Returns 0 on success (including when there are no options at all),
 * -1 when memory can not be allocated.
 */
static int parse_mntopts(struct mntent *mntent, unsigned long *mntflags,
			 char **mntdata)
{
	char *opts, *extra;
	char *token, *state = NULL;

	if (mntent->mnt_opts == NULL)
		return 0;

	/* strtok_r writes into the string, work on a copy */
	opts = strdup(mntent->mnt_opts);
	if (opts == NULL) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the leftovers can not be longer than the whole option string */
	extra = malloc(strlen(opts) + 1);
	if (extra == NULL) {
		SYSERROR("failed to allocate memory");
		free(opts);
		return -1;
	}
	extra[0] = '\0';

	token = strtok_r(opts, ",", &state);
	while (token != NULL) {
		parse_mntopt(token, mntflags, &extra);
		token = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(opts);

	return 0;
}
785
/*
 * Mount every entry read from 'file' with getmntent().
 *
 * For each entry the options are split by parse_mntopts() into mount
 * flags and a data string, which are passed to mount().
 *
 * Returns 0 when all the entries were mounted, -1 at the first entry
 * which can not be parsed or mounted.
 */
static int mount_file_entries(FILE *file)
{
	struct mntent *mntent;
	int ret = -1;
	unsigned long mntflags;
	char *mntdata;

	while ((mntent = getmntent(file))) {

		mntflags = 0;
		mntdata = NULL;
		if (parse_mntopts(mntent, &mntflags, &mntdata) < 0) {
			ERROR("failed to parse mount option '%s'",
			      mntent->mnt_opts);
			goto out;
		}

		if (mount(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata)) {
			/* report first: free() must not get a chance to
			 * alter errno before it is logged */
			SYSERROR("failed to mount '%s' on '%s'",
				 mntent->mnt_fsname, mntent->mnt_dir);
			free(mntdata);
			goto out;
		}

		DEBUG("mounted %s on %s, type %s", mntent->mnt_fsname,
		      mntent->mnt_dir, mntent->mnt_type);

		free(mntdata);
	}

	ret = 0;

	INFO("mount points have been setup");
out:
	return ret;
}
822
/*
 * Mount the entries of the fstab-like file 'fstab'.
 *
 * A NULL 'fstab' means there is nothing to mount. Returns 0 on success,
 * -1 when the file can not be opened or an entry can not be mounted.
 */
static int setup_mount(const char *fstab)
{
	FILE *entries;
	int status;

	if (fstab == NULL)
		return 0;

	entries = setmntent(fstab, "r");
	if (entries == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	status = mount_file_entries(entries);
	endmntent(entries);

	return status;
}
842
843 static int setup_mount_entries(struct lxc_list *mount)
844 {
845 FILE *file;
846 struct lxc_list *iterator;
847 char *mount_entry;
848 int ret;
849
850 file = tmpfile();
851 if (!file) {
852 ERROR("tmpfile error: %m");
853 return -1;
854 }
855
856 lxc_list_for_each(iterator, mount) {
857 mount_entry = iterator->elem;
858 fprintf(file, "%s\n", mount_entry);
859 }
860
861 rewind(file);
862
863 ret = mount_file_entries(file);
864
865 fclose(file);
866 return ret;
867 }
868
869 static int setup_caps(struct lxc_list *caps)
870 {
871 struct lxc_list *iterator;
872 char *drop_entry;
873 int i, capid;
874
875 lxc_list_for_each(iterator, caps) {
876
877 drop_entry = iterator->elem;
878
879 capid = -1;
880
881 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
882
883 if (strcmp(drop_entry, caps_opt[i].name))
884 continue;
885
886 capid = caps_opt[i].value;
887 break;
888 }
889
890 if (capid < 0) {
891 ERROR("unknown capability %s", drop_entry);
892 return -1;
893 }
894
895 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
896
897 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
898 SYSERROR("failed to remove %s capability", drop_entry);
899 return -1;
900 }
901
902 }
903
904 DEBUG("capabilities has been setup");
905
906 return 0;
907 }
908
909 static int setup_hw_addr(char *hwaddr, const char *ifname)
910 {
911 struct sockaddr sockaddr;
912 struct ifreq ifr;
913 int ret, fd;
914
915 if (lxc_convert_mac(hwaddr, &sockaddr)) {
916 ERROR("conversion has failed");
917 return -1;
918 }
919
920 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
921 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
922
923 fd = socket(AF_INET, SOCK_DGRAM, 0);
924 if (fd < 0) {
925 ERROR("socket failure : %s", strerror(errno));
926 return -1;
927 }
928
929 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
930 close(fd);
931 if (ret)
932 ERROR("ioctl failure : %s", strerror(errno));
933
934 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifname);
935
936 return ret;
937 }
938
939 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
940 {
941 struct lxc_list *iterator;
942 struct lxc_inetdev *inetdev;
943
944 lxc_list_for_each(iterator, ip) {
945
946 inetdev = iterator->elem;
947
948 if (lxc_ip_addr_add(AF_INET, ifindex,
949 &inetdev->addr, inetdev->prefix)) {
950 return -1;
951 }
952 }
953
954 return 0;
955 }
956
957 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
958 {
959 struct lxc_list *iterator;
960 struct lxc_inet6dev *inet6dev;
961
962 lxc_list_for_each(iterator, ip) {
963
964 inet6dev = iterator->elem;
965
966 if (lxc_ip_addr_add(AF_INET6, ifindex,
967 & inet6dev->addr, inet6dev->prefix))
968 return -1;
969 }
970
971 return 0;
972 }
973
974 static int setup_netdev(struct lxc_netdev *netdev)
975 {
976 char ifname[IFNAMSIZ];
977 char *current_ifname = ifname;
978
979 /* empty network namespace */
980 if (!netdev->ifindex) {
981 if (netdev->flags | IFF_UP) {
982 if (lxc_device_up("lo")) {
983 ERROR("failed to set the loopback up");
984 return -1;
985 }
986 return 0;
987 }
988 }
989
990 /* retrieve the name of the interface */
991 if (!if_indextoname(netdev->ifindex, current_ifname)) {
992 ERROR("no interface corresponding to index '%d'",
993 netdev->ifindex);
994 return -1;
995 }
996
997 /* default: let the system to choose one interface name */
998 if (!netdev->name)
999 netdev->name = "eth%d";
1000
1001 /* rename the interface name */
1002 if (lxc_device_rename(ifname, netdev->name)) {
1003 ERROR("failed to rename %s->%s", ifname, current_ifname);
1004 return -1;
1005 }
1006
1007 /* Re-read the name of the interface because its name has changed
1008 * and would be automatically allocated by the system
1009 */
1010 if (!if_indextoname(netdev->ifindex, current_ifname)) {
1011 ERROR("no interface corresponding to index '%d'",
1012 netdev->ifindex);
1013 return -1;
1014 }
1015
1016 /* set a mac address */
1017 if (netdev->hwaddr) {
1018 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
1019 ERROR("failed to setup hw address for '%s'",
1020 current_ifname);
1021 return -1;
1022 }
1023 }
1024
1025 /* setup ipv4 addresses on the interface */
1026 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
1027 ERROR("failed to setup ip addresses for '%s'",
1028 ifname);
1029 return -1;
1030 }
1031
1032 /* setup ipv6 addresses on the interface */
1033 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
1034 ERROR("failed to setup ipv6 addresses for '%s'",
1035 ifname);
1036 return -1;
1037 }
1038
1039 /* set the network device up */
1040 if (netdev->flags | IFF_UP) {
1041 if (lxc_device_up(current_ifname)) {
1042 ERROR("failed to set '%s' up", current_ifname);
1043 return -1;
1044 }
1045
1046 /* the network is up, make the loopback up too */
1047 if (lxc_device_up("lo")) {
1048 ERROR("failed to set the loopback up");
1049 return -1;
1050 }
1051 }
1052
1053 DEBUG("'%s' has been setup", current_ifname);
1054
1055 return 0;
1056 }
1057
1058 static int setup_network(struct lxc_list *network)
1059 {
1060 struct lxc_list *iterator;
1061 struct lxc_netdev *netdev;
1062
1063 lxc_list_for_each(iterator, network) {
1064
1065 netdev = iterator->elem;
1066
1067 if (setup_netdev(netdev)) {
1068 ERROR("failed to setup netdev");
1069 return -1;
1070 }
1071 }
1072
1073 if (!lxc_list_empty(network))
1074 INFO("network has been setup");
1075
1076 return 0;
1077 }
1078
1079 struct lxc_conf *lxc_conf_init(void)
1080 {
1081 struct lxc_conf *new;
1082
1083 new = malloc(sizeof(*new));
1084 if (!new) {
1085 ERROR("lxc_conf_init : %m");
1086 return NULL;
1087 }
1088 memset(new, 0, sizeof(*new));
1089
1090 new->rootfs = NULL;
1091 new->pivotdir = NULL;
1092 new->fstab = NULL;
1093 new->utsname = NULL;
1094 new->tty = 0;
1095 new->pts = 0;
1096 new->console.peer = -1;
1097 new->console.master = -1;
1098 new->console.slave = -1;
1099 new->console.name[0] = '\0';
1100 lxc_list_init(&new->cgroup);
1101 lxc_list_init(&new->network);
1102 lxc_list_init(&new->mount_list);
1103 lxc_list_init(&new->caps);
1104
1105 return new;
1106 }
1107
1108 static int instanciate_veth(struct lxc_netdev *netdev)
1109 {
1110 char veth1buf[IFNAMSIZ], *veth1;
1111 char veth2[IFNAMSIZ];
1112
1113 if (netdev->priv.veth_attr.pair)
1114 veth1 = netdev->priv.veth_attr.pair;
1115 else {
1116 snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
1117 mktemp(veth1buf);
1118 veth1 = veth1buf;
1119 }
1120
1121 snprintf(veth2, sizeof(veth2), "vethXXXXXX");
1122 mktemp(veth2);
1123
1124 if (!strlen(veth1) || !strlen(veth2)) {
1125 ERROR("failed to allocate a temporary name");
1126 return -1;
1127 }
1128
1129 if (lxc_veth_create(veth1, veth2)) {
1130 ERROR("failed to create %s-%s", veth1, veth2);
1131 return -1;
1132 }
1133
1134 if (netdev->mtu) {
1135 if (lxc_device_set_mtu(veth1, atoi(netdev->mtu)) ||
1136 lxc_device_set_mtu(veth2, atoi(netdev->mtu))) {
1137 ERROR("failed to set mtu '%s' for %s-%s",
1138 netdev->mtu, veth1, veth2);
1139 goto out_delete;
1140 }
1141 }
1142
1143 if (netdev->link && lxc_bridge_attach(netdev->link, veth1)) {
1144 ERROR("failed to attach '%s' to the bridge '%s'",
1145 veth1, netdev->link);
1146 goto out_delete;
1147 }
1148
1149 netdev->ifindex = if_nametoindex(veth2);
1150 if (!netdev->ifindex) {
1151 ERROR("failed to retrieve the index for %s", veth2);
1152 goto out_delete;
1153 }
1154
1155 if (netdev->flags & IFF_UP) {
1156 if (lxc_device_up(veth1)) {
1157 ERROR("failed to set %s up", veth1);
1158 goto out_delete;
1159 }
1160 }
1161
1162 DEBUG("instanciated veth '%s/%s', index is '%d'",
1163 veth1, veth2, netdev->ifindex);
1164
1165 return 0;
1166
1167 out_delete:
1168 lxc_device_delete(veth1);
1169 return -1;
1170 }
1171
1172 static int instanciate_macvlan(struct lxc_netdev *netdev)
1173 {
1174 char peer[IFNAMSIZ];
1175
1176 if (!netdev->link) {
1177 ERROR("no link specified for macvlan netdev");
1178 return -1;
1179 }
1180
1181 snprintf(peer, sizeof(peer), "mcXXXXXX");
1182
1183 mktemp(peer);
1184
1185 if (!strlen(peer)) {
1186 ERROR("failed to make a temporary name");
1187 return -1;
1188 }
1189
1190 if (lxc_macvlan_create(netdev->link, peer,
1191 netdev->priv.macvlan_attr.mode)) {
1192 ERROR("failed to create macvlan interface '%s' on '%s'",
1193 peer, netdev->link);
1194 return -1;
1195 }
1196
1197 netdev->ifindex = if_nametoindex(peer);
1198 if (!netdev->ifindex) {
1199 ERROR("failed to retrieve the index for %s", peer);
1200 lxc_device_delete(peer);
1201 return -1;
1202 }
1203
1204 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
1205 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
1206
1207 return 0;
1208 }
1209
1210 /* XXX: merge with instanciate_macvlan */
1211 static int instanciate_vlan(struct lxc_netdev *netdev)
1212 {
1213 char peer[IFNAMSIZ];
1214
1215 if (!netdev->link) {
1216 ERROR("no link specified for vlan netdev");
1217 return -1;
1218 }
1219
1220 snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
1221
1222 if (lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid)) {
1223 ERROR("failed to create vlan interface '%s' on '%s'",
1224 peer, netdev->link);
1225 return -1;
1226 }
1227
1228 netdev->ifindex = if_nametoindex(peer);
1229 if (!netdev->ifindex) {
1230 ERROR("failed to retrieve the ifindex for %s", peer);
1231 lxc_device_delete(peer);
1232 return -1;
1233 }
1234
1235 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
1236 netdev->ifindex);
1237
1238 return 0;
1239 }
1240
1241 static int instanciate_phys(struct lxc_netdev *netdev)
1242 {
1243 netdev->ifindex = if_nametoindex(netdev->link);
1244 if (!netdev->ifindex) {
1245 ERROR("failed to retrieve the index for %s", netdev->link);
1246 return -1;
1247 }
1248
1249 return 0;
1250 }
1251
1252 static int instanciate_empty(struct lxc_netdev *netdev)
1253 {
1254 netdev->ifindex = 0;
1255 return 0;
1256 }
1257
1258 int lxc_create_network(struct lxc_list *network)
1259 {
1260 struct lxc_list *iterator;
1261 struct lxc_netdev *netdev;
1262
1263 lxc_list_for_each(iterator, network) {
1264
1265 netdev = iterator->elem;
1266
1267 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
1268 ERROR("invalid network configuration type '%d'",
1269 netdev->type);
1270 return -1;
1271 }
1272
1273 if (netdev_conf[netdev->type](netdev)) {
1274 ERROR("failed to create netdev");
1275 return -1;
1276 }
1277 }
1278
1279 return 0;
1280 }
1281
1282 void lxc_delete_network(struct lxc_list *network)
1283 {
1284 struct lxc_list *iterator;
1285 struct lxc_netdev *netdev;
1286
1287 lxc_list_for_each(iterator, network) {
1288 netdev = iterator->elem;
1289 if (netdev->ifindex > 0)
1290 lxc_device_delete_index(netdev->ifindex);
1291 }
1292 }
1293
1294 int lxc_assign_network(struct lxc_list *network, pid_t pid)
1295 {
1296 struct lxc_list *iterator;
1297 struct lxc_netdev *netdev;
1298
1299 lxc_list_for_each(iterator, network) {
1300
1301 netdev = iterator->elem;
1302
1303 /* empty network namespace, nothing to move */
1304 if (!netdev->ifindex)
1305 continue;
1306
1307 if (lxc_device_move(netdev->ifindex, pid)) {
1308 ERROR("failed to move '%s' to the container",
1309 netdev->link);
1310 return -1;
1311 }
1312
1313 DEBUG("move '%s' to '%d'", netdev->link, pid);
1314 }
1315
1316 return 0;
1317 }
1318
1319 int lxc_create_tty(const char *name, struct lxc_conf *conf)
1320 {
1321 struct lxc_tty_info *tty_info = &conf->tty_info;
1322 int i;
1323
1324 /* no tty in the configuration */
1325 if (!conf->tty)
1326 return 0;
1327
1328 tty_info->pty_info =
1329 malloc(sizeof(*tty_info->pty_info)*conf->tty);
1330 if (!tty_info->pty_info) {
1331 SYSERROR("failed to allocate pty_info");
1332 return -1;
1333 }
1334
1335 for (i = 0; i < conf->tty; i++) {
1336
1337 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
1338
1339 if (openpty(&pty_info->master, &pty_info->slave,
1340 pty_info->name, NULL, NULL)) {
1341 SYSERROR("failed to create pty #%d", i);
1342 tty_info->nbtty = i;
1343 lxc_delete_tty(tty_info);
1344 return -1;
1345 }
1346
1347 /* Prevent leaking the file descriptors to the container */
1348 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
1349 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
1350
1351 pty_info->busy = 0;
1352 }
1353
1354 tty_info->nbtty = conf->tty;
1355
1356 INFO("tty's configured");
1357
1358 return 0;
1359 }
1360
1361 void lxc_delete_tty(struct lxc_tty_info *tty_info)
1362 {
1363 int i;
1364
1365 for (i = 0; i < tty_info->nbtty; i++) {
1366 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
1367
1368 close(pty_info->master);
1369 close(pty_info->slave);
1370 }
1371
1372 free(tty_info->pty_info);
1373 tty_info->nbtty = 0;
1374 }
1375
1376 int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
1377 {
1378 if (setup_utsname(lxc_conf->utsname)) {
1379 ERROR("failed to setup the utsname for '%s'", name);
1380 return -1;
1381 }
1382
1383 if (setup_network(&lxc_conf->network)) {
1384 ERROR("failed to setup the network for '%s'", name);
1385 return -1;
1386 }
1387
1388 if (setup_cgroup(name, &lxc_conf->cgroup)) {
1389 ERROR("failed to setup the cgroups for '%s'", name);
1390 return -1;
1391 }
1392
1393 if (setup_mount(lxc_conf->fstab)) {
1394 ERROR("failed to setup the mounts for '%s'", name);
1395 return -1;
1396 }
1397
1398 if (setup_mount_entries(&lxc_conf->mount_list)) {
1399 ERROR("failed to setup the mount entries for '%s'", name);
1400 return -1;
1401 }
1402
1403 if (setup_console(lxc_conf->rootfs, &lxc_conf->console)) {
1404 ERROR("failed to setup the console for '%s'", name);
1405 return -1;
1406 }
1407
1408 if (setup_tty(lxc_conf->rootfs, &lxc_conf->tty_info)) {
1409 ERROR("failed to setup the ttys for '%s'", name);
1410 return -1;
1411 }
1412
1413 if (setup_rootfs(lxc_conf->rootfs, lxc_conf->pivotdir)) {
1414 ERROR("failed to set rootfs for '%s'", name);
1415 return -1;
1416 }
1417
1418 if (setup_pts(lxc_conf->pts)) {
1419 ERROR("failed to setup the new pts instance");
1420 return -1;
1421 }
1422
1423 if (setup_caps(&lxc_conf->caps)) {
1424 ERROR("failed to drop capabilities");
1425 return -1;
1426 }
1427
1428 NOTICE("'%s' is setup.", name);
1429
1430 return 0;
1431 }