]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
cgroup: prevent DOS when a hierarchy is mounted multiple times
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
e3b4c4c4 32#include <sys/wait.h>
2d76d1d7 33#include <sys/syscall.h>
e827ff7e
SG
34
35#if HAVE_PTY_H
b0a33c1e 36#include <pty.h>
e827ff7e
SG
37#else
38#include <../include/openpty.h>
39#endif
0ad19a3f 40
b3ecde1e
DL
41#include <linux/loop.h>
42
0ad19a3f 43#include <sys/types.h>
44#include <sys/utsname.h>
45#include <sys/param.h>
46#include <sys/stat.h>
47#include <sys/socket.h>
48#include <sys/mount.h>
49#include <sys/mman.h>
81810dd1 50#include <sys/prctl.h>
0ad19a3f 51
52#include <arpa/inet.h>
53#include <fcntl.h>
54#include <netinet/in.h>
55#include <net/if.h>
6f4a3756 56#include <libgen.h>
0ad19a3f 57
e5bda9ee 58#include "network.h"
59#include "error.h"
b2718c72 60#include "parse.h"
881450bb 61#include "config.h"
1b09f2c0
DL
62#include "utils.h"
63#include "conf.h"
64#include "log.h"
65#include "lxc.h" /* for lxc_cgroup_set() */
d55bc1ad 66#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 67#include "bdev.h"
36eb9bde 68
d0a36f2c
SG
69#if HAVE_APPARMOR
70#include <apparmor.h>
71#endif
72
495d2046
SG
73#if HAVE_SYS_CAPABILITY_H
74#include <sys/capability.h>
75#endif
76
6ff05e18
SG
77#if HAVE_SYS_PERSONALITY_H
78#include <sys/personality.h>
79#endif
80
edaf8b1b
SG
81#if IS_BIONIC
82#include <../include/lxcmntent.h>
83#else
84#include <mntent.h>
85#endif
86
769872f9
SH
87#include "lxcseccomp.h"
88
36eb9bde 89lxc_log_define(lxc_conf, lxc);
e5bda9ee 90
0ad19a3f 91#define MAXHWLEN 18
92#define MAXINDEXLEN 20
442cbbe6 93#define MAXMTULEN 16
0ad19a3f 94#define MAXLINELEN 128
95
968fbd36
SK
96#ifndef MS_DIRSYNC
97#define MS_DIRSYNC 128
98#endif
99
fdc03323
DL
100#ifndef MS_REC
101#define MS_REC 16384
102#endif
103
c08556c6
DL
104#ifndef MNT_DETACH
105#define MNT_DETACH 2
106#endif
107
859a6da0
NC
108#ifndef MS_SLAVE
109#define MS_SLAVE (1<<19)
110#endif
111
88d413d5
SW
112#ifndef MS_RELATIME
113#define MS_RELATIME (1 << 21)
114#endif
115
116#ifndef MS_STRICTATIME
117#define MS_STRICTATIME (1 << 24)
118#endif
119
495d2046 120#if HAVE_SYS_CAPABILITY_H
b09094da
MN
121#ifndef CAP_SETFCAP
122#define CAP_SETFCAP 31
123#endif
124
125#ifndef CAP_MAC_OVERRIDE
126#define CAP_MAC_OVERRIDE 32
127#endif
128
129#ifndef CAP_MAC_ADMIN
130#define CAP_MAC_ADMIN 33
131#endif
495d2046 132#endif
b09094da
MN
133
134#ifndef PR_CAPBSET_DROP
135#define PR_CAPBSET_DROP 24
136#endif
137
9818cae4
SG
138#ifndef LO_FLAGS_AUTOCLEAR
139#define LO_FLAGS_AUTOCLEAR 4
140#endif
141
2d76d1d7
SG
/*
 * pivot_root() shim: when the C library ships no wrapper, go through
 * syscall(2) if the syscall number is known, otherwise fail with ENOSYS.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	/* no syscall number available at build time */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
156
/*
 * sethostname() shim for C libraries without the wrapper: use syscall(2)
 * when the syscall number is known, otherwise fail with ENOSYS.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	/* no syscall number available at build time */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
169
72f919c4
SG
170/* Define __S_ISTYPE if missing from the C library */
171#ifndef __S_ISTYPE
172#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
173#endif
174
72d0e1cb 175char *lxchook_names[NUM_LXC_HOOKS] = {
f7bee6c6 176 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop" };
72d0e1cb 177
/* Signature shared by the per-network-type set-up and shutdown handlers. */
typedef int (*instanciate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
0ad19a3f 179
998ac676
RT
/*
 * One recognised mount option keyword.
 * NOTE(review): judging from the option table in this file, a non-zero
 * 'clear' presumably means the keyword removes 'flag' instead of setting
 * it -- confirm against the option parser.
 */
struct mount_opt {
	char *name;	/* option keyword */
	int clear;
	int flag;	/* MS_* bit(s) tied to the keyword */
};
185
81810dd1
DL
/* Associates a capability keyword with its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* capability keyword */
	int value;	/* matching CAP_* constant */
};
190
e3b4c4c4
ST
191static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
192static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
193static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
194static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
195static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 196
24654103
DL
197static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
198 [LXC_NET_VETH] = instanciate_veth,
199 [LXC_NET_MACVLAN] = instanciate_macvlan,
200 [LXC_NET_VLAN] = instanciate_vlan,
201 [LXC_NET_PHYS] = instanciate_phys,
202 [LXC_NET_EMPTY] = instanciate_empty,
0ad19a3f 203};
204
74a2b586
JK
205static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
206static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
207static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
208static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
209static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
210
211static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
212 [LXC_NET_VETH] = shutdown_veth,
213 [LXC_NET_MACVLAN] = shutdown_macvlan,
214 [LXC_NET_VLAN] = shutdown_vlan,
215 [LXC_NET_PHYS] = shutdown_phys,
216 [LXC_NET_EMPTY] = shutdown_empty,
217};
218
998ac676 219static struct mount_opt mount_opt[] = {
88d413d5
SW
220 { "defaults", 0, 0 },
221 { "ro", 0, MS_RDONLY },
222 { "rw", 1, MS_RDONLY },
223 { "suid", 1, MS_NOSUID },
224 { "nosuid", 0, MS_NOSUID },
225 { "dev", 1, MS_NODEV },
226 { "nodev", 0, MS_NODEV },
227 { "exec", 1, MS_NOEXEC },
228 { "noexec", 0, MS_NOEXEC },
229 { "sync", 0, MS_SYNCHRONOUS },
230 { "async", 1, MS_SYNCHRONOUS },
231 { "dirsync", 0, MS_DIRSYNC },
232 { "remount", 0, MS_REMOUNT },
233 { "mand", 0, MS_MANDLOCK },
234 { "nomand", 1, MS_MANDLOCK },
235 { "atime", 1, MS_NOATIME },
236 { "noatime", 0, MS_NOATIME },
237 { "diratime", 1, MS_NODIRATIME },
238 { "nodiratime", 0, MS_NODIRATIME },
239 { "bind", 0, MS_BIND },
240 { "rbind", 0, MS_BIND|MS_REC },
241 { "relatime", 0, MS_RELATIME },
242 { "norelatime", 1, MS_RELATIME },
243 { "strictatime", 0, MS_STRICTATIME },
244 { "nostrictatime", 1, MS_STRICTATIME },
245 { NULL, 0, 0 },
998ac676
RT
246};
247
495d2046 248#if HAVE_SYS_CAPABILITY_H
81810dd1 249static struct caps_opt caps_opt[] = {
a6afdde9 250 { "chown", CAP_CHOWN },
1e11be34
DL
251 { "dac_override", CAP_DAC_OVERRIDE },
252 { "dac_read_search", CAP_DAC_READ_SEARCH },
253 { "fowner", CAP_FOWNER },
254 { "fsetid", CAP_FSETID },
81810dd1
DL
255 { "kill", CAP_KILL },
256 { "setgid", CAP_SETGID },
257 { "setuid", CAP_SETUID },
258 { "setpcap", CAP_SETPCAP },
259 { "linux_immutable", CAP_LINUX_IMMUTABLE },
260 { "net_bind_service", CAP_NET_BIND_SERVICE },
261 { "net_broadcast", CAP_NET_BROADCAST },
262 { "net_admin", CAP_NET_ADMIN },
263 { "net_raw", CAP_NET_RAW },
264 { "ipc_lock", CAP_IPC_LOCK },
265 { "ipc_owner", CAP_IPC_OWNER },
266 { "sys_module", CAP_SYS_MODULE },
267 { "sys_rawio", CAP_SYS_RAWIO },
268 { "sys_chroot", CAP_SYS_CHROOT },
269 { "sys_ptrace", CAP_SYS_PTRACE },
270 { "sys_pacct", CAP_SYS_PACCT },
271 { "sys_admin", CAP_SYS_ADMIN },
272 { "sys_boot", CAP_SYS_BOOT },
273 { "sys_nice", CAP_SYS_NICE },
274 { "sys_resource", CAP_SYS_RESOURCE },
275 { "sys_time", CAP_SYS_TIME },
276 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
277 { "mknod", CAP_MKNOD },
278 { "lease", CAP_LEASE },
9527e566 279#ifdef CAP_AUDIT_WRITE
81810dd1 280 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
281#endif
282#ifdef CAP_AUDIT_CONTROL
81810dd1 283 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 284#endif
81810dd1
DL
285 { "setfcap", CAP_SETFCAP },
286 { "mac_override", CAP_MAC_OVERRIDE },
287 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
288#ifdef CAP_SYSLOG
289 { "syslog", CAP_SYSLOG },
290#endif
291#ifdef CAP_WAKE_ALARM
292 { "wake_alarm", CAP_WAKE_ALARM },
293#endif
81810dd1 294};
495d2046
SG
295#else
296static struct caps_opt caps_opt[] = {};
297#endif
81810dd1 298
91c3830e
SH
299static int run_buffer(char *buffer)
300{
301 FILE *f;
302 char *output;
8e7da691 303 int ret;
91c3830e
SH
304
305 f = popen(buffer, "r");
306 if (!f) {
307 SYSERROR("popen failed");
308 return -1;
309 }
310
311 output = malloc(LXC_LOG_BUFFER_SIZE);
312 if (!output) {
313 ERROR("failed to allocate memory for script output");
00b6be44 314 pclose(f);
91c3830e
SH
315 return -1;
316 }
317
318 while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
319 DEBUG("script output: %s", output);
320
321 free(output);
322
8e7da691
DE
323 ret = pclose(f);
324 if (ret == -1) {
91c3830e
SH
325 SYSERROR("Script exited on error");
326 return -1;
8e7da691
DE
327 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
328 ERROR("Script exited with status %d", WEXITSTATUS(ret));
329 return -1;
330 } else if (WIFSIGNALED(ret)) {
331 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
332 strsignal(WTERMSIG(ret)));
333 return -1;
91c3830e
SH
334 }
335
336 return 0;
337}
338
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and run it
 * with run_buffer().
 *
 * The variadic arguments are extra 'char *' words and MUST be terminated
 * by a NULL pointer.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be assembled.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret, rc = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: each extra argument costs its length plus one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two separating spaces and the trailing NUL */

	/* lengths are tracked in 'int' below, so refuse anything larger */
	if (size > INT_MAX)
		return -1;

	/*
	 * Heap allocation: the size is derived from configuration strings,
	 * and alloca() has no way to report a stack overflow.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int n;

		n = snprintf(buffer + ret, len, " %s", p);
		if (n < 0 || n >= len) {
			ERROR("Script args too long");
			/* every va_start() needs its va_end(), also on error */
			va_end(ap);
			goto out;
		}
		ret += n;
	}
	va_end(ap);

	rc = run_buffer(buffer);

out:
	free(buffer);
	return rc;
}
390
/*
 * Line callback: treat 'buffer' as a filesystem type name and try to mount
 * cbarg->rootfs on cbarg->target with it.
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped (it
 * contains "nodev") or the mount attempt failed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *args = data;
	char *type = buffer;
	int rc;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	/* trim the line on both sides, in place */
	type += lxc_char_left_gc(type, strlen(type));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      args->rootfs, args->target, type);

	rc = mount(args->rootfs, args->target, type, args->mntopt, NULL);
	if (rc != 0) {
		DEBUG("mount failed with error: %s", strerror(errno));
		return 0;
	}

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     args->rootfs, args->target, type);

	return 1;
}
422
2656d231 423static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
78ae2fcc 424{
a6afdde9 425 int i;
78ae2fcc 426
427 struct cbarg {
428 const char *rootfs;
a6afdde9 429 const char *target;
78ae2fcc 430 int mntopt;
431 } cbarg = {
432 .rootfs = rootfs,
a6afdde9 433 .target = target,
78ae2fcc 434 .mntopt = mntopt,
435 };
436
a6afdde9
DL
437 /*
438 * find the filesystem type with brute force:
439 * first we check with /etc/filesystems, in case the modules
78ae2fcc 440 * are auto-loaded and fall back to the supported kernel fs
441 */
442 char *fsfile[] = {
443 "/etc/filesystems",
444 "/proc/filesystems",
445 };
446
a6afdde9
DL
447 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
448
449 int ret;
450
451 if (access(fsfile[i], F_OK))
452 continue;
453
454 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
455 if (ret < 0) {
456 ERROR("failed to parse '%s'", fsfile[i]);
457 return -1;
458 }
459
460 if (ret)
461 return 0;
78ae2fcc 462 }
463
a6afdde9
DL
464 ERROR("failed to determine fs type for '%s'", rootfs);
465 return -1;
466}
467
/* Directory-backed rootfs: recursively bind-mount it on 'target'. */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", flags, NULL);
}
472
/*
 * Attach the file 'rootfs' to the loop device open on 'fd' and flag the
 * device LO_FLAGS_AUTOCLEAR. '*loinfo' is overwritten.
 *
 * Returns 0 on success, -1 on failure. The descriptor opened on 'rootfs'
 * is closed in both cases.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int backing;
	int ret;

	backing = open(rootfs, O_RDWR);
	if (backing < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));
	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, backing) != 0) {
		SYSERROR("failed to LOOP_SET_FD");
		ret = -1;
	} else if (ioctl(fd, LOOP_SET_STATUS64, loinfo) != 0) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		ret = -1;
	} else {
		ret = 0;
	}

	close(backing);

	return ret;
}
504
/*
 * File-backed rootfs: scan /dev for the first free loop device, attach
 * 'rootfs' to it and mount the device on 'target'.
 *
 * Only the first free device found is tried. Returns 0 on success, -1 if
 * /dev cannot be read, no free device exists, or attaching/mounting fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent entry, *cur;
	struct loop_info64 loinfo;
	char devpath[MAXPATHLEN];
	int result = -1;
	DIR *devdir;

	devdir = opendir("/dev");
	if (devdir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	for (;;) {
		int lofd, len;

		if (readdir_r(devdir, &entry, &cur) != 0 || cur == NULL)
			break;

		/* only loop* nodes matter; this also skips "." and ".." */
		if (strncmp(cur->d_name, "loop", 4) != 0)
			continue;

		len = snprintf(devpath, MAXPATHLEN, "/dev/%s", cur->d_name);
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		lofd = open(devpath, O_RDWR);
		if (lofd < 0)
			continue;

		/* a status query that succeeds means the device is in use */
		if (ioctl(lofd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(lofd);
			continue;
		}

		/* ENXIO is the answer of an unbound device */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     cur->d_name);
			close(lofd);
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		result = setup_lodev(rootfs, lofd, &loinfo);
		if (result == 0)
			result = mount_unknow_fs(devpath, target, 0);
		close(lofd);

		break;
	}

	if (closedir(devdir) != 0)
		WARN("failed to close directory");

	return result;
}
568
/* Block-device rootfs: probe the filesystem type and mount with no flags. */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int no_flags = 0;

	return mount_unknow_fs(rootfs, target, no_flags);
}
573
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}.hold for writing and hand the
 * descriptor back to the caller, who keeps it open for the duration of the
 * container run. This prevents the container from marking the underlying
 * fs readonly on shutdown.
 *
 * Returns:
 *   an open fd (>= 0) if the rootfs was pinned,
 *   -1 on error,
 *   -2 if nothing needed to be pinned (no rootfs given, path cannot be
 *      resolved, or it is not a directory).
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs)) {
		INFO("failed to get real path for '%s', not pinning", rootfs);
		return -2;
	}

	if (access(absrootfs, F_OK)) {
		SYSERROR("'%s' is not accessible", absrootfs);
		return -1;
	}

	if (stat(absrootfs, &s)) {
		SYSERROR("failed to stat '%s'", absrootfs);
		return -1;
	}

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s%s", absrootfs, ".hold");
	if (ret < 0 || ret >= MAXPATHLEN) {
		/* not a syscall failure: errno carries no information here */
		ERROR("pathname too long for rootfs hold file");
		return -1;
	}

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0) {
		/* report the failure instead of logging "opened ... as fd -1" */
		SYSERROR("failed to open '%s'", absrootfspin);
		return -1;
	}

	INFO("opened %s as fd %d\n", absrootfspin, fd);
	return fd;
}
621
2656d231 622static int mount_rootfs(const char *rootfs, const char *target)
0ad19a3f 623{
b09ef133 624 char absrootfs[MAXPATHLEN];
78ae2fcc 625 struct stat s;
a6afdde9 626 int i;
78ae2fcc 627
a6afdde9 628 typedef int (*rootfs_cb)(const char *, const char *);
78ae2fcc 629
630 struct rootfs_type {
631 int type;
632 rootfs_cb cb;
633 } rtfs_type[] = {
2656d231
DL
634 { S_IFDIR, mount_rootfs_dir },
635 { S_IFBLK, mount_rootfs_block },
636 { S_IFREG, mount_rootfs_file },
78ae2fcc 637 };
0ad19a3f 638
4c8ab83b 639 if (!realpath(rootfs, absrootfs)) {
36eb9bde 640 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 641 return -1;
642 }
b09ef133 643
b09ef133 644 if (access(absrootfs, F_OK)) {
36eb9bde 645 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 646 return -1;
647 }
648
78ae2fcc 649 if (stat(absrootfs, &s)) {
36eb9bde 650 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 651 return -1;
652 }
653
78ae2fcc 654 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 655
78ae2fcc 656 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
657 continue;
9b0f0477 658
a6afdde9 659 return rtfs_type[i].cb(absrootfs, target);
78ae2fcc 660 }
9b0f0477 661
36eb9bde 662 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 663 return -1;
0ad19a3f 664}
665
/*
 * Set the hostname to utsname->nodename. A NULL 'utsname' means there is
 * nothing to configure and counts as success.
 *
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;
	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
680
33fcb7a0 681static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 682 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 683{
7c6ef2a2
SH
684 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
685 int i, ret;
b0a33c1e 686
bc9bd0e3
DL
687 if (!rootfs->path)
688 return 0;
689
b0a33c1e 690 for (i = 0; i < tty_info->nbtty; i++) {
691
692 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
693
7c6ef2a2 694 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 695 rootfs->mount, i + 1);
7c6ef2a2
SH
696 if (ret >= sizeof(path)) {
697 ERROR("pathname too long for ttys");
698 return -1;
699 }
700 if (ttydir) {
701 /* create dev/lxc/tty%d" */
9ba8130c 702 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
703 rootfs->mount, ttydir, i + 1);
704 if (ret >= sizeof(lxcpath)) {
705 ERROR("pathname too long for ttys");
706 return -1;
707 }
708 ret = creat(lxcpath, 0660);
709 if (ret==-1 && errno != EEXIST) {
710 SYSERROR("error creating %s\n", lxcpath);
711 return -1;
712 }
4d44e274
SH
713 if (ret >= 0)
714 close(ret);
7c6ef2a2
SH
715 ret = unlink(path);
716 if (ret && errno != ENOENT) {
717 SYSERROR("error unlinking %s\n", path);
718 return -1;
719 }
b0a33c1e 720
7c6ef2a2
SH
721 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
722 WARN("failed to mount '%s'->'%s'",
723 pty_info->name, path);
724 continue;
725 }
13954cce 726
9ba8130c
SH
727 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
728 if (ret >= sizeof(lxcpath)) {
729 ERROR("tty pathname too long");
730 return -1;
731 }
7c6ef2a2
SH
732 ret = symlink(lxcpath, path);
733 if (ret) {
734 SYSERROR("failed to create symlink for tty %d\n", i+1);
735 return -1;
736 }
737 } else {
c6883f38
SH
738 /* If we populated /dev, then we need to create /dev/ttyN */
739 if (access(path, F_OK)) {
740 ret = creat(path, 0660);
741 if (ret==-1) {
742 SYSERROR("error creating %s\n", path);
743 /* this isn't fatal, continue */
744 } else
745 close(ret);
746 }
7c6ef2a2
SH
747 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
748 WARN("failed to mount '%s'->'%s'",
749 pty_info->name, path);
750 continue;
751 }
b0a33c1e 752 }
753 }
754
cd54d859
DL
755 INFO("%d tty(s) has been setup", tty_info->nbtty);
756
b0a33c1e 757 return 0;
758}
759
7a7ff0c6 760static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
761{
762 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 763 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
764 int found;
765 void **cbparm;
766
767 mountentry = buffer;
768 cbparm = (void **)data;
769
770 mountlist = cbparm[0];
771 pivotdir = cbparm[1];
772
773 /* parse entry, first field is mountname, ignore */
2796cf79 774 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
775 if (!mountpoint)
776 return -1;
777
778 /* second field is mountpoint */
2796cf79 779 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
780 if (!mountpoint)
781 return -1;
782
783 /* only consider mountpoints below old root fs */
784 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
785 return 0;
786
787 /* filter duplicate mountpoints */
788 found = 0;
789 lxc_list_for_each(iterator, mountlist) {
790 if (!strcmp(iterator->elem, mountpoint)) {
791 found = 1;
792 break;
793 }
794 }
795 if (found)
796 return 0;
797
798 /* add entry to list */
799 listentry = malloc(sizeof(*listentry));
800 if (!listentry) {
801 SYSERROR("malloc for mountpoint listentry failed");
802 return -1;
803 }
804
805 listentry->elem = strdup(mountpoint);
806 if (!listentry->elem) {
807 SYSERROR("strdup failed");
00b6be44 808 free(listentry);
bf601689
MH
809 return -1;
810 }
811 lxc_list_add_tail(mountlist, listentry);
812
813 return 0;
814}
815
cc6f6dd7 816static int umount_oldrootfs(const char *oldrootfs)
bf601689 817{
2382ecff 818 char path[MAXPATHLEN];
bf601689 819 void *cbparm[2];
9ebb03ad 820 struct lxc_list mountlist, *iterator, *next;
bf601689 821 int ok, still_mounted, last_still_mounted;
9ba8130c 822 int rc;
bf601689
MH
823
824 /* read and parse /proc/mounts in old root fs */
825 lxc_list_init(&mountlist);
826
cc6f6dd7 827 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
828 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
829 if (rc >= sizeof(path)) {
830 ERROR("rootfs name too long");
831 return -1;
832 }
bf601689 833 cbparm[0] = &mountlist;
bf601689 834
cc6f6dd7 835 cbparm[1] = strdup(path);
bf601689
MH
836 if (!cbparm[1]) {
837 SYSERROR("strdup failed");
838 return -1;
839 }
840
9ba8130c
SH
841 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
842 if (rc >= sizeof(path)) {
843 ERROR("container proc/mounts name too long");
844 return -1;
845 }
cc6f6dd7
DL
846
847 ok = lxc_file_for_each_line(path,
848 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
849 if (ok < 0) {
850 SYSERROR("failed to read or parse mount list '%s'", path);
851 return -1;
852 }
853
854 /* umount filesystems until none left or list no longer shrinks */
855 still_mounted = 0;
856 do {
857 last_still_mounted = still_mounted;
858 still_mounted = 0;
859
9ebb03ad 860 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 861
c08556c6 862 /* umount normally */
bf601689
MH
863 if (!umount(iterator->elem)) {
864 DEBUG("umounted '%s'", (char *)iterator->elem);
865 lxc_list_del(iterator);
866 continue;
867 }
868
bf601689
MH
869 still_mounted++;
870 }
7df119ee 871
bf601689
MH
872 } while (still_mounted > 0 && still_mounted != last_still_mounted);
873
7df119ee 874
c08556c6
DL
875 lxc_list_for_each(iterator, &mountlist) {
876
877 /* let's try a lazy umount */
878 if (!umount2(iterator->elem, MNT_DETACH)) {
879 INFO("lazy unmount of '%s'", (char *)iterator->elem);
880 continue;
881 }
882
883 /* be more brutal (nfs) */
884 if (!umount2(iterator->elem, MNT_FORCE)) {
885 INFO("forced unmount of '%s'", (char *)iterator->elem);
886 continue;
887 }
888
7df119ee 889 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 890 }
bf601689 891
cc6f6dd7
DL
892 return 0;
893}
894
/*
 * Make 'rootfs' the new root with pivot_root(), parking the old root on
 * <rootfs>/<pivotdir> ("lxc_putold" when 'pivotdir' is NULL), then unmount
 * the old tree. The pivot directory is removed afterwards only if this
 * function created it.
 *
 * Returns 0 on success, -1 on any failure before or during the pivot or
 * while unmounting the old root.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		created = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* from here on the old root is addressed by its relative name */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* failing to remove the temporary mount point is not fatal */
	if (created && rmdir(pivotdir) != 0)
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
954
91c3830e
SH
/*
 * Mount a fresh tmpfs on <root>/dev and create <root>/dev/pts inside it.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 *
 * Returns 0 on success, -1 on path overflow, mount failure or mkdir
 * failure.
 */
static int mount_autodev(char *root)
{
	int ret;
	char path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);
	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	/* ret == MAXPATHLEN already means the path was truncated */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mount("none", path, "tmpfs", 0, "size=100000");
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}
	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	if (ret) {
		SYSERROR("Failed to create /dev/pts in container");
		return -1;
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
985
c6883f38
SH
986struct lxc_devs {
987 char *name;
988 mode_t mode;
989 int maj;
990 int min;
991};
992
993struct lxc_devs lxc_devs[] = {
994 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
995 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
996 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
997 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
998 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
999 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
1000 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
1001};
1002
c6883f38
SH
1003static int setup_autodev(char *root)
1004{
1005 int ret;
1006 struct lxc_devs *d;
1007 char path[MAXPATHLEN];
1008 int i;
3a32201c 1009 mode_t cmask;
c6883f38 1010
91c3830e
SH
1011 INFO("Creating initial consoles under %s/dev\n", root);
1012
c6883f38 1013 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1014 if (ret < 0 || ret >= MAXPATHLEN) {
1015 ERROR("Error calculating container /dev location");
c6883f38 1016 return -1;
f7bee6c6 1017 }
91c3830e
SH
1018
1019 INFO("Populating /dev under %s\n", root);
3a32201c 1020 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38
SH
1021 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
1022 d = &lxc_devs[i];
1023 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1024 if (ret < 0 || ret >= MAXPATHLEN)
1025 return -1;
1026 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1027 if (ret && errno != EEXIST) {
c6883f38
SH
1028 SYSERROR("Error creating %s\n", d->name);
1029 return -1;
1030 }
1031 }
3a32201c 1032 umask(cmask);
c6883f38
SH
1033
1034 INFO("Populated /dev under %s\n", root);
1035 return 0;
1036}
1037
cc28d0b0
SH
/*
 * Tell whether '/' is mounted MS_SHARED by scanning /proc/self/mountinfo
 * for the entry whose mount point is "/" and looking for a "shared:" tag
 * after its mount options.
 *
 * Only '/' is examined. A container rootfs or mount location that is
 * shared on its own is not detected -- figuring that out would be too
 * much work to be worth it.
 *
 * Returns 1 if '/' is shared, 0 otherwise (also when mountinfo cannot be
 * opened).
 */
#define LINELEN 4096
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	FILE *mountinfo;
	int shared = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (mountinfo == NULL)
		return 0;

	while (!shared) {
		char *field, *end;
		int skipped;

		field = fgets(line, LINELEN, mountinfo);
		if (field == NULL)
			break;
		INFO("looking at .%s.", field);

		/* walk to the 4th space: the mount point field follows it */
		for (field = line, skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (field == NULL)
			continue;

		/* cut the line right after the mount point */
		end = strchr(field + 1, ' ');
		if (end == NULL)
			continue;
		*end = '\0';
		INFO("now p is .%s.", field);

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* this is '/': skip the mount options, then look for the tag */
		field = strchr(end + 1, ' ');
		if (field != NULL && strstr(field, "shared:") != NULL)
			shared = 1;
	}

	fclose(mountinfo);
	return shared;
}
1079
1080/*
1081 * I'll forgive you for asking whether all of this is needed :) The
1082 * answer is yes.
1083 * pivot_root will fail if the new root, the put_old dir, or the parent
1084 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1085 * or may not be current->fs_root - if we assumed it always was, we could
1086 * just mount --make-rslave /). So,
1087 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1088 * 2. make that MS_SLAVE
1089 * 3. make a 'root' directory under that
1090 * 4. mount --rbind / under the $tinyroot/root.
1091 * 5. make that rslave
1092 * 6. chdir and chroot into $tinyroot/root
1093 * 7. $tinyroot will be unmounted by our parent in start.c
1094 */
1095static int chroot_into_slave(struct lxc_conf *conf)
1096{
1097 char path[MAXPATHLEN];
1098 const char *destpath = conf->rootfs.mount;
1099 int ret;
1100
1101 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1102 SYSERROR("failed to mount %s bind", destpath);
1103 return -1;
1104 }
1105 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1106 SYSERROR("failed to make %s slave", destpath);
1107 return -1;
1108 }
1109 if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
1110 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1111 return -1;
1112 }
1113 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1114 if (ret < 0 || ret >= MAXPATHLEN) {
1115 ERROR("out of memory making root path");
1116 return -1;
1117 }
1118 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1119 SYSERROR("Failed to create /dev/pts in container");
1120 return -1;
1121 }
1122 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1123 SYSERROR("Failed to rbind mount / to %s", path);
1124 return -1;
1125 }
1126 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1127 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1128 return -1;
1129 }
1130 if (chdir(path)) {
1131 SYSERROR("Failed to chdir into tmp-/");
1132 return -1;
1133 }
1134 if (chroot(path)) {
1135 SYSERROR("Failed to chroot into tmp-/");
1136 return -1;
1137 }
1138 INFO("Chrooted into tmp-/ at %s\n", path);
1139 return 0;
1140}
1141
1142static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1143{
cc28d0b0
SH
1144 const struct lxc_rootfs *rootfs = &conf->rootfs;
1145
a0f379bf
DW
1146 if (!rootfs->path) {
1147 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1148 SYSERROR("Failed to make / rslave");
1149 return -1;
1150 }
c69bd12f 1151 return 0;
a0f379bf 1152 }
0ad19a3f 1153
12297168 1154 if (access(rootfs->mount, F_OK)) {
b1789442 1155 SYSERROR("failed to access to '%s', check it is present",
12297168 1156 rootfs->mount);
b1789442
DL
1157 return -1;
1158 }
1159
cc28d0b0
SH
1160 if (detect_shared_rootfs()) {
1161 if (chroot_into_slave(conf)) {
1162 ERROR("Failed to chroot into slave /");
1163 return -1;
1164 }
1165 }
1166
9be53773
SH
1167 // First try mounting rootfs using a bdev
1168 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1169 if (bdev && bdev->ops->mount(bdev) == 0) {
1170 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1171 return 0;
1172 }
2656d231 1173 if (mount_rootfs(rootfs->path, rootfs->mount)) {
a6afdde9 1174 ERROR("failed to mount rootfs");
c3f0a28c 1175 return -1;
1176 }
0ad19a3f 1177
12297168 1178 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1179
ac778708
DL
1180 return 0;
1181}
1182
1183int setup_pivot_root(const struct lxc_rootfs *rootfs)
1184{
ac778708
DL
1185 if (!rootfs->path)
1186 return 0;
1187
12297168 1188 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1189 ERROR("failed to setup pivot root");
25368b52 1190 return -1;
c69bd12f
DL
1191 }
1192
25368b52 1193 return 0;
0ad19a3f 1194}
1195
d852c78c 1196static int setup_pts(int pts)
3c26f34e 1197{
77890c6d
SW
1198 char target[PATH_MAX];
1199
d852c78c
DL
1200 if (!pts)
1201 return 0;
3c26f34e 1202
1203 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1204 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1205 return -1;
1206 }
1207
a6afdde9
DL
1208 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
1209 "newinstance,ptmxmode=0666")) {
36eb9bde 1210 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1211 return -1;
1212 }
1213
3c26f34e 1214 if (access("/dev/ptmx", F_OK)) {
1215 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1216 goto out;
36eb9bde 1217 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1218 return -1;
1219 }
1220
77890c6d
SW
1221 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1222 goto out;
1223
3c26f34e 1224 /* fallback here, /dev/pts/ptmx exists just mount bind */
1225 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1226 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1227 return -1;
1228 }
cd54d859
DL
1229
1230 INFO("created new pts instance");
d852c78c 1231
3c26f34e 1232out:
1233 return 0;
1234}
1235
cccc74b5
DL
/*
 * Apply the requested execution domain (personality) to this process.
 *
 * persona: value handed to personality(); -1 means "leave unchanged".
 *
 * When HAVE_SYS_PERSONALITY_H is not set the function does nothing.
 * Returns 0 on success or when nothing had to be done, -1 on failure.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1252
7c6ef2a2 1253static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1254 const struct lxc_console *console)
6e590161 1255{
63376d7d
DL
1256 char path[MAXPATHLEN];
1257 struct stat s;
7c6ef2a2 1258 int ret;
52e35957 1259
7c6ef2a2
SH
1260 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1261 if (ret >= sizeof(path)) {
1262 ERROR("console path too long\n");
1263 return -1;
1264 }
52e35957 1265
63376d7d 1266 if (access(path, F_OK)) {
466978b0 1267 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1268 return 0;
52e35957
DL
1269 }
1270
f78a1f32 1271 if (console->peer == -1) {
63376d7d 1272 INFO("no console output required");
f78a1f32
DL
1273 return 0;
1274 }
ed502555 1275
63376d7d
DL
1276 if (stat(path, &s)) {
1277 SYSERROR("failed to stat '%s'", path);
1278 return -1;
1279 }
1280
1281 if (chmod(console->name, s.st_mode)) {
1282 SYSERROR("failed to set mode '0%o' to '%s'",
1283 s.st_mode, console->name);
1284 return -1;
1285 }
13954cce 1286
63376d7d
DL
1287 if (mount(console->name, path, "none", MS_BIND, 0)) {
1288 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1289 return -1;
1290 }
1291
63376d7d 1292 INFO("console has been setup");
7c6ef2a2
SH
1293 return 0;
1294}
1295
1296static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1297 const struct lxc_console *console,
1298 char *ttydir)
1299{
1300 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1301 int ret;
1302
1303 /* create rootfs/dev/<ttydir> directory */
1304 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1305 ttydir);
1306 if (ret >= sizeof(path))
1307 return -1;
1308 ret = mkdir(path, 0755);
1309 if (ret && errno != EEXIST) {
1310 SYSERROR("failed with errno %d to create %s\n", errno, path);
1311 return -1;
1312 }
1313 INFO("created %s\n", path);
1314
1315 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1316 rootfs->mount, ttydir);
1317 if (ret >= sizeof(lxcpath)) {
1318 ERROR("console path too long\n");
1319 return -1;
1320 }
1321
1322 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1323 ret = unlink(path);
1324 if (ret && errno != ENOENT) {
1325 SYSERROR("error unlinking %s\n", path);
1326 return -1;
1327 }
1328
1329 ret = creat(lxcpath, 0660);
1330 if (ret==-1 && errno != EEXIST) {
1331 SYSERROR("error %d creating %s\n", errno, lxcpath);
1332 return -1;
1333 }
4d44e274
SH
1334 if (ret >= 0)
1335 close(ret);
7c6ef2a2
SH
1336
1337 if (console->peer == -1) {
1338 INFO("no console output required");
1339 return 0;
1340 }
1341
1342 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1343 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1344 return -1;
1345 }
1346
1347 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1348 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1349 if (ret >= sizeof(lxcpath)) {
1350 ERROR("lxc/console path too long");
1351 return -1;
1352 }
7c6ef2a2
SH
1353 ret = symlink(lxcpath, path);
1354 if (ret) {
1355 SYSERROR("failed to create symlink for console");
1356 return -1;
1357 }
1358
1359 INFO("console has been setup on %s", lxcpath);
cd54d859 1360
6e590161 1361 return 0;
1362}
1363
7c6ef2a2
SH
1364static int setup_console(const struct lxc_rootfs *rootfs,
1365 const struct lxc_console *console,
1366 char *ttydir)
1367{
1368 /* We don't have a rootfs, /dev/console will be shared */
1369 if (!rootfs->path)
1370 return 0;
1371 if (!ttydir)
1372 return setup_dev_console(rootfs, console);
1373
1374 return setup_ttydir_console(rootfs, console, ttydir);
1375}
1376
1bd051a6
SH
1377static int setup_kmsg(const struct lxc_rootfs *rootfs,
1378 const struct lxc_console *console)
1379{
1380 char kpath[MAXPATHLEN];
1381 int ret;
1382
222fea5a
DE
1383 if (!rootfs->path)
1384 return 0;
1bd051a6
SH
1385 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1386 if (ret < 0 || ret >= sizeof(kpath))
1387 return -1;
1388
1389 ret = unlink(kpath);
1390 if (ret && errno != ENOENT) {
1391 SYSERROR("error unlinking %s\n", kpath);
1392 return -1;
1393 }
1394
1395 ret = symlink("console", kpath);
1396 if (ret) {
1397 SYSERROR("failed to create symlink for kmsg");
1398 return -1;
1399 }
1400
1401 return 0;
1402}
1403
6031a6e5
DE
1404static int _setup_cgroup(const char *cgpath, struct lxc_list *cgroups,
1405 int devices)
576f946d 1406{
102a5303
DL
1407 struct lxc_list *iterator;
1408 struct lxc_cgroup *cg;
88329c69 1409 int ret = -1;
6f4a3756 1410
102a5303
DL
1411 if (lxc_list_empty(cgroups))
1412 return 0;
6f4a3756 1413
102a5303 1414 lxc_list_for_each(iterator, cgroups) {
102a5303 1415 cg = iterator->elem;
6f4a3756 1416
6031a6e5
DE
1417 if (devices == !strncmp("devices", cg->subsystem, 7)) {
1418 if (lxc_cgroup_set_bypath(cgpath, cg->subsystem,
1419 cg->value)) {
1420 ERROR("Error setting %s to %s for %s\n",
1421 cg->subsystem, cg->value, cgpath);
1422 goto out;
1423 }
ae5c8b8e 1424 }
6f4a3756 1425
102a5303 1426 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
6f4a3756 1427 }
13954cce 1428
88329c69 1429 ret = 0;
cd54d859 1430 INFO("cgroup has been setup");
88329c69
MN
1431out:
1432 return ret;
576f946d 1433}
1434
6031a6e5
DE
/* Apply only the "devices" cgroup entries; see _setup_cgroup(). */
int setup_cgroup_devices(const char *cgpath, struct lxc_list *cgroups)
{
	const int devices_only = 1;

	return _setup_cgroup(cgpath, cgroups, devices_only);
}
1439
/* Apply every cgroup entry except the "devices" ones; see _setup_cgroup(). */
int setup_cgroup(const char *cgpath, struct lxc_list *cgroups)
{
	const int devices_only = 0;

	return _setup_cgroup(cgpath, cgroups, devices_only);
}
1444
998ac676
RT
1445static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1446{
1447 struct mount_opt *mo;
1448
1449 /* If opt is found in mount_opt, set or clear flags.
1450 * Otherwise append it to data. */
1451
1452 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1453 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1454 if (mo->clear)
1455 *flags &= ~mo->flag;
1456 else
1457 *flags |= mo->flag;
1458 return;
1459 }
1460 }
1461
1462 if (strlen(*data))
1463 strcat(*data, ",");
1464 strcat(*data, opt);
1465}
1466
/*
 * Split a comma separated mount option string into mount flags and a
 * data string.
 *
 * mntopts:  option string, may be NULL.
 * mntflags: receives the accumulated flags (0 when there are none).
 * mntdata:  receives a malloc'ed string with the options that are not
 *           flags, or NULL when there are none; the caller frees it.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntflags = 0L;
	*mntdata = NULL;

	if (mntopts == NULL)
		return 0;

	/* strtok_r modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (copy == NULL) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the whole input */
	extra = malloc(strlen(copy) + 1);
	if (extra == NULL) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1505
911324ef
DL
/*
 * Mount fsname on target.
 *
 * The first mount() is issued without MS_REMOUNT.  When the requested
 * flags contain MS_REMOUNT or MS_BIND a second mount() with MS_REMOUNT
 * added follows so that the remaining options are applied.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long initial = mountflags & ~MS_REMOUNT;
	const int second_pass = (mountflags & (MS_REMOUNT | MS_BIND)) != 0;

	if (mount(fsname, target, fstype, initial, data) != 0) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (second_pass) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);
	return 0;
}
1532
/*
 * Mount an fstab entry exactly where it says, used when the container
 * has no rootfs of its own.
 *
 * A failed mount is forgiven when the entry carries the "optional"
 * option.  Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *data;
	int rc;

	if (parse_mntopts(mntent->mnt_opts, &flags, &data) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, data);
	free(data);

	return hasmntopt(mntent, "optional") != NULL ? 0 : rc;
}
1554
1555static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1556 const struct lxc_rootfs *rootfs,
1557 const char *lxc_name)
911324ef 1558{
013bd428 1559 char *aux;
59760f5d 1560 char path[MAXPATHLEN];
911324ef
DL
1561 unsigned long mntflags;
1562 char *mntdata;
80a881b2 1563 int r, ret = 0, offset;
67e571de 1564 const char *lxcpath;
0ad19a3f 1565
911324ef
DL
1566 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1567 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1568 return -1;
1569 }
1bc60a65 1570
2a59a681
SH
1571 lxcpath = default_lxc_path();
1572 if (!lxcpath) {
1573 ERROR("Out of memory");
1574 return -1;
1575 }
1576
80a881b2 1577 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1578 * use $lxcpath/CN/rootfs as the target prefix */
1579 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1580 if (r < 0 || r >= MAXPATHLEN)
1581 goto skipvarlib;
1582
1583 aux = strstr(mntent->mnt_dir, path);
1584 if (aux) {
1585 offset = strlen(path);
1586 goto skipabs;
1587 }
1588
1589skipvarlib:
013bd428
DL
1590 aux = strstr(mntent->mnt_dir, rootfs->path);
1591 if (!aux) {
1592 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1593 goto out;
1594 }
80a881b2
SH
1595 offset = strlen(rootfs->path);
1596
1597skipabs:
013bd428 1598
9ba8130c 1599 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1600 aux + offset);
1601 if (r < 0 || r >= MAXPATHLEN) {
1602 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1603 ret = -1;
1604 goto out;
1605 }
1606
d330fe7b 1607
013bd428 1608 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 1609 mntflags, mntdata);
0ad19a3f 1610
68c152ef
SH
1611 if (hasmntopt(mntent, "optional") != NULL)
1612 ret = 0;
1613
013bd428 1614out:
911324ef
DL
1615 free(mntdata);
1616 return ret;
1617}
d330fe7b 1618
911324ef
DL
/*
 * Mount an fstab entry whose target is relative to the container root.
 *
 * rootfs: directory the container root is mounted on; the entry's
 *         mnt_dir is appended to it.
 *
 * A failed mount is forgiven when the entry carries the "optional"
 * option.  Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* mntdata was allocated by parse_mntopts(), do not leak it */
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

out:
	free(mntdata);
	return ret;
}
1649
80a881b2
SH
1650static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1651 const char *lxc_name)
911324ef
DL
1652{
1653 struct mntent *mntent;
1654 int ret = -1;
e76b8764 1655
911324ef 1656 while ((mntent = getmntent(file))) {
e76b8764 1657
911324ef
DL
1658 if (!rootfs->path) {
1659 if (mount_entry_on_systemfs(mntent))
e76b8764 1660 goto out;
911324ef 1661 continue;
e76b8764
CDC
1662 }
1663
911324ef
DL
1664 /* We have a separate root, mounts are relative to it */
1665 if (mntent->mnt_dir[0] != '/') {
1666 if (mount_entry_on_relative_rootfs(mntent,
1667 rootfs->mount))
1668 goto out;
1669 continue;
1670 }
cd54d859 1671
80a881b2 1672 if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
911324ef 1673 goto out;
0ad19a3f 1674 }
cd54d859 1675
0ad19a3f 1676 ret = 0;
cd54d859
DL
1677
1678 INFO("mount points have been setup");
0ad19a3f 1679out:
e7938e9e
MN
1680 return ret;
1681}
1682
80a881b2
SH
/*
 * Mount all entries of the fstab file named by fstab.
 *
 * A NULL fstab means there is nothing to do.  Returns 0 on success, -1
 * when the file cannot be opened or an entry fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *stream;
	int rc;

	if (fstab == NULL)
		return 0;

	stream = setmntent(fstab, "r");
	if (stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, stream, lxc_name);
	endmntent(stream);

	return rc;
}
1703
80a881b2
SH
1704static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
1705 const char *lxc_name)
e7938e9e
MN
1706{
1707 FILE *file;
1708 struct lxc_list *iterator;
1709 char *mount_entry;
1710 int ret;
1711
1712 file = tmpfile();
1713 if (!file) {
1714 ERROR("tmpfile error: %m");
1715 return -1;
1716 }
1717
1718 lxc_list_for_each(iterator, mount) {
1719 mount_entry = iterator->elem;
1d6b1976 1720 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
1721 }
1722
1723 rewind(file);
1724
80a881b2 1725 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
1726
1727 fclose(file);
1728 return ret;
1729}
1730
81810dd1
DL
1731static int setup_caps(struct lxc_list *caps)
1732{
1733 struct lxc_list *iterator;
1734 char *drop_entry;
d55bc1ad 1735 char *ptr;
81810dd1
DL
1736 int i, capid;
1737
1738 lxc_list_for_each(iterator, caps) {
1739
1740 drop_entry = iterator->elem;
1741
1742 capid = -1;
1743
1744 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
1745
1746 if (strcmp(drop_entry, caps_opt[i].name))
1747 continue;
1748
1749 capid = caps_opt[i].value;
1750 break;
1751 }
1752
d55bc1ad
CS
1753 if (capid < 0) {
1754 /* try to see if it's numeric, so the user may specify
1755 * capabilities that the running kernel knows about but
1756 * we don't */
1757 capid = strtol(drop_entry, &ptr, 10);
1758 if (!ptr || *ptr != '\0' ||
1759 capid == LONG_MIN || capid == LONG_MAX)
1760 /* not a valid number */
1761 capid = -1;
1762 else if (capid > lxc_caps_last_cap())
1763 /* we have a number but it's not a valid
1764 * capability */
1765 capid = -1;
1766 }
1767
81810dd1 1768 if (capid < 0) {
1e11be34
DL
1769 ERROR("unknown capability %s", drop_entry);
1770 return -1;
81810dd1
DL
1771 }
1772
1773 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
1774
1775 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
1776 SYSERROR("failed to remove %s capability", drop_entry);
1777 return -1;
1778 }
1779
1780 }
1781
1782 DEBUG("capabilities has been setup");
1783
1784 return 0;
1785}
1786
0ad19a3f 1787static int setup_hw_addr(char *hwaddr, const char *ifname)
1788{
1789 struct sockaddr sockaddr;
1790 struct ifreq ifr;
1791 int ret, fd;
1792
3cfc0f3a
MN
1793 ret = lxc_convert_mac(hwaddr, &sockaddr);
1794 if (ret) {
1795 ERROR("mac address '%s' conversion failed : %s",
1796 hwaddr, strerror(-ret));
0ad19a3f 1797 return -1;
1798 }
1799
1800 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
1801 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
1802
1803 fd = socket(AF_INET, SOCK_DGRAM, 0);
1804 if (fd < 0) {
3ab87b66 1805 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 1806 return -1;
1807 }
1808
1809 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
1810 close(fd);
1811 if (ret)
3ab87b66 1812 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 1813
cd54d859
DL
1814 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifname);
1815
0ad19a3f 1816 return ret;
1817}
1818
82d5ae15 1819static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 1820{
82d5ae15
DL
1821 struct lxc_list *iterator;
1822 struct lxc_inetdev *inetdev;
3cfc0f3a 1823 int err;
0ad19a3f 1824
82d5ae15
DL
1825 lxc_list_for_each(iterator, ip) {
1826
1827 inetdev = iterator->elem;
1828
0093bb8c
DL
1829 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
1830 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
1831 if (err) {
1832 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
1833 ifindex, strerror(-err));
82d5ae15
DL
1834 return -1;
1835 }
1836 }
1837
1838 return 0;
0ad19a3f 1839}
1840
82d5ae15 1841static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 1842{
82d5ae15 1843 struct lxc_list *iterator;
7fa9074f 1844 struct lxc_inet6dev *inet6dev;
3cfc0f3a 1845 int err;
0ad19a3f 1846
82d5ae15
DL
1847 lxc_list_for_each(iterator, ip) {
1848
1849 inet6dev = iterator->elem;
1850
b3df193c 1851 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
1852 &inet6dev->mcast, &inet6dev->acast,
1853 inet6dev->prefix);
3cfc0f3a
MN
1854 if (err) {
1855 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
1856 ifindex, strerror(-err));
82d5ae15 1857 return -1;
3cfc0f3a 1858 }
82d5ae15
DL
1859 }
1860
1861 return 0;
0ad19a3f 1862}
1863
82d5ae15 1864static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 1865{
0ad19a3f 1866 char ifname[IFNAMSIZ];
0ad19a3f 1867 char *current_ifname = ifname;
3cfc0f3a 1868 int err;
0ad19a3f 1869
82d5ae15
DL
1870 /* empty network namespace */
1871 if (!netdev->ifindex) {
b0efbac4 1872 if (netdev->flags & IFF_UP) {
d472214b 1873 err = lxc_netdev_up("lo");
3cfc0f3a
MN
1874 if (err) {
1875 ERROR("failed to set the loopback up : %s",
1876 strerror(-err));
82d5ae15
DL
1877 return -1;
1878 }
82d5ae15 1879 }
7b57e8b6 1880 return 0;
0ad19a3f 1881 }
13954cce 1882
82d5ae15
DL
1883 /* retrieve the name of the interface */
1884 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 1885 ERROR("no interface corresponding to index '%d'",
82d5ae15 1886 netdev->ifindex);
0ad19a3f 1887 return -1;
1888 }
13954cce 1889
018ef520 1890 /* default: let the system to choose one interface name */
9d083402 1891 if (!netdev->name)
fb6d9b2f
DL
1892 netdev->name = netdev->type == LXC_NET_PHYS ?
1893 netdev->link : "eth%d";
018ef520 1894
82d5ae15 1895 /* rename the interface name */
b84f58b9 1896 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3cfc0f3a
MN
1897 if (err) {
1898 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
1899 strerror(-err));
018ef520
DL
1900 return -1;
1901 }
1902
1903 /* Re-read the name of the interface because its name has changed
1904 * and would be automatically allocated by the system
1905 */
82d5ae15 1906 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 1907 ERROR("no interface corresponding to index '%d'",
82d5ae15 1908 netdev->ifindex);
018ef520 1909 return -1;
0ad19a3f 1910 }
1911
82d5ae15
DL
1912 /* set a mac address */
1913 if (netdev->hwaddr) {
1914 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 1915 ERROR("failed to setup hw address for '%s'",
82d5ae15 1916 current_ifname);
0ad19a3f 1917 return -1;
1918 }
1919 }
1920
82d5ae15
DL
1921 /* setup ipv4 addresses on the interface */
1922 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 1923 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 1924 ifname);
1925 return -1;
1926 }
1927
82d5ae15
DL
1928 /* setup ipv6 addresses on the interface */
1929 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 1930 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 1931 ifname);
1932 return -1;
1933 }
1934
82d5ae15 1935 /* set the network device up */
b0efbac4 1936 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
1937 int err;
1938
d472214b 1939 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
1940 if (err) {
1941 ERROR("failed to set '%s' up : %s", current_ifname,
1942 strerror(-err));
0ad19a3f 1943 return -1;
1944 }
1945
1946 /* the network is up, make the loopback up too */
d472214b 1947 err = lxc_netdev_up("lo");
3cfc0f3a
MN
1948 if (err) {
1949 ERROR("failed to set the loopback up : %s",
1950 strerror(-err));
0ad19a3f 1951 return -1;
1952 }
1953 }
1954
f8fee0e2
MK
1955 /* We can only set up the default routes after bringing
1956 * up the interface, sine bringing up the interface adds
1957 * the link-local routes and we can't add a default
1958 * route if the gateway is not reachable. */
1959
1960 /* setup ipv4 gateway on the interface */
1961 if (netdev->ipv4_gateway) {
1962 if (!(netdev->flags & IFF_UP)) {
1963 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
1964 return -1;
1965 }
1966
1967 if (lxc_list_empty(&netdev->ipv4)) {
1968 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
1969 return -1;
1970 }
1971
1972 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
1973 if (err) {
1974 ERROR("failed to setup ipv4 gateway for '%s': %s",
1975 ifname, strerror(-err));
19a26f82
MK
1976 if (netdev->ipv4_gateway_auto) {
1977 char buf[INET_ADDRSTRLEN];
1978 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
1979 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
1980 }
f8fee0e2
MK
1981 return -1;
1982 }
1983 }
1984
1985 /* setup ipv6 gateway on the interface */
1986 if (netdev->ipv6_gateway) {
1987 if (!(netdev->flags & IFF_UP)) {
1988 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
1989 return -1;
1990 }
1991
1992 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
1993 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
1994 return -1;
1995 }
1996
1997 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
1998 if (err) {
1999 ERROR("failed to setup ipv6 gateway for '%s': %s",
2000 ifname, strerror(-err));
19a26f82
MK
2001 if (netdev->ipv6_gateway_auto) {
2002 char buf[INET6_ADDRSTRLEN];
72d0e1cb 2003 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
19a26f82
MK
2004 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2005 }
f8fee0e2
MK
2006 return -1;
2007 }
2008 }
2009
cd54d859
DL
2010 DEBUG("'%s' has been setup", current_ifname);
2011
0ad19a3f 2012 return 0;
2013}
2014
5f4535a3 2015static int setup_network(struct lxc_list *network)
0ad19a3f 2016{
82d5ae15 2017 struct lxc_list *iterator;
82d5ae15 2018 struct lxc_netdev *netdev;
0ad19a3f 2019
5f4535a3 2020 lxc_list_for_each(iterator, network) {
cd54d859 2021
5f4535a3 2022 netdev = iterator->elem;
82d5ae15
DL
2023
2024 if (setup_netdev(netdev)) {
2025 ERROR("failed to setup netdev");
2026 return -1;
2027 }
2028 }
cd54d859 2029
5f4535a3
DL
2030 if (!lxc_list_empty(network))
2031 INFO("network has been setup");
cd54d859
DL
2032
2033 return 0;
0ad19a3f 2034}
2035
7b35f3d6
SH
2036void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2037{
2038 int i;
2039
2040 INFO("running to reset %d nic names", conf->num_savednics);
2041 for (i=0; i<conf->num_savednics; i++) {
2042 struct saved_nic *s = &conf->saved_nics[i];
2043 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2044 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2045 free(s->orig_name);
2046 }
2047 conf->num_savednics = 0;
2048 free(conf->saved_nics);
2049}
2050
49684c0b
CS
2051static int setup_private_host_hw_addr(char *veth1)
2052{
2053 struct ifreq ifr;
2054 int err;
2055 int sockfd;
2056
2057 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
2058 if (sockfd < 0)
2059 return -errno;
2060
2061 snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2062 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2063 if (err < 0) {
2064 close(sockfd);
2065 return -errno;
2066 }
2067
2068 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2069 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
2070 close(sockfd);
2071 if (err < 0)
2072 return -errno;
2073
7ad84da7
DL
2074 DEBUG("mac address of host interface '%s' changed to private "
2075 "%02x:%02x:%02x:%02x:%02x:%02x", veth1,
2076 ifr.ifr_hwaddr.sa_data[0] & 0xff,
2077 ifr.ifr_hwaddr.sa_data[1] & 0xff,
2078 ifr.ifr_hwaddr.sa_data[2] & 0xff,
2079 ifr.ifr_hwaddr.sa_data[3] & 0xff,
2080 ifr.ifr_hwaddr.sa_data[4] & 0xff,
2081 ifr.ifr_hwaddr.sa_data[5] & 0xff);
49684c0b
CS
2082
2083 return 0;
2084}
2085
ae9242c8
SH
2086static char *default_rootfs_mount = LXCROOTFSMOUNT;
2087
7b379ab3 2088struct lxc_conf *lxc_conf_init(void)
089cd8b8 2089{
7b379ab3 2090 struct lxc_conf *new;
26ddeedd 2091 int i;
7b379ab3
MN
2092
2093 new = malloc(sizeof(*new));
2094 if (!new) {
2095 ERROR("lxc_conf_init : %m");
2096 return NULL;
2097 }
2098 memset(new, 0, sizeof(*new));
2099
cccc74b5 2100 new->personality = -1;
596a818d
DE
2101 new->console.log_path = NULL;
2102 new->console.log_fd = -1;
28a4b0e5 2103 new->console.path = NULL;
63376d7d
DL
2104 new->console.peer = -1;
2105 new->console.master = -1;
2106 new->console.slave = -1;
2107 new->console.name[0] = '\0';
d2e30e99 2108 new->maincmd_fd = -1;
ae9242c8 2109 new->rootfs.mount = default_rootfs_mount;
2f3f41d0 2110 new->kmsg = 1;
7b379ab3
MN
2111 lxc_list_init(&new->cgroup);
2112 lxc_list_init(&new->network);
2113 lxc_list_init(&new->mount_list);
81810dd1 2114 lxc_list_init(&new->caps);
f6d3e3e4 2115 lxc_list_init(&new->id_map);
26ddeedd
SH
2116 for (i=0; i<NUM_LXC_HOOKS; i++)
2117 lxc_list_init(&new->hooks[i]);
e075f5d9
SH
2118#if HAVE_APPARMOR
2119 new->aa_profile = NULL;
2120#endif
2121#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2122 new->lsm_umount_proc = 0;
2123#endif
7b379ab3
MN
2124
2125 return new;
089cd8b8
DL
2126}
2127
e3b4c4c4 2128static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2129{
8634bc19 2130 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2131 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2132 int err;
13954cce 2133
e892973e
DL
2134 if (netdev->priv.veth_attr.pair)
2135 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2136 else {
9ba8130c
SH
2137 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2138 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2139 ERROR("veth1 name too long");
2140 return -1;
2141 }
0e391e57 2142 veth1 = mktemp(veth1buf);
74a2b586
JK
2143 /* store away for deconf */
2144 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2145 }
82d5ae15 2146
0e391e57
DL
2147 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
2148 veth2 = mktemp(veth2buf);
82d5ae15
DL
2149
2150 if (!strlen(veth1) || !strlen(veth2)) {
2151 ERROR("failed to allocate a temporary name");
2152 return -1;
0ad19a3f 2153 }
2154
3cfc0f3a
MN
2155 err = lxc_veth_create(veth1, veth2);
2156 if (err) {
2157 ERROR("failed to create %s-%s : %s", veth1, veth2,
2158 strerror(-err));
6ab9ab6d 2159 return -1;
0ad19a3f 2160 }
13954cce 2161
49684c0b
CS
2162 /* changing the high byte of the mac address to 0xfe, the bridge interface
2163 * will always keep the host's mac address and not take the mac address
2164 * of a container */
2165 err = setup_private_host_hw_addr(veth1);
2166 if (err) {
2167 ERROR("failed to change mac address of host interface '%s' : %s",
2168 veth1, strerror(-err));
2169 goto out_delete;
2170 }
2171
82d5ae15 2172 if (netdev->mtu) {
d472214b 2173 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2174 if (!err)
d472214b 2175 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2176 if (err) {
2177 ERROR("failed to set mtu '%s' for %s-%s : %s",
2178 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2179 goto out_delete;
75d09f83
DL
2180 }
2181 }
2182
3cfc0f3a
MN
2183 if (netdev->link) {
2184 err = lxc_bridge_attach(netdev->link, veth1);
2185 if (err) {
2186 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2187 veth1, netdev->link, strerror(-err));
2188 goto out_delete;
2189 }
eb14c10a
DL
2190 }
2191
82d5ae15
DL
2192 netdev->ifindex = if_nametoindex(veth2);
2193 if (!netdev->ifindex) {
36eb9bde 2194 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2195 goto out_delete;
2196 }
2197
d472214b 2198 err = lxc_netdev_up(veth1);
6e35af2e
DL
2199 if (err) {
2200 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2201 goto out_delete;
0ad19a3f 2202 }
2203
e3b4c4c4 2204 if (netdev->upscript) {
751d9dcd
DL
2205 err = run_script(handler->name, "net", netdev->upscript, "up",
2206 "veth", veth1, (char*) NULL);
2207 if (err)
e3b4c4c4 2208 goto out_delete;
e3b4c4c4
ST
2209 }
2210
82d5ae15
DL
2211 DEBUG("instanciated veth '%s/%s', index is '%d'",
2212 veth1, veth2, netdev->ifindex);
2213
6ab9ab6d 2214 return 0;
eb14c10a
DL
2215
2216out_delete:
b84f58b9 2217 lxc_netdev_delete_by_name(veth1);
6ab9ab6d 2218 return -1;
13954cce 2219}
d957ae2d 2220
74a2b586
JK
2221static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2222{
2223 char *veth1;
2224 int err;
2225
2226 if (netdev->priv.veth_attr.pair)
2227 veth1 = netdev->priv.veth_attr.pair;
2228 else
2229 veth1 = netdev->priv.veth_attr.veth1;
2230
2231 if (netdev->downscript) {
2232 err = run_script(handler->name, "net", netdev->downscript,
2233 "down", "veth", veth1, (char*) NULL);
2234 if (err)
2235 return -1;
2236 }
2237 return 0;
2238}
2239
e3b4c4c4 2240static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2241{
0e391e57 2242 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2243 int err;
d957ae2d
MT
2244
2245 if (!netdev->link) {
2246 ERROR("no link specified for macvlan netdev");
2247 return -1;
2248 }
13954cce 2249
9ba8130c
SH
2250 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2251 if (err >= sizeof(peerbuf))
2252 return -1;
82d5ae15 2253
0e391e57 2254 peer = mktemp(peerbuf);
82d5ae15
DL
2255 if (!strlen(peer)) {
2256 ERROR("failed to make a temporary name");
2257 return -1;
0ad19a3f 2258 }
2259
3cfc0f3a
MN
2260 err = lxc_macvlan_create(netdev->link, peer,
2261 netdev->priv.macvlan_attr.mode);
2262 if (err) {
2263 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2264 peer, netdev->link, strerror(-err));
d957ae2d 2265 return -1;
0ad19a3f 2266 }
2267
82d5ae15
DL
2268 netdev->ifindex = if_nametoindex(peer);
2269 if (!netdev->ifindex) {
36eb9bde 2270 ERROR("failed to retrieve the index for %s", peer);
b84f58b9 2271 lxc_netdev_delete_by_name(peer);
d957ae2d 2272 return -1;
22ebac19 2273 }
2274
e3b4c4c4 2275 if (netdev->upscript) {
751d9dcd
DL
2276 err = run_script(handler->name, "net", netdev->upscript, "up",
2277 "macvlan", netdev->link, (char*) NULL);
2278 if (err)
e3b4c4c4 2279 return -1;
e3b4c4c4
ST
2280 }
2281
e892973e
DL
2282 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2283 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2284
d957ae2d 2285 return 0;
0ad19a3f 2286}
2287
74a2b586
JK
2288static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2289{
2290 int err;
2291
2292 if (netdev->downscript) {
2293 err = run_script(handler->name, "net", netdev->downscript,
2294 "down", "macvlan", netdev->link,
2295 (char*) NULL);
2296 if (err)
2297 return -1;
2298 }
2299 return 0;
2300}
2301
26c39028 2302/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2303static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2304{
2305 char peer[IFNAMSIZ];
3cfc0f3a 2306 int err;
26c39028
JHS
2307
2308 if (!netdev->link) {
2309 ERROR("no link specified for vlan netdev");
2310 return -1;
2311 }
2312
9ba8130c
SH
2313 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2314 if (err >= sizeof(peer)) {
2315 ERROR("peer name too long");
2316 return -1;
2317 }
26c39028 2318
3cfc0f3a
MN
2319 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2320 if (err) {
2321 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2322 peer, netdev->link, strerror(-err));
26c39028
JHS
2323 return -1;
2324 }
2325
2326 netdev->ifindex = if_nametoindex(peer);
2327 if (!netdev->ifindex) {
2328 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2329 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2330 return -1;
2331 }
2332
e892973e
DL
2333 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2334 netdev->ifindex);
2335
26c39028
JHS
2336 return 0;
2337}
2338
74a2b586
JK
/*
 * Shutdown callback for vlan network devices: there is nothing to do,
 * it always reports success.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void) handler;
	(void) netdev;

	return 0;
}
2343
e3b4c4c4 2344static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2345{
6168e99f
DL
2346 if (!netdev->link) {
2347 ERROR("no link specified for the physical interface");
2348 return -1;
2349 }
2350
9d083402 2351 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2352 if (!netdev->ifindex) {
9d083402 2353 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2354 return -1;
2355 }
2356
e3b4c4c4
ST
2357 if (netdev->upscript) {
2358 int err;
751d9dcd
DL
2359 err = run_script(handler->name, "net", netdev->upscript,
2360 "up", "phys", netdev->link, (char*) NULL);
2361 if (err)
e3b4c4c4 2362 return -1;
e3b4c4c4
ST
2363 }
2364
82d5ae15 2365 return 0;
0ad19a3f 2366}
2367
74a2b586
JK
2368static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2369{
2370 int err;
2371
2372 if (netdev->downscript) {
2373 err = run_script(handler->name, "net", netdev->downscript,
2374 "down", "phys", netdev->link, (char*) NULL);
2375 if (err)
2376 return -1;
2377 }
2378 return 0;
2379}
2380
e3b4c4c4 2381static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2382{
82d5ae15 2383 netdev->ifindex = 0;
e3b4c4c4
ST
2384 if (netdev->upscript) {
2385 int err;
751d9dcd
DL
2386 err = run_script(handler->name, "net", netdev->upscript,
2387 "up", "empty", (char*) NULL);
2388 if (err)
e3b4c4c4 2389 return -1;
e3b4c4c4 2390 }
82d5ae15 2391 return 0;
0ad19a3f 2392}
2393
74a2b586
JK
2394static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2395{
2396 int err;
2397
2398 if (netdev->downscript) {
2399 err = run_script(handler->name, "net", netdev->downscript,
2400 "down", "empty", (char*) NULL);
2401 if (err)
2402 return -1;
2403 }
2404 return 0;
2405}
2406
e3b4c4c4 2407int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2408{
e3b4c4c4 2409 struct lxc_list *network = &handler->conf->network;
82d5ae15 2410 struct lxc_list *iterator;
82d5ae15 2411 struct lxc_netdev *netdev;
0ad19a3f 2412
5f4535a3 2413 lxc_list_for_each(iterator, network) {
0ad19a3f 2414
5f4535a3 2415 netdev = iterator->elem;
13954cce 2416
24654103 2417 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2418 ERROR("invalid network configuration type '%d'",
5f4535a3 2419 netdev->type);
82d5ae15
DL
2420 return -1;
2421 }
0ad19a3f 2422
e3b4c4c4 2423 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2424 ERROR("failed to create netdev");
2425 return -1;
2426 }
e3b4c4c4 2427
0ad19a3f 2428 }
2429
2430 return 0;
2431}
2432
74a2b586 2433void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2434{
74a2b586 2435 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2436 struct lxc_list *iterator;
2437 struct lxc_netdev *netdev;
2438
2439 lxc_list_for_each(iterator, network) {
2440 netdev = iterator->elem;
d472214b 2441
74a2b586 2442 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2443 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2444 WARN("failed to rename to the initial name the " \
2445 "netdev '%s'", netdev->link);
d472214b 2446 continue;
d8f8e352 2447 }
d472214b 2448
74a2b586
JK
2449 if (netdev_deconf[netdev->type](handler, netdev)) {
2450 WARN("failed to destroy netdev");
2451 }
2452
d8f8e352
DL
2453 /* Recent kernel remove the virtual interfaces when the network
2454 * namespace is destroyed but in case we did not moved the
2455 * interface to the network namespace, we have to destroy it
2456 */
74a2b586
JK
2457 if (netdev->ifindex != 0 &&
2458 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2459 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2460 }
2461}
2462
5f4535a3 2463int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 2464{
82d5ae15 2465 struct lxc_list *iterator;
82d5ae15 2466 struct lxc_netdev *netdev;
3cfc0f3a 2467 int err;
0ad19a3f 2468
5f4535a3 2469 lxc_list_for_each(iterator, network) {
82d5ae15 2470
5f4535a3 2471 netdev = iterator->elem;
82d5ae15 2472
236087a6
DL
2473 /* empty network namespace, nothing to move */
2474 if (!netdev->ifindex)
2475 continue;
2476
d472214b 2477 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
2478 if (err) {
2479 ERROR("failed to move '%s' to the container : %s",
2480 netdev->link, strerror(-err));
82d5ae15
DL
2481 return -1;
2482 }
2483
c1c75c04 2484 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 2485 }
2486
2487 return 0;
2488}
2489
251d0d2a
DE
2490static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
2491 size_t buf_size)
f6d3e3e4
SH
2492{
2493 char path[PATH_MAX];
e4ccd113 2494 int ret, closeret;
f6d3e3e4
SH
2495 FILE *f;
2496
2497 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
2498 if (ret < 0 || ret >= PATH_MAX) {
2499 fprintf(stderr, "%s: path name too long", __func__);
2500 return -E2BIG;
2501 }
2502 f = fopen(path, "w");
2503 if (!f) {
2504 perror("open");
2505 return -EINVAL;
2506 }
251d0d2a 2507 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 2508 if (ret < 0)
e4ccd113
SH
2509 SYSERROR("writing id mapping");
2510 closeret = fclose(f);
2511 if (closeret)
2512 SYSERROR("writing id mapping");
2513 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
2514}
2515
2516int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
2517{
2518 struct lxc_list *iterator;
2519 struct id_map *map;
2520 int ret = 0;
251d0d2a 2521 enum idtype type;
4f7521b4 2522 char *buf = NULL, *pos;
251d0d2a
DE
2523
2524 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4
SH
2525 int left, fill;
2526
2527 pos = buf;
251d0d2a 2528 lxc_list_for_each(iterator, idmap) {
4f7521b4
SH
2529 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
2530 if (!buf)
2531 buf = pos = malloc(4096);
2532 if (!buf)
2533 return -ENOMEM;
2534
251d0d2a
DE
2535 map = iterator->elem;
2536 if (map->idtype == type) {
2537 left = 4096 - (pos - buf);
2538 fill = snprintf(pos, left, "%lu %lu %lu\n",
2539 map->nsid, map->hostid, map->range);
2540 if (fill <= 0 || fill >= left)
2541 SYSERROR("snprintf failed, too many mappings");
2542 pos += fill;
2543 }
2544 }
4f7521b4
SH
2545 if (pos == buf) // no mappings were found
2546 continue;
251d0d2a 2547 ret = write_id_mapping(type, pid, buf, pos-buf);
f6d3e3e4
SH
2548 if (ret)
2549 break;
2550 }
251d0d2a 2551
4f7521b4
SH
2552 if (buf)
2553 free(buf);
f6d3e3e4
SH
2554 return ret;
2555}
2556
19a26f82
MK
2557int lxc_find_gateway_addresses(struct lxc_handler *handler)
2558{
2559 struct lxc_list *network = &handler->conf->network;
2560 struct lxc_list *iterator;
2561 struct lxc_netdev *netdev;
2562 int link_index;
2563
2564 lxc_list_for_each(iterator, network) {
2565 netdev = iterator->elem;
2566
2567 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2568 continue;
2569
2570 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2571 ERROR("gateway = auto only supported for "
2572 "veth and macvlan");
2573 return -1;
2574 }
2575
2576 if (!netdev->link) {
2577 ERROR("gateway = auto needs a link interface");
2578 return -1;
2579 }
2580
2581 link_index = if_nametoindex(netdev->link);
2582 if (!link_index)
2583 return -EINVAL;
2584
2585 if (netdev->ipv4_gateway_auto) {
2586 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2587 ERROR("failed to automatically find ipv4 gateway "
2588 "address from link interface '%s'", netdev->link);
2589 return -1;
2590 }
2591 }
2592
2593 if (netdev->ipv6_gateway_auto) {
2594 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2595 ERROR("failed to automatically find ipv6 gateway "
2596 "address from link interface '%s'", netdev->link);
2597 return -1;
2598 }
2599 }
2600 }
2601
2602 return 0;
2603}
2604
5e4a62bf 2605int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 2606{
5e4a62bf 2607 struct lxc_tty_info *tty_info = &conf->tty_info;
985d15b1 2608 int i;
b0a33c1e 2609
5e4a62bf
DL
2610 /* no tty in the configuration */
2611 if (!conf->tty)
b0a33c1e 2612 return 0;
2613
13954cce 2614 tty_info->pty_info =
e4e7d59d 2615 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 2616 if (!tty_info->pty_info) {
36eb9bde 2617 SYSERROR("failed to allocate pty_info");
985d15b1 2618 return -1;
b0a33c1e 2619 }
2620
985d15b1 2621 for (i = 0; i < conf->tty; i++) {
13954cce 2622
b0a33c1e 2623 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2624
13954cce 2625 if (openpty(&pty_info->master, &pty_info->slave,
b0a33c1e 2626 pty_info->name, NULL, NULL)) {
36eb9bde 2627 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
2628 tty_info->nbtty = i;
2629 lxc_delete_tty(tty_info);
2630 return -1;
b0a33c1e 2631 }
2632
5332bb84
DL
2633 DEBUG("allocated pty '%s' (%d/%d)",
2634 pty_info->name, pty_info->master, pty_info->slave);
2635
b035ad62
MS
2636 /* Prevent leaking the file descriptors to the container */
2637 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
2638 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
2639
b0a33c1e 2640 pty_info->busy = 0;
2641 }
2642
985d15b1 2643 tty_info->nbtty = conf->tty;
1ac470c0
DL
2644
2645 INFO("tty's configured");
2646
985d15b1 2647 return 0;
b0a33c1e 2648}
2649
2650void lxc_delete_tty(struct lxc_tty_info *tty_info)
2651{
2652 int i;
2653
2654 for (i = 0; i < tty_info->nbtty; i++) {
2655 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
2656
2657 close(pty_info->master);
2658 close(pty_info->slave);
2659 }
2660
2661 free(tty_info->pty_info);
2662 tty_info->nbtty = 0;
2663}
2664
f6d3e3e4
SH
2665/*
2666 * given a host uid, return the ns uid if it is mapped.
2667 * if it is not mapped, return the original host id.
2668 */
2669static int shiftid(struct lxc_conf *c, int uid, enum idtype w)
2670{
2671 struct lxc_list *iterator;
2672 struct id_map *map;
2673 int low, high;
2674
2675 lxc_list_for_each(iterator, &c->id_map) {
2676 map = iterator->elem;
2677 if (map->idtype != w)
2678 continue;
2679
2680 low = map->nsid;
2681 high = map->nsid + map->range;
2682 if (uid < low || uid >= high)
2683 continue;
2684
2685 return uid - low + map->hostid;
2686 }
2687
2688 return uid;
2689}
2690
2691/*
2692 * Take a pathname for a file created on the host, and map the uid and gid
2693 * into the container if needed. (Used for ttys)
2694 */
2695static int uid_shift_file(char *path, struct lxc_conf *c)
2696{
2697 struct stat statbuf;
2698 int newuid, newgid;
2699
2700 if (stat(path, &statbuf)) {
2701 SYSERROR("stat(%s)", path);
2702 return -1;
2703 }
2704
2705 newuid = shiftid(c, statbuf.st_uid, ID_TYPE_UID);
2706 newgid = shiftid(c, statbuf.st_gid, ID_TYPE_GID);
2707 if (newuid != statbuf.st_uid || newgid != statbuf.st_gid) {
20087962 2708 DEBUG("chowning %s from %d:%d to %d:%d\n", path, (int)statbuf.st_uid, (int)statbuf.st_gid, newuid, newgid);
f6d3e3e4
SH
2709 if (chown(path, newuid, newgid)) {
2710 SYSERROR("chown(%s)", path);
2711 return -1;
2712 }
2713 }
2714 return 0;
2715}
2716
2717int uid_shift_ttys(int pid, struct lxc_conf *conf)
2718{
2719 int i, ret;
2720 struct lxc_tty_info *tty_info = &conf->tty_info;
2721 char path[MAXPATHLEN];
2722 char *ttydir = conf->ttydir;
2723
2724 if (!conf->rootfs.path)
2725 return 0;
2726 /* first the console */
2727 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/console", pid, ttydir ? ttydir : "");
2728 if (ret < 0 || ret >= sizeof(path)) {
2729 ERROR("console path too long\n");
2730 return -1;
2731 }
2732 if (uid_shift_file(path, conf)) {
2733 DEBUG("Failed to chown the console %s.\n", path);
2734 return -1;
2735 }
2736 for (i=0; i< tty_info->nbtty; i++) {
2737 ret = snprintf(path, sizeof(path), "/proc/%d/root/dev/%s/tty%d",
2738 pid, ttydir ? ttydir : "", i + 1);
2739 if (ret < 0 || ret >= sizeof(path)) {
2740 ERROR("pathname too long for ttys");
2741 return -1;
2742 }
2743 if (uid_shift_file(path, conf)) {
2744 DEBUG("Failed to chown pty %s.\n", path);
2745 return -1;
2746 }
2747 }
2748
2749 return 0;
2750}
2751
571e6ec8 2752int lxc_setup(const char *name, struct lxc_conf *lxc_conf)
0ad19a3f 2753{
e075f5d9
SH
2754#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
2755 int mounted;
2756#endif
2757
571e6ec8 2758 if (setup_utsname(lxc_conf->utsname)) {
36eb9bde 2759 ERROR("failed to setup the utsname for '%s'", name);
95b5ffaf 2760 return -1;
0ad19a3f 2761 }
2762
5f4535a3 2763 if (setup_network(&lxc_conf->network)) {
36eb9bde 2764 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 2765 return -1;
0ad19a3f 2766 }
2767
89eaa05e
SH
2768 if (run_lxc_hooks(name, "pre-mount", lxc_conf)) {
2769 ERROR("failed to run pre-mount hooks for container '%s'.", name);
2770 return -1;
2771 }
5ea6163a 2772
cc28d0b0 2773 if (setup_rootfs(lxc_conf)) {
ac778708 2774 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 2775 return -1;
0ad19a3f 2776 }
2777
c6883f38 2778 if (lxc_conf->autodev) {
91c3830e
SH
2779 if (mount_autodev(lxc_conf->rootfs.mount)) {
2780 ERROR("failed to mount /dev in the container");
c6883f38
SH
2781 return -1;
2782 }
2783 }
2784
80a881b2 2785 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 2786 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 2787 return -1;
576f946d 2788 }
2789
c1dc38c2 2790 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
2791 ERROR("failed to setup the mount entries for '%s'", name);
2792 return -1;
2793 }
2794
773fb9ca
SH
2795 if (run_lxc_hooks(name, "mount", lxc_conf)) {
2796 ERROR("failed to run mount hooks for container '%s'.", name);
2797 return -1;
2798 }
2799
91c3830e 2800 if (lxc_conf->autodev) {
f7bee6c6
MW
2801 if (run_lxc_hooks(name, "autodev", lxc_conf)) {
2802 ERROR("failed to run autodev hooks for container '%s'.", name);
2803 return -1;
2804 }
91c3830e
SH
2805 if (setup_autodev(lxc_conf->rootfs.mount)) {
2806 ERROR("failed to populate /dev in the container");
2807 return -1;
2808 }
2809 }
2810
7c6ef2a2 2811 if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 2812 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 2813 return -1;
6e590161 2814 }
2815
7e0e1d94
AV
2816 if (lxc_conf->kmsg) {
2817 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
2818 ERROR("failed to setup kmsg for '%s'", name);
2819 }
1bd051a6 2820
7c6ef2a2 2821 if (setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 2822 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 2823 return -1;
b0a33c1e 2824 }
2825
e075f5d9 2826#if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */
9ac3ffb5
SG
2827 INFO("rootfs path is .%s., mount is .%s.", lxc_conf->rootfs.path,
2828 lxc_conf->rootfs.mount);
2829 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0)
2830 mounted = 0;
2831 else
2832 mounted = lsm_mount_proc_if_needed(lxc_conf->rootfs.path, lxc_conf->rootfs.mount);
e075f5d9
SH
2833 if (mounted == -1) {
2834 SYSERROR("failed to mount /proc in the container.");
2835 return -1;
2836 } else if (mounted == 1) {
2837 lxc_conf->lsm_umount_proc = 1;
2838 }
2839#endif
2840
ac778708 2841 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 2842 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 2843 return -1;
ed502555 2844 }
2845
571e6ec8 2846 if (setup_pts(lxc_conf->pts)) {
36eb9bde 2847 ERROR("failed to setup the new pts instance");
95b5ffaf 2848 return -1;
3c26f34e 2849 }
2850
cccc74b5
DL
2851 if (setup_personality(lxc_conf->personality)) {
2852 ERROR("failed to setup personality");
2853 return -1;
2854 }
2855
f6d3e3e4
SH
2856 if (lxc_list_empty(&lxc_conf->id_map)) {
2857 if (setup_caps(&lxc_conf->caps)) {
2858 ERROR("failed to drop capabilities");
2859 return -1;
2860 }
81810dd1
DL
2861 }
2862
cd54d859
DL
2863 NOTICE("'%s' is setup.", name);
2864
0ad19a3f 2865 return 0;
2866}
26ddeedd
SH
2867
2868int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf)
2869{
2870 int which = -1;
2871 struct lxc_list *it;
2872
2873 if (strcmp(hook, "pre-start") == 0)
2874 which = LXCHOOK_PRESTART;
5ea6163a
SH
2875 else if (strcmp(hook, "pre-mount") == 0)
2876 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
2877 else if (strcmp(hook, "mount") == 0)
2878 which = LXCHOOK_MOUNT;
f7bee6c6
MW
2879 else if (strcmp(hook, "autodev") == 0)
2880 which = LXCHOOK_AUTODEV;
26ddeedd
SH
2881 else if (strcmp(hook, "start") == 0)
2882 which = LXCHOOK_START;
2883 else if (strcmp(hook, "post-stop") == 0)
2884 which = LXCHOOK_POSTSTOP;
2885 else
2886 return -1;
2887 lxc_list_for_each(it, &conf->hooks[which]) {
2888 int ret;
2889 char *hookname = it->elem;
2890 ret = run_script(name, "lxc", hookname, hook, NULL);
2891 if (ret)
2892 return ret;
2893 }
2894 return 0;
2895}
72d0e1cb 2896
427b3a21 2897static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
2898{
2899 struct lxc_netdev *netdev = it->elem;
9ebb03ad 2900 struct lxc_list *it2,*next;
72d0e1cb
SG
2901
2902 lxc_list_del(it);
2903
2904 if (netdev->link)
2905 free(netdev->link);
2906 if (netdev->name)
2907 free(netdev->name);
2908 if (netdev->upscript)
2909 free(netdev->upscript);
2910 if (netdev->hwaddr)
2911 free(netdev->hwaddr);
2912 if (netdev->mtu)
2913 free(netdev->mtu);
2914 if (netdev->ipv4_gateway)
2915 free(netdev->ipv4_gateway);
2916 if (netdev->ipv6_gateway)
2917 free(netdev->ipv6_gateway);
9ebb03ad 2918 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
2919 lxc_list_del(it2);
2920 free(it2->elem);
2921 free(it2);
2922 }
9ebb03ad 2923 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
2924 lxc_list_del(it2);
2925 free(it2->elem);
2926 free(it2);
2927 }
d95db067 2928 free(netdev);
72d0e1cb
SG
2929 free(it);
2930}
2931
2932/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 2933int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
2934{
2935 char *p1;
2936 int ret, idx, i;
2937 struct lxc_list *it;
2938 struct lxc_netdev *netdev;
2939
2940 p1 = index(key, '.');
2941 if (!p1 || *(p1+1) == '\0')
2942 p1 = NULL;
2943
2944 ret = sscanf(key, "%d", &idx);
2945 if (ret != 1) return -1;
2946 if (idx < 0)
2947 return -1;
2948
2949 i = 0;
2950 lxc_list_for_each(it, &c->network) {
2951 if (i == idx)
2952 break;
2953 i++;
2954 }
2955 if (i < idx) // we don't have that many nics defined
2956 return -1;
2957
2958 if (!it || !it->elem)
2959 return -1;
2960
2961 netdev = it->elem;
2962
2963 if (!p1) {
2964 lxc_remove_nic(it);
2965 } else if (strcmp(p1, "ipv4") == 0) {
9ebb03ad
DE
2966 struct lxc_list *it2,*next;
2967 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
2968 lxc_list_del(it2);
2969 free(it2->elem);
2970 free(it2);
2971 }
2972 } else if (strcmp(p1, "ipv6") == 0) {
9ebb03ad
DE
2973 struct lxc_list *it2,*next;
2974 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
2975 lxc_list_del(it2);
2976 free(it2->elem);
2977 free(it2);
2978 }
2979 } else if (strcmp(p1, "link") == 0) {
2980 if (netdev->link) {
2981 free(netdev->link);
2982 netdev->link = NULL;
2983 }
2984 } else if (strcmp(p1, "name") == 0) {
2985 if (netdev->name) {
2986 free(netdev->name);
2987 netdev->name = NULL;
2988 }
2989 } else if (strcmp(p1, "script.up") == 0) {
2990 if (netdev->upscript) {
2991 free(netdev->upscript);
2992 netdev->upscript = NULL;
2993 }
2994 } else if (strcmp(p1, "hwaddr") == 0) {
2995 if (netdev->hwaddr) {
2996 free(netdev->hwaddr);
2997 netdev->hwaddr = NULL;
2998 }
2999 } else if (strcmp(p1, "mtu") == 0) {
3000 if (netdev->mtu) {
3001 free(netdev->mtu);
3002 netdev->mtu = NULL;
3003 }
3004 } else if (strcmp(p1, "ipv4_gateway") == 0) {
3005 if (netdev->ipv4_gateway) {
3006 free(netdev->ipv4_gateway);
3007 netdev->ipv4_gateway = NULL;
3008 }
3009 } else if (strcmp(p1, "ipv6_gateway") == 0) {
3010 if (netdev->ipv6_gateway) {
3011 free(netdev->ipv6_gateway);
3012 netdev->ipv6_gateway = NULL;
3013 }
3014 }
3015 else return -1;
3016
3017 return 0;
3018}
3019
3020int lxc_clear_config_network(struct lxc_conf *c)
3021{
9ebb03ad
DE
3022 struct lxc_list *it,*next;
3023 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3024 lxc_remove_nic(it);
3025 }
3026 return 0;
3027}
3028
3029int lxc_clear_config_caps(struct lxc_conf *c)
3030{
9ebb03ad 3031 struct lxc_list *it,*next;
72d0e1cb 3032
9ebb03ad 3033 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3034 lxc_list_del(it);
3035 free(it->elem);
3036 free(it);
3037 }
3038 return 0;
3039}
3040
12a50cc6 3041int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 3042{
9ebb03ad 3043 struct lxc_list *it,*next;
72d0e1cb 3044 bool all = false;
12a50cc6 3045 const char *k = key + 11;
72d0e1cb
SG
3046
3047 if (strcmp(key, "lxc.cgroup") == 0)
3048 all = true;
3049
9ebb03ad 3050 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
3051 struct lxc_cgroup *cg = it->elem;
3052 if (!all && strcmp(cg->subsystem, k) != 0)
3053 continue;
3054 lxc_list_del(it);
3055 free(cg->subsystem);
3056 free(cg->value);
3057 free(cg);
3058 free(it);
3059 }
3060 return 0;
3061}
3062
3063int lxc_clear_mount_entries(struct lxc_conf *c)
3064{
9ebb03ad 3065 struct lxc_list *it,*next;
72d0e1cb 3066
9ebb03ad 3067 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
3068 lxc_list_del(it);
3069 free(it->elem);
3070 free(it);
3071 }
3072 return 0;
3073}
3074
12a50cc6 3075int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3076{
9ebb03ad 3077 struct lxc_list *it,*next;
17ed13a3 3078 bool all = false, done = false;
12a50cc6 3079 const char *k = key + 9;
72d0e1cb
SG
3080 int i;
3081
17ed13a3
SH
3082 if (strcmp(key, "lxc.hook") == 0)
3083 all = true;
3084
72d0e1cb 3085 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 3086 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 3087 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
3088 lxc_list_del(it);
3089 free(it->elem);
3090 free(it);
3091 }
3092 done = true;
72d0e1cb
SG
3093 }
3094 }
17ed13a3
SH
3095
3096 if (!done) {
3097 ERROR("Invalid hook key: %s", key);
3098 return -1;
3099 }
72d0e1cb
SG
3100 return 0;
3101}
8eb5694b 3102
7b35f3d6
SH
3103void lxc_clear_saved_nics(struct lxc_conf *conf)
3104{
3105 int i;
3106
3107 if (!conf->num_savednics)
3108 return;
3109 for (i=0; i < conf->num_savednics; i++)
3110 free(conf->saved_nics[i].orig_name);
3111 conf->saved_nics = 0;
3112 free(conf->saved_nics);
3113}
3114
8eb5694b
SH
3115void lxc_conf_free(struct lxc_conf *conf)
3116{
3117 if (!conf)
3118 return;
3119 if (conf->console.path)
3120 free(conf->console.path);
ae9242c8 3121 if (conf->rootfs.mount != default_rootfs_mount)
8eb5694b 3122 free(conf->rootfs.mount);
d95db067
DE
3123 if (conf->rootfs.path)
3124 free(conf->rootfs.path);
3125 if (conf->utsname)
3126 free(conf->utsname);
3127 if (conf->ttydir)
3128 free(conf->ttydir);
3129 if (conf->fstab)
3130 free(conf->fstab);
fc7e8864
WM
3131 if (conf->rcfile)
3132 free(conf->rcfile);
8eb5694b 3133 lxc_clear_config_network(conf);
1f530df6 3134#if HAVE_APPARMOR
8eb5694b
SH
3135 if (conf->aa_profile)
3136 free(conf->aa_profile);
1f530df6 3137#endif
769872f9 3138 lxc_seccomp_free(conf);
8eb5694b
SH
3139 lxc_clear_config_caps(conf);
3140 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 3141 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3142 lxc_clear_mount_entries(conf);
7b35f3d6 3143 lxc_clear_saved_nics(conf);
8eb5694b
SH
3144 free(conf);
3145}