]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
tests: Fix startone's call to want_daemonize
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
4a0ba80d 35#include <time.h>
e827ff7e
SG
36
37#if HAVE_PTY_H
b0a33c1e 38#include <pty.h>
e827ff7e
SG
39#else
40#include <../include/openpty.h>
41#endif
0ad19a3f 42
b3ecde1e
DL
43#include <linux/loop.h>
44
0ad19a3f 45#include <sys/types.h>
46#include <sys/utsname.h>
47#include <sys/param.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
50#include <sys/mount.h>
51#include <sys/mman.h>
81810dd1 52#include <sys/prctl.h>
0ad19a3f 53
54#include <arpa/inet.h>
55#include <fcntl.h>
56#include <netinet/in.h>
57#include <net/if.h>
6f4a3756 58#include <libgen.h>
0ad19a3f 59
e5bda9ee 60#include "network.h"
61#include "error.h"
b2718c72 62#include "parse.h"
881450bb 63#include "config.h"
1b09f2c0
DL
64#include "utils.h"
65#include "conf.h"
66#include "log.h"
67#include "lxc.h" /* for lxc_cgroup_set() */
d55bc1ad 68#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 69#include "bdev.h"
368bbc02 70#include "cgroup.h"
025ed0f3 71#include "lxclock.h"
4355ab5f 72#include "namespace.h"
fe4de9a6 73#include "lsm/lsm.h"
d0a36f2c 74
495d2046
SG
75#if HAVE_SYS_CAPABILITY_H
76#include <sys/capability.h>
77#endif
78
6ff05e18
SG
79#if HAVE_SYS_PERSONALITY_H
80#include <sys/personality.h>
81#endif
82
edaf8b1b
SG
83#if IS_BIONIC
84#include <../include/lxcmntent.h>
85#else
86#include <mntent.h>
87#endif
88
769872f9
SH
89#include "lxcseccomp.h"
90
36eb9bde 91lxc_log_define(lxc_conf, lxc);
e5bda9ee 92
0ad19a3f 93#define MAXHWLEN 18
94#define MAXINDEXLEN 20
442cbbe6 95#define MAXMTULEN 16
0ad19a3f 96#define MAXLINELEN 128
97
495d2046 98#if HAVE_SYS_CAPABILITY_H
b09094da
MN
99#ifndef CAP_SETFCAP
100#define CAP_SETFCAP 31
101#endif
102
103#ifndef CAP_MAC_OVERRIDE
104#define CAP_MAC_OVERRIDE 32
105#endif
106
107#ifndef CAP_MAC_ADMIN
108#define CAP_MAC_ADMIN 33
109#endif
495d2046 110#endif
b09094da
MN
111
112#ifndef PR_CAPBSET_DROP
113#define PR_CAPBSET_DROP 24
114#endif
115
9818cae4
SG
116#ifndef LO_FLAGS_AUTOCLEAR
117#define LO_FLAGS_AUTOCLEAR 4
118#endif
119
2d76d1d7
SG
120/* Define pivot_root() if missing from the C library */
121#ifndef HAVE_PIVOT_ROOT
/*
 * Fallback pivot_root() wrapper for C libraries that do not provide one.
 * Issues the raw syscall when the syscall number is known at build time;
 * otherwise fails with errno set to ENOSYS.
 */
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
131#else
132extern int pivot_root(const char * new_root, const char * put_old);
133#endif
134
135/* Define sethostname() if missing from the C library */
136#ifndef HAVE_SETHOSTNAME
/*
 * Fallback sethostname() wrapper for C libraries that do not provide one.
 * Issues the raw syscall when the syscall number is known at build time;
 * otherwise fails with errno set to ENOSYS.
 */
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
146#endif
147
72f919c4
SG
148/* Define __S_ISTYPE if missing from the C library */
149#ifndef __S_ISTYPE
150#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
151#endif
152
/*
 * Textual names of the container hooks, stored in an array of
 * NUM_LXC_HOOKS slots.
 * NOTE(review): presumably indexed by the hook enumeration declared in
 * conf.h, so the order here must match it -- confirm against the header.
 */
char *lxchook_names[NUM_LXC_HOOKS] = {
	"pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 155
e3b4c4c4 156typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 157
998ac676
RT
/* One row of the mount option table: an option keyword and its MS_* flag. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" or "nosuid" */
	int clear;	/* presumably non-zero when the keyword clears 'flag' rather than setting it -- confirm against the option parser */
	int flag;	/* MS_* mount flag associated with the keyword */
};
163
81810dd1
DL
/* One row of the capability table: a capability keyword and its CAP_* number. */
struct caps_opt {
	char *name;	/* capability keyword, e.g. "sys_admin" */
	int value;	/* matching CAP_* constant */
};
168
e3b4c4c4
ST
169static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
170static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
171static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
172static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
173static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 174static int instanciate_none(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 175
24654103
DL
/*
 * Dispatch table of network device creation callbacks, indexed by the
 * LXC_NET_* type constant. Slots without a designated initializer are NULL.
 */
static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
	[LXC_NET_VETH] = instanciate_veth,
	[LXC_NET_MACVLAN] = instanciate_macvlan,
	[LXC_NET_VLAN] = instanciate_vlan,
	[LXC_NET_PHYS] = instanciate_phys,
	[LXC_NET_EMPTY] = instanciate_empty,
	[LXC_NET_NONE] = instanciate_none,
};
184
74a2b586
JK
185static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
186static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
187static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
188static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
189static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 190static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586
JK
191
/*
 * Dispatch table of network device shutdown callbacks, indexed by the
 * LXC_NET_* type constant. It reuses the instanciate_cb function type
 * because the shutdown callbacks have the same signature.
 */
static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
	[LXC_NET_VETH] = shutdown_veth,
	[LXC_NET_MACVLAN] = shutdown_macvlan,
	[LXC_NET_VLAN] = shutdown_vlan,
	[LXC_NET_PHYS] = shutdown_phys,
	[LXC_NET_EMPTY] = shutdown_empty,
	[LXC_NET_NONE] = shutdown_none,
};
200
/*
 * Table of recognised mount option keywords, terminated by a NULL name.
 * Columns are { name, clear, flag }; positive/negative keyword pairs
 * (e.g. "ro"/"rw") share the same MS_* flag and differ only in 'clear'.
 */
static struct mount_opt mount_opt[] = {
	{ "defaults", 0, 0 },
	{ "ro", 0, MS_RDONLY },
	{ "rw", 1, MS_RDONLY },
	{ "suid", 1, MS_NOSUID },
	{ "nosuid", 0, MS_NOSUID },
	{ "dev", 1, MS_NODEV },
	{ "nodev", 0, MS_NODEV },
	{ "exec", 1, MS_NOEXEC },
	{ "noexec", 0, MS_NOEXEC },
	{ "sync", 0, MS_SYNCHRONOUS },
	{ "async", 1, MS_SYNCHRONOUS },
	{ "dirsync", 0, MS_DIRSYNC },
	{ "remount", 0, MS_REMOUNT },
	{ "mand", 0, MS_MANDLOCK },
	{ "nomand", 1, MS_MANDLOCK },
	{ "atime", 1, MS_NOATIME },
	{ "noatime", 0, MS_NOATIME },
	{ "diratime", 1, MS_NODIRATIME },
	{ "nodiratime", 0, MS_NODIRATIME },
	{ "bind", 0, MS_BIND },
	{ "rbind", 0, MS_BIND|MS_REC },
	{ "relatime", 0, MS_RELATIME },
	{ "norelatime", 1, MS_RELATIME },
	{ "strictatime", 0, MS_STRICTATIME },
	{ "nostrictatime", 1, MS_STRICTATIME },
	{ NULL, 0, 0 },
};
229
/*
 * Table mapping capability keywords to CAP_* numbers. Unlike mount_opt[]
 * it has no terminating sentinel entry. Capabilities that only exist in
 * newer kernel headers are guarded individually; without
 * <sys/capability.h> the table is empty.
 */
#if HAVE_SYS_CAPABILITY_H
static struct caps_opt caps_opt[] = {
	{ "chown", CAP_CHOWN },
	{ "dac_override", CAP_DAC_OVERRIDE },
	{ "dac_read_search", CAP_DAC_READ_SEARCH },
	{ "fowner", CAP_FOWNER },
	{ "fsetid", CAP_FSETID },
	{ "kill", CAP_KILL },
	{ "setgid", CAP_SETGID },
	{ "setuid", CAP_SETUID },
	{ "setpcap", CAP_SETPCAP },
	{ "linux_immutable", CAP_LINUX_IMMUTABLE },
	{ "net_bind_service", CAP_NET_BIND_SERVICE },
	{ "net_broadcast", CAP_NET_BROADCAST },
	{ "net_admin", CAP_NET_ADMIN },
	{ "net_raw", CAP_NET_RAW },
	{ "ipc_lock", CAP_IPC_LOCK },
	{ "ipc_owner", CAP_IPC_OWNER },
	{ "sys_module", CAP_SYS_MODULE },
	{ "sys_rawio", CAP_SYS_RAWIO },
	{ "sys_chroot", CAP_SYS_CHROOT },
	{ "sys_ptrace", CAP_SYS_PTRACE },
	{ "sys_pacct", CAP_SYS_PACCT },
	{ "sys_admin", CAP_SYS_ADMIN },
	{ "sys_boot", CAP_SYS_BOOT },
	{ "sys_nice", CAP_SYS_NICE },
	{ "sys_resource", CAP_SYS_RESOURCE },
	{ "sys_time", CAP_SYS_TIME },
	{ "sys_tty_config", CAP_SYS_TTY_CONFIG },
	{ "mknod", CAP_MKNOD },
	{ "lease", CAP_LEASE },
#ifdef CAP_AUDIT_WRITE
	{ "audit_write", CAP_AUDIT_WRITE },
#endif
#ifdef CAP_AUDIT_CONTROL
	{ "audit_control", CAP_AUDIT_CONTROL },
#endif
	{ "setfcap", CAP_SETFCAP },
	{ "mac_override", CAP_MAC_OVERRIDE },
	{ "mac_admin", CAP_MAC_ADMIN },
#ifdef CAP_SYSLOG
	{ "syslog", CAP_SYSLOG },
#endif
#ifdef CAP_WAKE_ALARM
	{ "wake_alarm", CAP_WAKE_ALARM },
#endif
};
#else
static struct caps_opt caps_opt[] = {};
#endif
81810dd1 280
91c3830e
SH
281static int run_buffer(char *buffer)
282{
ebec9176 283 struct lxc_popen_FILE *f;
91c3830e 284 char *output;
8e7da691 285 int ret;
91c3830e 286
ebec9176 287 f = lxc_popen(buffer);
91c3830e
SH
288 if (!f) {
289 SYSERROR("popen failed");
290 return -1;
291 }
292
293 output = malloc(LXC_LOG_BUFFER_SIZE);
294 if (!output) {
295 ERROR("failed to allocate memory for script output");
ebec9176 296 lxc_pclose(f);
91c3830e
SH
297 return -1;
298 }
299
ebec9176 300 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
301 DEBUG("script output: %s", output);
302
303 free(output);
304
ebec9176 305 ret = lxc_pclose(f);
8e7da691 306 if (ret == -1) {
91c3830e
SH
307 SYSERROR("Script exited on error");
308 return -1;
8e7da691
DE
309 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
310 ERROR("Script exited with status %d", WEXITSTATUS(ret));
311 return -1;
312 } else if (WIFSIGNALED(ret)) {
313 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
314 strsignal(WTERMSIG(ret)));
315 return -1;
91c3830e
SH
316 }
317
318 return 0;
319}
320
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it via run_buffer().
 *
 * name     container name, passed as first script argument
 * section  configuration section, passed as second script argument
 * script   path of the script to execute
 * hook     hook name, passed as third script argument
 * lxcpath  currently unused
 * argsin   optional NULL-terminated array of extra arguments (may be NULL)
 *
 * The command line is heap allocated: its length is driven by the caller's
 * arguments and may be far too large for the stack.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	int ret, i;
	int result = -1;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	/* snprintf() lengths are ints below */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		ret += rc;
	}

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
370
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it via run_buffer().
 *
 * name     container name, passed as first script argument
 * section  configuration section, passed as second script argument
 * script   path of the script to execute
 * ...      extra 'char *' arguments; the list MUST be terminated by a
 *          NULL pointer
 *
 * The command line is heap allocated because its length depends on the
 * caller's arguments, and every va_start() is paired with va_end() on all
 * paths.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	int result = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: each extra argument is preceded by one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	/* snprintf() lengths are ints below */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out;
		}
		ret += rc;
	}
	va_end(ap);

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
422
/*
 * Per-line callback used by mount_unknow_fs(): treat the line as a
 * filesystem type name and try to mount the rootfs with it.
 *
 * 'data' points to a structure holding the source, the target and the
 * mount flags. Returns 1 once a mount succeeded, 0 when this line did not
 * lead to a successful mount.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *args = data;
	char *type = buffer;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	/* strip the characters the lxc_char_*_gc helpers report on each side */
	type += lxc_char_left_gc(type, strlen(type));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      args->rootfs, args->target, type);

	if (mount(args->rootfs, args->target, type, args->mntopt, NULL) == 0) {
		INFO("mounted '%s' on '%s', with fstype '%s'",
		     args->rootfs, args->target, type);
		return 1;
	}

	DEBUG("mount failed with error: %s", strerror(errno));
	return 0;
}
454
2656d231 455static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
78ae2fcc 456{
a6afdde9 457 int i;
78ae2fcc 458
459 struct cbarg {
460 const char *rootfs;
a6afdde9 461 const char *target;
78ae2fcc 462 int mntopt;
463 } cbarg = {
464 .rootfs = rootfs,
a6afdde9 465 .target = target,
78ae2fcc 466 .mntopt = mntopt,
467 };
468
a6afdde9
DL
469 /*
470 * find the filesystem type with brute force:
471 * first we check with /etc/filesystems, in case the modules
78ae2fcc 472 * are auto-loaded and fall back to the supported kernel fs
473 */
474 char *fsfile[] = {
475 "/etc/filesystems",
476 "/proc/filesystems",
477 };
478
a6afdde9
DL
479 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
480
481 int ret;
482
483 if (access(fsfile[i], F_OK))
484 continue;
485
486 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
487 if (ret < 0) {
488 ERROR("failed to parse '%s'", fsfile[i]);
489 return -1;
490 }
491
492 if (ret)
493 return 0;
78ae2fcc 494 }
495
a6afdde9
DL
496 ERROR("failed to determine fs type for '%s'", rootfs);
497 return -1;
498}
499
/* Recursively bind-mount the directory 'rootfs' on 'target'. */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
504
/*
 * Attach the image file 'rootfs' to the loop device open on 'fd' and mark
 * the device auto-clear.
 *
 * rootfs  path of the backing file, opened read-write
 * fd      open descriptor of a free loop device
 * loinfo  caller-provided status buffer; zeroed and filled here
 *
 * If the status cannot be set after the file was attached, the device is
 * detached again so that it is not left bound without the auto-clear flag.
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD, nothing else will release the device */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
536
/*
 * Mount the image file 'rootfs' on 'target' through the first free loop
 * device found in /dev.
 *
 * A device is considered free when LOOP_GET_STATUS64 fails with ENXIO.
 * Only the first free device is tried.
 *
 * Returns 0 on success, -1 if /dev cannot be read, no free loop device
 * was found, or the setup/mount on the selected device failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/*
	 * readdir() instead of the deprecated readdir_r(): the stream is
	 * private to this function and no fixed-size dirent buffer is needed.
	 */
	while ((direntp = readdir(dir))) {

		/* also filters out "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a status is available: the device is already in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			/* log before close() so that %m still reports the ioctl error */
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknow_fs(path, target, 0);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
600
/* Mount the block device 'rootfs' on 'target', probing for its fs type. */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	int rv = mount_unknow_fs(rootfs, target, 0);

	return rv;
}
605
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, path cannot
 *           be resolved, or rootfs is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* MAXPATHLEN is signed, so an snprintf() error must be tested explicitly */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the file alive; the name is not needed */
	(void)unlink(absrootfspin);
	return fd;
}
648
368bbc02
CS
/*
 * Perform the automatic mounts selected by 'flags'.
 *
 * Every default_mounts[] row whose (flags & match_mask) equals match_flag
 * is mounted, in table order. "%r" in a row's source or destination is
 * replaced by conf->rootfs.mount. If any bit of LXC_AUTO_CGROUP_MASK is
 * set in 'flags', the cgroup mounts are then delegated to
 * lxc_setup_mount_cgroup().
 *
 * Returns 0 on success. On a failed mount or allocation, returns -1 with
 * errno restored to the value of the failing call.
 */
static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct cgroup_process_info *cgroup_info)
{
	int r;
	size_t i;
	/* table is terminated by a row whose match_mask is 0 */
	static struct {
		int match_mask;
		int match_flag;
		const char *source;
		const char *destination;
		const char *fstype;
		unsigned long flags;
		const char *options;
	} default_mounts[] = {
		/* Read-only bind-mounting... In older kernels, doing that required
		 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
		 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
		 * kernel 2.6.26 onwards. However, this apparently does not work on
		 * kernel 3.8. Unfortunately, on that very same kernel, doing the
		 * same trick as above doesn't seem to work either, there one needs
		 * to ALSO specify MS_BIND for the remount, otherwise the entire
		 * fs is remounted read-only or the mount fails because it's busy...
		 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
		 * 2.6.32...
		 */
		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
		{ LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
		{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
		{ LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
		{ 0, 0, NULL, NULL, NULL, 0, NULL }
	};

	for (i = 0; default_mounts[i].match_mask; i++) {
		if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
			char *source = NULL;
			char *destination = NULL;
			int saved_errno;

			if (default_mounts[i].source) {
				/* will act like strdup if %r is not present */
				source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
				if (!source) {
					SYSERROR("memory allocation error");
					return -1;
				}
			}
			if (default_mounts[i].destination) {
				/* will act like strdup if %r is not present */
				destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
				if (!destination) {
					/* keep the allocation errno across the cleanup */
					saved_errno = errno;
					SYSERROR("memory allocation error");
					free(source);
					errno = saved_errno;
					return -1;
				}
			}
			r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
			/* remember mount()'s errno before logging and freeing */
			saved_errno = errno;
			/* NOTE(review): source is NULL for the remount rows, so this may pass NULL to %s -- confirm the logger tolerates it */
			if (r < 0)
				SYSERROR("error mounting %s on %s", source, destination);
			free(source);
			free(destination);
			if (r < 0) {
				errno = saved_errno;
				return -1;
			}
		}
	}

	if (flags & LXC_AUTO_CGROUP_MASK) {
		r = lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info, flags & LXC_AUTO_CGROUP_MASK);
		if (r < 0) {
			SYSERROR("error mounting /sys/fs/cgroup");
			return -1;
		}
	}

	return 0;
}
732
2656d231 733static int mount_rootfs(const char *rootfs, const char *target)
0ad19a3f 734{
b09ef133 735 char absrootfs[MAXPATHLEN];
78ae2fcc 736 struct stat s;
a6afdde9 737 int i;
78ae2fcc 738
a6afdde9 739 typedef int (*rootfs_cb)(const char *, const char *);
78ae2fcc 740
741 struct rootfs_type {
742 int type;
743 rootfs_cb cb;
744 } rtfs_type[] = {
2656d231
DL
745 { S_IFDIR, mount_rootfs_dir },
746 { S_IFBLK, mount_rootfs_block },
747 { S_IFREG, mount_rootfs_file },
78ae2fcc 748 };
0ad19a3f 749
4c8ab83b 750 if (!realpath(rootfs, absrootfs)) {
36eb9bde 751 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 752 return -1;
753 }
b09ef133 754
b09ef133 755 if (access(absrootfs, F_OK)) {
36eb9bde 756 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 757 return -1;
758 }
759
78ae2fcc 760 if (stat(absrootfs, &s)) {
36eb9bde 761 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 762 return -1;
763 }
764
78ae2fcc 765 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 766
78ae2fcc 767 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
768 continue;
9b0f0477 769
a6afdde9 770 return rtfs_type[i].cb(absrootfs, target);
78ae2fcc 771 }
9b0f0477 772
36eb9bde 773 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 774 return -1;
0ad19a3f 775}
776
/*
 * Set the hostname from utsname->nodename.
 *
 * A NULL 'utsname' means there is nothing to configure and is a success.
 * Returns 0 on success, -1 if sethostname() failed.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);
	return 0;
}
791
33fcb7a0 792static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 793 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 794{
7c6ef2a2
SH
795 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
796 int i, ret;
b0a33c1e 797
bc9bd0e3
DL
798 if (!rootfs->path)
799 return 0;
800
b0a33c1e 801 for (i = 0; i < tty_info->nbtty; i++) {
802
803 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
804
7c6ef2a2 805 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 806 rootfs->mount, i + 1);
7c6ef2a2
SH
807 if (ret >= sizeof(path)) {
808 ERROR("pathname too long for ttys");
809 return -1;
810 }
811 if (ttydir) {
812 /* create dev/lxc/tty%d" */
9ba8130c 813 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
814 rootfs->mount, ttydir, i + 1);
815 if (ret >= sizeof(lxcpath)) {
816 ERROR("pathname too long for ttys");
817 return -1;
818 }
819 ret = creat(lxcpath, 0660);
820 if (ret==-1 && errno != EEXIST) {
821 SYSERROR("error creating %s\n", lxcpath);
822 return -1;
823 }
4d44e274
SH
824 if (ret >= 0)
825 close(ret);
7c6ef2a2
SH
826 ret = unlink(path);
827 if (ret && errno != ENOENT) {
828 SYSERROR("error unlinking %s\n", path);
829 return -1;
830 }
b0a33c1e 831
7c6ef2a2
SH
832 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
833 WARN("failed to mount '%s'->'%s'",
834 pty_info->name, path);
835 continue;
836 }
13954cce 837
9ba8130c
SH
838 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
839 if (ret >= sizeof(lxcpath)) {
840 ERROR("tty pathname too long");
841 return -1;
842 }
7c6ef2a2
SH
843 ret = symlink(lxcpath, path);
844 if (ret) {
845 SYSERROR("failed to create symlink for tty %d\n", i+1);
846 return -1;
847 }
848 } else {
c6883f38
SH
849 /* If we populated /dev, then we need to create /dev/ttyN */
850 if (access(path, F_OK)) {
851 ret = creat(path, 0660);
852 if (ret==-1) {
853 SYSERROR("error creating %s\n", path);
854 /* this isn't fatal, continue */
025ed0f3 855 } else {
c6883f38 856 close(ret);
025ed0f3 857 }
c6883f38 858 }
7c6ef2a2
SH
859 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
860 WARN("failed to mount '%s'->'%s'",
861 pty_info->name, path);
862 continue;
863 }
b0a33c1e 864 }
865 }
866
cd54d859
DL
867 INFO("%d tty(s) has been setup", tty_info->nbtty);
868
b0a33c1e 869 return 0;
870}
871
7a7ff0c6 872static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
873{
874 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 875 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
876 int found;
877 void **cbparm;
878
879 mountentry = buffer;
880 cbparm = (void **)data;
881
882 mountlist = cbparm[0];
883 pivotdir = cbparm[1];
884
885 /* parse entry, first field is mountname, ignore */
2796cf79 886 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
887 if (!mountpoint)
888 return -1;
889
890 /* second field is mountpoint */
2796cf79 891 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
892 if (!mountpoint)
893 return -1;
894
895 /* only consider mountpoints below old root fs */
896 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
897 return 0;
898
899 /* filter duplicate mountpoints */
900 found = 0;
901 lxc_list_for_each(iterator, mountlist) {
902 if (!strcmp(iterator->elem, mountpoint)) {
903 found = 1;
904 break;
905 }
906 }
907 if (found)
908 return 0;
909
910 /* add entry to list */
911 listentry = malloc(sizeof(*listentry));
912 if (!listentry) {
913 SYSERROR("malloc for mountpoint listentry failed");
914 return -1;
915 }
916
917 listentry->elem = strdup(mountpoint);
918 if (!listentry->elem) {
919 SYSERROR("strdup failed");
00b6be44 920 free(listentry);
bf601689
MH
921 return -1;
922 }
923 lxc_list_add_tail(mountlist, listentry);
924
925 return 0;
926}
927
cc6f6dd7 928static int umount_oldrootfs(const char *oldrootfs)
bf601689 929{
2382ecff 930 char path[MAXPATHLEN];
bf601689 931 void *cbparm[2];
9ebb03ad 932 struct lxc_list mountlist, *iterator, *next;
bf601689 933 int ok, still_mounted, last_still_mounted;
9ba8130c 934 int rc;
bf601689
MH
935
936 /* read and parse /proc/mounts in old root fs */
937 lxc_list_init(&mountlist);
938
cc6f6dd7 939 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
940 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
941 if (rc >= sizeof(path)) {
942 ERROR("rootfs name too long");
943 return -1;
944 }
bf601689 945 cbparm[0] = &mountlist;
bf601689 946
cc6f6dd7 947 cbparm[1] = strdup(path);
bf601689
MH
948 if (!cbparm[1]) {
949 SYSERROR("strdup failed");
950 return -1;
951 }
952
9ba8130c
SH
953 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
954 if (rc >= sizeof(path)) {
955 ERROR("container proc/mounts name too long");
956 return -1;
957 }
cc6f6dd7
DL
958
959 ok = lxc_file_for_each_line(path,
960 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
961 if (ok < 0) {
962 SYSERROR("failed to read or parse mount list '%s'", path);
963 return -1;
964 }
965
966 /* umount filesystems until none left or list no longer shrinks */
967 still_mounted = 0;
968 do {
969 last_still_mounted = still_mounted;
970 still_mounted = 0;
971
9ebb03ad 972 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 973
c08556c6 974 /* umount normally */
bf601689
MH
975 if (!umount(iterator->elem)) {
976 DEBUG("umounted '%s'", (char *)iterator->elem);
977 lxc_list_del(iterator);
978 continue;
979 }
980
bf601689
MH
981 still_mounted++;
982 }
7df119ee 983
bf601689
MH
984 } while (still_mounted > 0 && still_mounted != last_still_mounted);
985
7df119ee 986
c08556c6
DL
987 lxc_list_for_each(iterator, &mountlist) {
988
989 /* let's try a lazy umount */
990 if (!umount2(iterator->elem, MNT_DETACH)) {
991 INFO("lazy unmount of '%s'", (char *)iterator->elem);
992 continue;
993 }
994
995 /* be more brutal (nfs) */
996 if (!umount2(iterator->elem, MNT_FORCE)) {
997 INFO("forced unmount of '%s'", (char *)iterator->elem);
998 continue;
999 }
1000
7df119ee 1001 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1002 }
bf601689 1003
cc6f6dd7
DL
1004 return 0;
1005}
1006
/*
 * Make 'rootfs' the new root file system with pivot_root(), then unmount
 * the old root.
 *
 * rootfs    path of the new root
 * pivotdir  directory, relative to rootfs, that receives the old root;
 *           "lxc_putold" when NULL. It is created when missing and, in
 *           that case, removed again afterwards (failure to remove it is
 *           only warned about).
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created_pivotdir = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if (len < 0 || (size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		created_pivotdir = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* from here on the old root is addressed by its relative name */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* removing the temporary mount point is best effort */
	if (created_pivotdir) {
		if (rmdir(pivotdir) != 0)
			WARN("can't remove mountpoint '%s': %m", pivotdir);
	}

	return 0;
}
1066
bc6928ff
MW
1067
/*
 * Note: This is a verbatim copy of what is in monitor.c. We're just
 * using it here to generate a safe subdirectory in /dev/ for the
 * container's /dev/
 */
1073
1074/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
1075 * FNV has good anti collision properties and we're not worried
1076 * about pre-image resistance or one-way-ness, we're just trying to make
1077 * the name unique in the 108 bytes of space we have.
1078 */
1079#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
/*
 * Fold @len bytes starting at @buf into the running 64-bit FNV-1a hash
 * @hval and return the updated hash.  Pass FNV1A_64_INIT as @hval to
 * start a fresh hash.
 */
static uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *cur = buf;
	const unsigned char *end = cur + len;

	while (cur != end) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)*cur++;

		/* multiply by the 64 bit FNV magic prime (2^40 + 0x1b3) mod 2^64 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
1098
1099/*
1100 * Check to see if a directory has something mounted on it and,
1101 * if it does, return the fstype.
1102 *
1103 * Code largely based on detect_shared_rootfs below
1104 *
1105 * Returns: # of matching entries in /proc/self/mounts
1106 * if != 0 fstype is filled with the last filesystem value.
1107 * if == 0 no matches found, fstype unchanged.
1108 *
1109 * ToDo: Maybe return the mount options in another parameter...
1110 */
1111
1112#define LINELEN 4096
1113#define MAX_FSTYPE_LEN 128
74a3920a 1114static int mount_check_fs( const char *dir, char *fstype )
bc6928ff
MW
1115{
1116 char buf[LINELEN], *p;
1117 struct stat s;
1118 FILE *f;
1119 int found_fs = 0;
1120 char *p2;
1121
1122 DEBUG("entering mount_check_fs for %s\n", dir);
1123
1124 if ( 0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode) ) {
1125 return 0;
1126 }
1127
bc6928ff 1128 f = fopen("/proc/self/mounts", "r");
bc6928ff
MW
1129 if (!f)
1130 return 0;
1131 while ((p = fgets(buf, LINELEN, f))) {
1132 p = index(buf, ' ');
1133 if( !p )
1134 continue;
1135 *p = '\0';
1136 p2 = p + 1;
1137
1138 p = index(p2, ' ');
1139 if( !p )
1140 continue;
1141 *p = '\0';
1142
1143 /* Compare the directory in the entry to desired */
1144 if( strcmp( p2, dir ) ) {
1145 continue;
1146 }
1147
1148 p2 = p + 1;
1149 p = index( p2, ' ');
1150 if( !p )
1151 continue;
1152 *p = '\0';
1153
1154 ++found_fs;
1155
1156 if( fstype ) {
1157 strncpy( fstype, p2, MAX_FSTYPE_LEN - 1 );
1158 fstype [ MAX_FSTYPE_LEN - 1 ] = '\0';
1159 }
1160 }
1161
bc6928ff 1162 fclose(f);
bc6928ff
MW
1163
1164 DEBUG("mount_check_fs returning %d last %s\n", found_fs, fstype);
1165
1166 return found_fs;
1167}
1168
1169/*
1170 * Locate a devtmpfs mount (should be on /dev) and create a container
1171 * subdirectory on it which we can then bind mount to the container
1172 * /dev instead of mounting a tmpfs there.
1173 * If we fail, return NULL.
1174 * Else return the pointer to the name buffer with the string to
1175 * the devtmpfs subdirectory.
1176 */
1177
74a3920a 1178static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1179{
1180 int ret;
1181 struct stat s;
1182 char tmp_path[MAXPATHLEN];
1183 char fstype[MAX_FSTYPE_LEN];
1184 char *base_path = "/dev/.lxc";
1185 char *user_path = "/dev/.lxc/user";
1186 uint64_t hash;
1187
1188 if ( 0 != access(base_path, F_OK) || 0 != stat(base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1189 /* This is just making /dev/.lxc it better work or we're done */
1190 ret = mkdir(base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1191 if ( ret ) {
1192 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1193 return NULL;
1194 }
1195 }
1196
1197 /*
1198 * Programmers notes:
1199 * We can not do mounts in this area of code that we want
1200 * to be visible in the host. Consequently, /dev/.lxc must
1201 * be set up earlier if we need a tmpfs mounted there.
1202 * That only affects the rare cases where autodev is enabled
1203 * for a container and devtmpfs is not mounted on /dev in the
1204 * host. In that case, we'll fall back to the old method
1205 * of mounting a tmpfs in the container and have no visibility
1206 * into the container /dev.
1207 */
1208 if( ! mount_check_fs( "/dev", fstype )
1209 || strcmp( "devtmpfs", fstype ) ) {
1210 /* Either /dev was not mounted or was not devtmpfs */
1211
1212 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1213 /*
1214 * /dev/.lxc is not already mounted
1215 * Doing a mount here does no good, since
1216 * it's not visible in the host.
1217 */
1218
1219 ERROR("/dev/.lxc is not setup - taking fallback" );
1220 return NULL;
1221 }
1222 }
1223
1224 if ( 0 != access(user_path, F_OK) || 0 != stat(user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1225 /*
1226 * This is making /dev/.lxc/user path for non-priv users.
1227 * If this doesn't work, we'll have to fall back in the
1228 * case of non-priv users. It's mode 1777 like /tmp.
1229 */
1230 ret = mkdir(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1231 if ( ret ) {
1232 /* Issue an error but don't fail yet! */
1233 ERROR("Unable to create /dev/.lxc/user");
1234 }
1235 /* Umask tends to screw us up here */
1236 chmod(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1237 }
1238
1239 /*
1240 * Since the container name must be unique within a given
1241 * lxcpath, we're going to use a hash of the path
1242 * /lxcpath/name as our hash name in /dev/.lxc/
1243 */
1244
1245 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1246 if (ret < 0 || ret >= MAXPATHLEN)
1247 return NULL;
1248
1249 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1250
1251 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, base_path, name, hash);
1252 if (ret < 0 || ret >= MAXPATHLEN)
1253 return NULL;
1254
1255 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1256 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1257 if ( ret ) {
1258 /* Something must have failed with the base_path...
1259 * Maybe unpriv user. Try user_path now... */
1260 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1261
1262 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, user_path, name, hash);
1263 if (ret < 0 || ret >= MAXPATHLEN)
1264 return NULL;
1265
1266 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1267 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1268 if ( ret ) {
1269 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1270 return NULL;
1271 }
1272 }
1273 }
1274 }
1275
1276 strcpy( path, tmp_path );
1277 return path;
1278}
1279
1280
91c3830e
SH
1281/*
1282 * Do we want to add options for max size of /dev and a file to
1283 * specify which devices to create?
1284 */
/*
 * Mount a /dev for the container on "<root>/dev" and make sure
 * "<root>/dev/pts" exists.
 *
 * @name:    container name.
 * @root:    path of the container root; "/dev" is appended to it.
 * @lxcpath: lxcpath of the container; "<lxcpath>/<name>/rootfs.dev" is the
 *           host-side path that is symlinked to (or checked for) the /dev.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);

	/* snprintf() returning exactly MAXPATHLEN already means truncation */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs(name, devtmpfs_path, lxcpath)) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink(host_path);
		rmdir(host_path);
		ret = symlink(devtmpfs_path, host_path);
		if (ret < 0)
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'\n", host_path, devtmpfs_path);

		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0);
	} else if (!mount_check_fs(host_path, NULL)) {
		/* Only mount a tmpfs here if we don't already have a mount */
		DEBUG("Mounting tmpfs to %s", path);
		ret = mount("none", path, "tmpfs", 0, "size=100000");
	} else {
		/* This allows someone to manually set up a mount */
		DEBUG("Bind mounting %s to %s", host_path, path );
		ret = mount(host_path, path, NULL, MS_BIND, 0);
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if (access(path, F_OK) != 0 || stat(path, &s) != 0 || !S_ISDIR(s.st_mode)) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
1354
/* Description of one device node: name under /dev, mode bits and device numbers. */
struct lxc_devs {
	const char *name;	/* file name below /dev */
	mode_t mode;		/* file type and permission bits */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes known to this file, all character devices. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1371
74a3920a 1372static int setup_autodev(const char *root)
c6883f38
SH
1373{
1374 int ret;
c6883f38
SH
1375 char path[MAXPATHLEN];
1376 int i;
3a32201c 1377 mode_t cmask;
c6883f38 1378
91c3830e
SH
1379 INFO("Creating initial consoles under %s/dev\n", root);
1380
c6883f38 1381 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1382 if (ret < 0 || ret >= MAXPATHLEN) {
1383 ERROR("Error calculating container /dev location");
c6883f38 1384 return -1;
f7bee6c6 1385 }
91c3830e
SH
1386
1387 INFO("Populating /dev under %s\n", root);
3a32201c 1388 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1389 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1390 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1391 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1392 if (ret < 0 || ret >= MAXPATHLEN)
1393 return -1;
1394 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1395 if (ret && errno != EEXIST) {
c6883f38
SH
1396 SYSERROR("Error creating %s\n", d->name);
1397 return -1;
1398 }
1399 }
3a32201c 1400 umask(cmask);
c6883f38
SH
1401
1402 INFO("Populated /dev under %s\n", root);
1403 return 0;
1404}
1405
cc28d0b0
SH
1406/*
1407 * Detect whether / is mounted MS_SHARED. The only way I know of to
1408 * check that is through /proc/self/mountinfo.
1409 * I'm only checking for /. If the container rootfs or mount location
1410 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1411 * out would be too much work to be worth it.
1412 */
#define LINELEN 4096
/* Returns 1 when a "/" entry in /proc/self/mountinfo carries "shared:", else 0. */
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	FILE *mountinfo;
	int shared = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo)
		return 0;

	while (!shared && fgets(line, LINELEN, mountinfo)) {
		char *sep = line;
		char *end;
		int skipped;

		/* move to the blank in front of the fifth field */
		for (skipped = 0; sep && skipped < 4; skipped++)
			sep = strchr(sep + 1, ' ');
		if (!sep)
			continue;

		/* cut the fifth field out of the line */
		end = strchr(sep + 1, ' ');
		if (!end)
			continue;
		*end = '\0';

		if (strcmp(sep + 1, "/") != 0)
			continue;

		// this is '/'. is it shared?
		sep = strchr(end + 1, ' ');
		if (sep && strstr(sep, "shared:"))
			shared = 1;
	}

	fclose(mountinfo);
	return shared;
}
1445
1446/*
1447 * I'll forgive you for asking whether all of this is needed :) The
1448 * answer is yes.
1449 * pivot_root will fail if the new root, the put_old dir, or the parent
1450 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1451 * or may not be current->fs_root - if we assumed it always was, we could
1452 * just mount --make-rslave /). So,
1453 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1454 * 2. make that MS_SLAVE
1455 * 3. make a 'root' directory under that
1456 * 4. mount --rbind / under the $tinyroot/root.
1457 * 5. make that rslave
1458 * 6. chdir and chroot into $tinyroot/root
1459 * 7. $tinyroot will be unmounted by our parent in start.c
1460 */
1461static int chroot_into_slave(struct lxc_conf *conf)
1462{
1463 char path[MAXPATHLEN];
1464 const char *destpath = conf->rootfs.mount;
1465 int ret;
1466
1467 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1468 SYSERROR("failed to mount %s bind", destpath);
1469 return -1;
1470 }
1471 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1472 SYSERROR("failed to make %s slave", destpath);
1473 return -1;
1474 }
1475 if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
1476 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1477 return -1;
1478 }
1479 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1480 if (ret < 0 || ret >= MAXPATHLEN) {
1481 ERROR("out of memory making root path");
1482 return -1;
1483 }
1484 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1485 SYSERROR("Failed to create /dev/pts in container");
1486 return -1;
1487 }
1488 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1489 SYSERROR("Failed to rbind mount / to %s", path);
1490 return -1;
1491 }
1492 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1493 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1494 return -1;
1495 }
1496 if (chdir(path)) {
1497 SYSERROR("Failed to chdir into tmp-/");
1498 return -1;
1499 }
1500 if (chroot(path)) {
1501 SYSERROR("Failed to chroot into tmp-/");
1502 return -1;
1503 }
1504 INFO("Chrooted into tmp-/ at %s\n", path);
1505 return 0;
1506}
1507
1508static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1509{
cc28d0b0
SH
1510 const struct lxc_rootfs *rootfs = &conf->rootfs;
1511
a0f379bf
DW
1512 if (!rootfs->path) {
1513 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1514 SYSERROR("Failed to make / rslave");
1515 return -1;
1516 }
c69bd12f 1517 return 0;
a0f379bf 1518 }
0ad19a3f 1519
12297168 1520 if (access(rootfs->mount, F_OK)) {
b1789442 1521 SYSERROR("failed to access to '%s', check it is present",
12297168 1522 rootfs->mount);
b1789442
DL
1523 return -1;
1524 }
1525
cc28d0b0
SH
1526 if (detect_shared_rootfs()) {
1527 if (chroot_into_slave(conf)) {
1528 ERROR("Failed to chroot into slave /");
1529 return -1;
1530 }
1531 }
1532
9be53773
SH
1533 // First try mounting rootfs using a bdev
1534 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1535 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1536 bdev_put(bdev);
9be53773
SH
1537 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1538 return 0;
1539 }
59d66af2
SH
1540 if (bdev)
1541 bdev_put(bdev);
2656d231 1542 if (mount_rootfs(rootfs->path, rootfs->mount)) {
a6afdde9 1543 ERROR("failed to mount rootfs");
c3f0a28c 1544 return -1;
1545 }
0ad19a3f 1546
12297168 1547 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1548
ac778708
DL
1549 return 0;
1550}
1551
74a3920a 1552static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1553{
ac778708
DL
1554 if (!rootfs->path)
1555 return 0;
1556
12297168 1557 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1558 ERROR("failed to setup pivot root");
25368b52 1559 return -1;
c69bd12f
DL
1560 }
1561
25368b52 1562 return 0;
0ad19a3f 1563}
1564
d852c78c 1565static int setup_pts(int pts)
3c26f34e 1566{
77890c6d
SW
1567 char target[PATH_MAX];
1568
d852c78c
DL
1569 if (!pts)
1570 return 0;
3c26f34e 1571
1572 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1573 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1574 return -1;
1575 }
1576
a6afdde9 1577 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1578 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1579 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1580 return -1;
1581 }
1582
3c26f34e 1583 if (access("/dev/ptmx", F_OK)) {
1584 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1585 goto out;
36eb9bde 1586 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1587 return -1;
1588 }
1589
77890c6d
SW
1590 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1591 goto out;
1592
3c26f34e 1593 /* fallback here, /dev/pts/ptmx exists just mount bind */
1594 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1595 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1596 return -1;
1597 }
cd54d859
DL
1598
1599 INFO("created new pts instance");
d852c78c 1600
3c26f34e 1601out:
1602 return 0;
1603}
1604
cccc74b5
DL
/*
 * Apply @persona with personality() when built with
 * HAVE_SYS_PERSONALITY_H; a value of -1 means "leave it alone".
 * Returns 0 on success (or when nothing is done), -1 on failure.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1621
7c6ef2a2 1622static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1623 const struct lxc_console *console)
6e590161 1624{
63376d7d
DL
1625 char path[MAXPATHLEN];
1626 struct stat s;
7c6ef2a2 1627 int ret;
52e35957 1628
7c6ef2a2
SH
1629 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1630 if (ret >= sizeof(path)) {
1631 ERROR("console path too long\n");
1632 return -1;
1633 }
52e35957 1634
63376d7d 1635 if (access(path, F_OK)) {
466978b0 1636 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1637 return 0;
52e35957
DL
1638 }
1639
b5159817
DE
1640 if (console->master < 0) {
1641 INFO("no console");
f78a1f32
DL
1642 return 0;
1643 }
ed502555 1644
63376d7d
DL
1645 if (stat(path, &s)) {
1646 SYSERROR("failed to stat '%s'", path);
1647 return -1;
1648 }
1649
1650 if (chmod(console->name, s.st_mode)) {
1651 SYSERROR("failed to set mode '0%o' to '%s'",
1652 s.st_mode, console->name);
1653 return -1;
1654 }
13954cce 1655
63376d7d
DL
1656 if (mount(console->name, path, "none", MS_BIND, 0)) {
1657 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1658 return -1;
1659 }
1660
63376d7d 1661 INFO("console has been setup");
7c6ef2a2
SH
1662 return 0;
1663}
1664
1665static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1666 const struct lxc_console *console,
1667 char *ttydir)
1668{
1669 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1670 int ret;
1671
1672 /* create rootfs/dev/<ttydir> directory */
1673 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1674 ttydir);
1675 if (ret >= sizeof(path))
1676 return -1;
1677 ret = mkdir(path, 0755);
1678 if (ret && errno != EEXIST) {
1679 SYSERROR("failed with errno %d to create %s\n", errno, path);
1680 return -1;
1681 }
1682 INFO("created %s\n", path);
1683
1684 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1685 rootfs->mount, ttydir);
1686 if (ret >= sizeof(lxcpath)) {
1687 ERROR("console path too long\n");
1688 return -1;
1689 }
1690
1691 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1692 ret = unlink(path);
1693 if (ret && errno != ENOENT) {
1694 SYSERROR("error unlinking %s\n", path);
1695 return -1;
1696 }
1697
1698 ret = creat(lxcpath, 0660);
1699 if (ret==-1 && errno != EEXIST) {
1700 SYSERROR("error %d creating %s\n", errno, lxcpath);
1701 return -1;
1702 }
4d44e274
SH
1703 if (ret >= 0)
1704 close(ret);
7c6ef2a2 1705
b5159817
DE
1706 if (console->master < 0) {
1707 INFO("no console");
7c6ef2a2
SH
1708 return 0;
1709 }
1710
1711 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1712 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1713 return -1;
1714 }
1715
1716 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1717 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1718 if (ret >= sizeof(lxcpath)) {
1719 ERROR("lxc/console path too long");
1720 return -1;
1721 }
7c6ef2a2
SH
1722 ret = symlink(lxcpath, path);
1723 if (ret) {
1724 SYSERROR("failed to create symlink for console");
1725 return -1;
1726 }
1727
1728 INFO("console has been setup on %s", lxcpath);
cd54d859 1729
6e590161 1730 return 0;
1731}
1732
7c6ef2a2
SH
1733static int setup_console(const struct lxc_rootfs *rootfs,
1734 const struct lxc_console *console,
1735 char *ttydir)
1736{
1737 /* We don't have a rootfs, /dev/console will be shared */
1738 if (!rootfs->path)
1739 return 0;
1740 if (!ttydir)
1741 return setup_dev_console(rootfs, console);
1742
1743 return setup_ttydir_console(rootfs, console, ttydir);
1744}
1745
1bd051a6
SH
1746static int setup_kmsg(const struct lxc_rootfs *rootfs,
1747 const struct lxc_console *console)
1748{
1749 char kpath[MAXPATHLEN];
1750 int ret;
1751
222fea5a
DE
1752 if (!rootfs->path)
1753 return 0;
1bd051a6
SH
1754 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1755 if (ret < 0 || ret >= sizeof(kpath))
1756 return -1;
1757
1758 ret = unlink(kpath);
1759 if (ret && errno != ENOENT) {
1760 SYSERROR("error unlinking %s\n", kpath);
1761 return -1;
1762 }
1763
1764 ret = symlink("console", kpath);
1765 if (ret) {
1766 SYSERROR("failed to create symlink for kmsg");
1767 return -1;
1768 }
1769
1770 return 0;
1771}
1772
998ac676
RT
1773static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1774{
1775 struct mount_opt *mo;
1776
1777 /* If opt is found in mount_opt, set or clear flags.
1778 * Otherwise append it to data. */
1779
1780 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1781 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1782 if (mo->clear)
1783 *flags &= ~mo->flag;
1784 else
1785 *flags |= mo->flag;
1786 return;
1787 }
1788 }
1789
1790 if (strlen(*data))
1791 strcat(*data, ",");
1792 strcat(*data, opt);
1793}
1794
/*
 * Split the comma separated option string @mntopts into mount flags and
 * a data string.
 *
 * *@mntflags receives the flags (0 when @mntopts is NULL).
 * *@mntdata receives a malloc()ed string of the remaining options that the
 * caller must free(), or NULL when there are none.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *data;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() writes into its input, so work on a copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the option string */
	data = malloc(strlen(copy) + 1);
	if (!data) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	data[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &data);
		tok = strtok_r(NULL, ",", &state);
	}

	if (data[0] != '\0')
		*mntdata = data;
	else
		free(data);
	free(copy);

	return 0;
}
1833
911324ef
DL
/*
 * Mount @fsname on @target.  The first mount() is issued without
 * MS_REMOUNT; when @mountflags contains MS_REMOUNT or MS_BIND a second
 * mount() with MS_REMOUNT added follows.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long first_pass = mountflags & ~MS_REMOUNT;

	if (mount(fsname, target, fstype, first_pass, data)) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data)) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1860
/*
 * Mount @mntent exactly where it says (no container rootfs involved).
 *
 * The "create=dir" / "create=file" options create the mount target first;
 * failing to do so only produces a warning, the mount is still attempted.
 * With the "optional" option a failed mount is not reported as an error.
 *
 * Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(const struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathcopy = NULL;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		/* mkdir_p() returns non-zero on failure */
		if (mkdir_p(mntent->mnt_dir, 0755))
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * that did not come from malloc(), so keep the strdup()
		 * result separately and free only that.
		 */
		pathcopy = strdup(mntent->mnt_dir);
		if (pathcopy) {
			mkdir_p(dirname(pathcopy), 0755);
			free(pathcopy);
		}
		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

	free(mntdata);

	return ret;
}
1905
aaf901be 1906static int mount_entry_on_absolute_rootfs(const struct mntent *mntent,
80a881b2
SH
1907 const struct lxc_rootfs *rootfs,
1908 const char *lxc_name)
911324ef 1909{
013bd428 1910 char *aux;
59760f5d 1911 char path[MAXPATHLEN];
911324ef
DL
1912 unsigned long mntflags;
1913 char *mntdata;
80a881b2 1914 int r, ret = 0, offset;
67e571de 1915 const char *lxcpath;
34cfffb3
SG
1916 FILE *pathfile = NULL;
1917 char *pathdirname = NULL;
0ad19a3f 1918
911324ef
DL
1919 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1920 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1921 return -1;
1922 }
1bc60a65 1923
593e8478 1924 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1925 if (!lxcpath) {
1926 ERROR("Out of memory");
1927 return -1;
1928 }
1929
80a881b2 1930 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1931 * use $lxcpath/CN/rootfs as the target prefix */
1932 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1933 if (r < 0 || r >= MAXPATHLEN)
1934 goto skipvarlib;
1935
1936 aux = strstr(mntent->mnt_dir, path);
1937 if (aux) {
1938 offset = strlen(path);
1939 goto skipabs;
1940 }
1941
1942skipvarlib:
013bd428
DL
1943 aux = strstr(mntent->mnt_dir, rootfs->path);
1944 if (!aux) {
1945 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1946 goto out;
1947 }
80a881b2
SH
1948 offset = strlen(rootfs->path);
1949
1950skipabs:
013bd428 1951
9ba8130c 1952 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1953 aux + offset);
1954 if (r < 0 || r >= MAXPATHLEN) {
1955 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1956 ret = -1;
1957 goto out;
1958 }
1959
34cfffb3
SG
1960 if (hasmntopt(mntent, "create=dir")) {
1961 if (!mkdir_p(path, 0755)) {
1962 WARN("Failed to create mount target '%s'", path);
1963 ret = -1;
1964 }
1965 }
1966
1967 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
1968 pathdirname = strdup(path);
1969 pathdirname = dirname(pathdirname);
1970 mkdir_p(pathdirname, 0755);
1971 pathfile = fopen(path, "wb");
1972 if (!pathfile) {
1973 WARN("Failed to create mount target '%s'", path);
1974 ret = -1;
1975 }
1976 else
1977 fclose(pathfile);
1978 }
d330fe7b 1979
013bd428 1980 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 1981 mntflags, mntdata);
0ad19a3f 1982
68c152ef
SH
1983 if (hasmntopt(mntent, "optional") != NULL)
1984 ret = 0;
1985
013bd428 1986out:
34cfffb3 1987 free(pathdirname);
911324ef
DL
1988 free(mntdata);
1989 return ret;
1990}
d330fe7b 1991
/*
 * Mount @mntent, whose mount point is a relative path, at
 * "<rootfs>/<mnt_dir>".
 *
 * The "create=dir" / "create=file" options create the mount target first;
 * failing to do so only produces a warning.  With the "optional" option a
 * failed mount is not reported as an error.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(const struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathcopy = NULL;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* mntdata was allocated by parse_mntopts() */
		free(mntdata);
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		/* mkdir_p() returns non-zero on failure */
		if (mkdir_p(path, 0755))
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * that did not come from malloc(), so keep the strdup()
		 * result separately and free only that.
		 */
		pathcopy = strdup(path);
		if (pathcopy) {
			mkdir_p(dirname(pathcopy), 0755);
			free(pathcopy);
		}
		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

	free(mntdata);

	return ret;
}
2045
80a881b2
SH
2046static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2047 const char *lxc_name)
911324ef 2048{
aaf901be
AM
2049 struct mntent mntent;
2050 char buf[4096];
911324ef 2051 int ret = -1;
e76b8764 2052
aaf901be 2053 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2054
911324ef 2055 if (!rootfs->path) {
aaf901be 2056 if (mount_entry_on_systemfs(&mntent))
e76b8764 2057 goto out;
911324ef 2058 continue;
e76b8764
CDC
2059 }
2060
911324ef 2061 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2062 if (mntent.mnt_dir[0] != '/') {
2063 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2064 rootfs->mount))
2065 goto out;
2066 continue;
2067 }
cd54d859 2068
aaf901be 2069 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2070 goto out;
0ad19a3f 2071 }
cd54d859 2072
0ad19a3f 2073 ret = 0;
cd54d859
DL
2074
2075 INFO("mount points have been setup");
0ad19a3f 2076out:
e7938e9e
MN
2077 return ret;
2078}
2079
80a881b2
SH
/*
 * Mount every entry of the fstab file @fstab.  A NULL @fstab means there
 * is nothing to mount.  Returns 0 on success, -1 on failure.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (!fstab)
		return 0;

	entries = setmntent(fstab, "r");
	if (!entries) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);
	endmntent(entries);

	return rc;
}
2100
80a881b2
SH
2101static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2102 const char *lxc_name)
e7938e9e
MN
2103{
2104 FILE *file;
2105 struct lxc_list *iterator;
2106 char *mount_entry;
2107 int ret;
2108
2109 file = tmpfile();
2110 if (!file) {
2111 ERROR("tmpfile error: %m");
2112 return -1;
2113 }
2114
2115 lxc_list_for_each(iterator, mount) {
2116 mount_entry = iterator->elem;
1d6b1976 2117 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2118 }
2119
2120 rewind(file);
2121
80a881b2 2122 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2123
2124 fclose(file);
2125 return ret;
2126}
2127
81810dd1
DL
2128static int setup_caps(struct lxc_list *caps)
2129{
2130 struct lxc_list *iterator;
2131 char *drop_entry;
d55bc1ad 2132 char *ptr;
81810dd1
DL
2133 int i, capid;
2134
2135 lxc_list_for_each(iterator, caps) {
2136
2137 drop_entry = iterator->elem;
2138
2139 capid = -1;
2140
2141 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2142
2143 if (strcmp(drop_entry, caps_opt[i].name))
2144 continue;
2145
2146 capid = caps_opt[i].value;
2147 break;
2148 }
2149
d55bc1ad
CS
2150 if (capid < 0) {
2151 /* try to see if it's numeric, so the user may specify
2152 * capabilities that the running kernel knows about but
2153 * we don't */
09bbd745 2154 errno = 0;
d55bc1ad 2155 capid = strtol(drop_entry, &ptr, 10);
09bbd745 2156 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
2157 /* not a valid number */
2158 capid = -1;
2159 else if (capid > lxc_caps_last_cap())
2160 /* we have a number but it's not a valid
2161 * capability */
2162 capid = -1;
2163 }
2164
81810dd1 2165 if (capid < 0) {
1e11be34
DL
2166 ERROR("unknown capability %s", drop_entry);
2167 return -1;
81810dd1
DL
2168 }
2169
2170 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2171
2172 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
2173 SYSERROR("failed to remove %s capability", drop_entry);
2174 return -1;
2175 }
2176
2177 }
2178
1fb86a7c
SH
2179 DEBUG("capabilities have been setup");
2180
2181 return 0;
2182}
2183
2184static int dropcaps_except(struct lxc_list *caps)
2185{
2186 struct lxc_list *iterator;
2187 char *keep_entry;
2188 char *ptr;
2189 int i, capid;
2190 int numcaps = lxc_caps_last_cap() + 1;
2191 INFO("found %d capabilities\n", numcaps);
2192
2caf9a97
SH
2193 if (numcaps <= 0 || numcaps > 200)
2194 return -1;
2195
1fb86a7c
SH
2196 // caplist[i] is 1 if we keep capability i
2197 int *caplist = alloca(numcaps * sizeof(int));
2198 memset(caplist, 0, numcaps * sizeof(int));
2199
2200 lxc_list_for_each(iterator, caps) {
2201
2202 keep_entry = iterator->elem;
2203
2204 capid = -1;
2205
2206 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2207
2208 if (strcmp(keep_entry, caps_opt[i].name))
2209 continue;
2210
2211 capid = caps_opt[i].value;
2212 break;
2213 }
2214
2215 if (capid < 0) {
2216 /* try to see if it's numeric, so the user may specify
2217 * capabilities that the running kernel knows about but
2218 * we don't */
2219 capid = strtol(keep_entry, &ptr, 10);
2220 if (!ptr || *ptr != '\0' ||
f371aca9 2221 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2222 /* not a valid number */
2223 capid = -1;
2224 else if (capid > lxc_caps_last_cap())
2225 /* we have a number but it's not a valid
2226 * capability */
2227 capid = -1;
2228 }
2229
2230 if (capid < 0) {
2231 ERROR("unknown capability %s", keep_entry);
2232 return -1;
2233 }
2234
2235 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2236
2237 caplist[capid] = 1;
2238 }
2239 for (i=0; i<numcaps; i++) {
2240 if (caplist[i])
2241 continue;
2242 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
2243 SYSERROR("failed to remove capability %d", i);
2244 return -1;
2245 }
2246 }
2247
2248 DEBUG("capabilities have been setup");
81810dd1
DL
2249
2250 return 0;
2251}
2252
0ad19a3f 2253static int setup_hw_addr(char *hwaddr, const char *ifname)
2254{
2255 struct sockaddr sockaddr;
2256 struct ifreq ifr;
2257 int ret, fd;
2258
3cfc0f3a
MN
2259 ret = lxc_convert_mac(hwaddr, &sockaddr);
2260 if (ret) {
2261 ERROR("mac address '%s' conversion failed : %s",
2262 hwaddr, strerror(-ret));
0ad19a3f 2263 return -1;
2264 }
2265
2266 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2267 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2268 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2269
2270 fd = socket(AF_INET, SOCK_DGRAM, 0);
2271 if (fd < 0) {
3ab87b66 2272 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2273 return -1;
2274 }
2275
2276 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2277 close(fd);
2278 if (ret)
3ab87b66 2279 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2280
5da6aa8c 2281 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2282
0ad19a3f 2283 return ret;
2284}
2285
82d5ae15 2286static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2287{
82d5ae15
DL
2288 struct lxc_list *iterator;
2289 struct lxc_inetdev *inetdev;
3cfc0f3a 2290 int err;
0ad19a3f 2291
82d5ae15
DL
2292 lxc_list_for_each(iterator, ip) {
2293
2294 inetdev = iterator->elem;
2295
0093bb8c
DL
2296 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2297 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2298 if (err) {
2299 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2300 ifindex, strerror(-err));
82d5ae15
DL
2301 return -1;
2302 }
2303 }
2304
2305 return 0;
0ad19a3f 2306}
2307
82d5ae15 2308static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2309{
82d5ae15 2310 struct lxc_list *iterator;
7fa9074f 2311 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2312 int err;
0ad19a3f 2313
82d5ae15
DL
2314 lxc_list_for_each(iterator, ip) {
2315
2316 inet6dev = iterator->elem;
2317
b3df193c 2318 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2319 &inet6dev->mcast, &inet6dev->acast,
2320 inet6dev->prefix);
3cfc0f3a
MN
2321 if (err) {
2322 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2323 ifindex, strerror(-err));
82d5ae15 2324 return -1;
3cfc0f3a 2325 }
82d5ae15
DL
2326 }
2327
2328 return 0;
0ad19a3f 2329}
2330
82d5ae15 2331static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2332{
0ad19a3f 2333 char ifname[IFNAMSIZ];
0ad19a3f 2334 char *current_ifname = ifname;
3cfc0f3a 2335 int err;
0ad19a3f 2336
82d5ae15
DL
2337 /* empty network namespace */
2338 if (!netdev->ifindex) {
b0efbac4 2339 if (netdev->flags & IFF_UP) {
d472214b 2340 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2341 if (err) {
2342 ERROR("failed to set the loopback up : %s",
2343 strerror(-err));
82d5ae15
DL
2344 return -1;
2345 }
82d5ae15 2346 }
7b57e8b6 2347 return 0;
0ad19a3f 2348 }
13954cce 2349
b466dc33
BP
2350 /* get the new ifindex in case of physical netdev */
2351 if (netdev->type == LXC_NET_PHYS)
2352 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2353 ERROR("failed to get ifindex for %s",
2354 netdev->link);
2355 return -1;
2356 }
2357
82d5ae15
DL
2358 /* retrieve the name of the interface */
2359 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2360 ERROR("no interface corresponding to index '%d'",
82d5ae15 2361 netdev->ifindex);
0ad19a3f 2362 return -1;
2363 }
13954cce 2364
018ef520 2365 /* default: let the system to choose one interface name */
9d083402 2366 if (!netdev->name)
fb6d9b2f
DL
2367 netdev->name = netdev->type == LXC_NET_PHYS ?
2368 netdev->link : "eth%d";
018ef520 2369
82d5ae15 2370 /* rename the interface name */
b84f58b9 2371 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3cfc0f3a
MN
2372 if (err) {
2373 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2374 strerror(-err));
018ef520
DL
2375 return -1;
2376 }
2377
2378 /* Re-read the name of the interface because its name has changed
2379 * and would be automatically allocated by the system
2380 */
82d5ae15 2381 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2382 ERROR("no interface corresponding to index '%d'",
82d5ae15 2383 netdev->ifindex);
018ef520 2384 return -1;
0ad19a3f 2385 }
2386
82d5ae15
DL
2387 /* set a mac address */
2388 if (netdev->hwaddr) {
2389 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2390 ERROR("failed to setup hw address for '%s'",
82d5ae15 2391 current_ifname);
0ad19a3f 2392 return -1;
2393 }
2394 }
2395
82d5ae15
DL
2396 /* setup ipv4 addresses on the interface */
2397 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2398 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2399 ifname);
2400 return -1;
2401 }
2402
82d5ae15
DL
2403 /* setup ipv6 addresses on the interface */
2404 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2405 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2406 ifname);
2407 return -1;
2408 }
2409
82d5ae15 2410 /* set the network device up */
b0efbac4 2411 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2412 int err;
2413
d472214b 2414 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2415 if (err) {
2416 ERROR("failed to set '%s' up : %s", current_ifname,
2417 strerror(-err));
0ad19a3f 2418 return -1;
2419 }
2420
2421 /* the network is up, make the loopback up too */
d472214b 2422 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2423 if (err) {
2424 ERROR("failed to set the loopback up : %s",
2425 strerror(-err));
0ad19a3f 2426 return -1;
2427 }
2428 }
2429
f8fee0e2
MK
2430 /* We can only set up the default routes after bringing
2431 * up the interface, sine bringing up the interface adds
2432 * the link-local routes and we can't add a default
2433 * route if the gateway is not reachable. */
2434
2435 /* setup ipv4 gateway on the interface */
2436 if (netdev->ipv4_gateway) {
2437 if (!(netdev->flags & IFF_UP)) {
2438 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2439 return -1;
2440 }
2441
2442 if (lxc_list_empty(&netdev->ipv4)) {
2443 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2444 return -1;
2445 }
2446
2447 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2448 if (err) {
fc739df5
SG
2449 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2450 if (err) {
2451 ERROR("failed to add ipv4 dest for '%s': %s",
2452 ifname, strerror(-err));
2453 }
2454
2455 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2456 if (err) {
2457 ERROR("failed to setup ipv4 gateway for '%s': %s",
2458 ifname, strerror(-err));
2459 if (netdev->ipv4_gateway_auto) {
2460 char buf[INET_ADDRSTRLEN];
2461 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2462 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2463 }
2464 return -1;
19a26f82 2465 }
f8fee0e2
MK
2466 }
2467 }
2468
2469 /* setup ipv6 gateway on the interface */
2470 if (netdev->ipv6_gateway) {
2471 if (!(netdev->flags & IFF_UP)) {
2472 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2473 return -1;
2474 }
2475
2476 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2477 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2478 return -1;
2479 }
2480
2481 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2482 if (err) {
fc739df5
SG
2483 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2484 if (err) {
2485 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2486 ifname, strerror(-err));
19a26f82 2487 }
fc739df5
SG
2488
2489 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2490 if (err) {
2491 ERROR("failed to setup ipv6 gateway for '%s': %s",
2492 ifname, strerror(-err));
2493 if (netdev->ipv6_gateway_auto) {
2494 char buf[INET6_ADDRSTRLEN];
2495 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2496 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2497 }
2498 return -1;
2499 }
f8fee0e2
MK
2500 }
2501 }
2502
cd54d859
DL
2503 DEBUG("'%s' has been setup", current_ifname);
2504
0ad19a3f 2505 return 0;
2506}
2507
5f4535a3 2508static int setup_network(struct lxc_list *network)
0ad19a3f 2509{
82d5ae15 2510 struct lxc_list *iterator;
82d5ae15 2511 struct lxc_netdev *netdev;
0ad19a3f 2512
5f4535a3 2513 lxc_list_for_each(iterator, network) {
cd54d859 2514
5f4535a3 2515 netdev = iterator->elem;
82d5ae15
DL
2516
2517 if (setup_netdev(netdev)) {
2518 ERROR("failed to setup netdev");
2519 return -1;
2520 }
2521 }
cd54d859 2522
5f4535a3
DL
2523 if (!lxc_list_empty(network))
2524 INFO("network has been setup");
cd54d859
DL
2525
2526 return 0;
0ad19a3f 2527}
2528
7b35f3d6
SH
2529void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2530{
2531 int i;
2532
2533 INFO("running to reset %d nic names", conf->num_savednics);
2534 for (i=0; i<conf->num_savednics; i++) {
2535 struct saved_nic *s = &conf->saved_nics[i];
2536 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2537 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2538 free(s->orig_name);
2539 }
2540 conf->num_savednics = 0;
2541 free(conf->saved_nics);
2542}
2543
ae9242c8
SH
/* Default rootfs mount point; lxc_conf_init() strdup()s it into rootfs.mount. */
2544static char *default_rootfs_mount = LXCROOTFSMOUNT;
2545
7b379ab3 2546struct lxc_conf *lxc_conf_init(void)
089cd8b8 2547{
7b379ab3 2548 struct lxc_conf *new;
26ddeedd 2549 int i;
7b379ab3
MN
2550
2551 new = malloc(sizeof(*new));
2552 if (!new) {
2553 ERROR("lxc_conf_init : %m");
2554 return NULL;
2555 }
2556 memset(new, 0, sizeof(*new));
2557
b40a606e 2558 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2559 new->personality = -1;
bc6928ff 2560 new->autodev = -1;
596a818d
DE
2561 new->console.log_path = NULL;
2562 new->console.log_fd = -1;
28a4b0e5 2563 new->console.path = NULL;
63376d7d 2564 new->console.peer = -1;
b5159817
DE
2565 new->console.peerpty.busy = -1;
2566 new->console.peerpty.master = -1;
2567 new->console.peerpty.slave = -1;
63376d7d
DL
2568 new->console.master = -1;
2569 new->console.slave = -1;
2570 new->console.name[0] = '\0';
d2e30e99 2571 new->maincmd_fd = -1;
54c30e29 2572 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2573 if (!new->rootfs.mount) {
2574 ERROR("lxc_conf_init : %m");
2575 free(new);
2576 return NULL;
2577 }
2f3f41d0 2578 new->kmsg = 1;
7b379ab3
MN
2579 lxc_list_init(&new->cgroup);
2580 lxc_list_init(&new->network);
2581 lxc_list_init(&new->mount_list);
81810dd1 2582 lxc_list_init(&new->caps);
1fb86a7c 2583 lxc_list_init(&new->keepcaps);
f6d3e3e4 2584 lxc_list_init(&new->id_map);
26ddeedd
SH
2585 for (i=0; i<NUM_LXC_HOOKS; i++)
2586 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2587 lxc_list_init(&new->groups);
fe4de9a6
DE
2588 new->lsm_aa_profile = NULL;
2589 new->lsm_se_context = NULL;
e075f5d9 2590 new->lsm_umount_proc = 0;
7b379ab3 2591
9f30a190
MM
2592 for (i = 0; i < LXC_NS_MAX; i++)
2593 new->inherit_ns_fd[i] = -1;
2594
7b379ab3 2595 return new;
089cd8b8
DL
2596}
2597
e3b4c4c4 2598static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2599{
8634bc19 2600 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2601 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2602 int err;
13954cce 2603
e892973e
DL
2604 if (netdev->priv.veth_attr.pair)
2605 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2606 else {
9ba8130c
SH
2607 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2608 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2609 ERROR("veth1 name too long");
2610 return -1;
2611 }
a0265685 2612 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2613 if (!veth1) {
2614 ERROR("failed to allocate a temporary name");
2615 return -1;
2616 }
74a2b586
JK
2617 /* store away for deconf */
2618 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2619 }
82d5ae15 2620
0e391e57 2621 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2622 veth2 = lxc_mkifname(veth2buf);
ad40563e 2623 if (!veth2) {
82d5ae15 2624 ERROR("failed to allocate a temporary name");
ad40563e 2625 goto out_delete;
0ad19a3f 2626 }
2627
3cfc0f3a
MN
2628 err = lxc_veth_create(veth1, veth2);
2629 if (err) {
2630 ERROR("failed to create %s-%s : %s", veth1, veth2,
2631 strerror(-err));
ad40563e 2632 goto out_delete;
0ad19a3f 2633 }
13954cce 2634
49684c0b
CS
2635 /* changing the high byte of the mac address to 0xfe, the bridge interface
2636 * will always keep the host's mac address and not take the mac address
2637 * of a container */
2638 err = setup_private_host_hw_addr(veth1);
2639 if (err) {
2640 ERROR("failed to change mac address of host interface '%s' : %s",
2641 veth1, strerror(-err));
2642 goto out_delete;
2643 }
2644
82d5ae15 2645 if (netdev->mtu) {
d472214b 2646 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2647 if (!err)
d472214b 2648 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2649 if (err) {
2650 ERROR("failed to set mtu '%s' for %s-%s : %s",
2651 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2652 goto out_delete;
75d09f83
DL
2653 }
2654 }
2655
3cfc0f3a
MN
2656 if (netdev->link) {
2657 err = lxc_bridge_attach(netdev->link, veth1);
2658 if (err) {
2659 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2660 veth1, netdev->link, strerror(-err));
2661 goto out_delete;
2662 }
eb14c10a
DL
2663 }
2664
82d5ae15
DL
2665 netdev->ifindex = if_nametoindex(veth2);
2666 if (!netdev->ifindex) {
36eb9bde 2667 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2668 goto out_delete;
2669 }
2670
d472214b 2671 err = lxc_netdev_up(veth1);
6e35af2e
DL
2672 if (err) {
2673 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2674 goto out_delete;
0ad19a3f 2675 }
2676
e3b4c4c4 2677 if (netdev->upscript) {
751d9dcd
DL
2678 err = run_script(handler->name, "net", netdev->upscript, "up",
2679 "veth", veth1, (char*) NULL);
2680 if (err)
e3b4c4c4 2681 goto out_delete;
e3b4c4c4
ST
2682 }
2683
82d5ae15
DL
2684 DEBUG("instanciated veth '%s/%s', index is '%d'",
2685 veth1, veth2, netdev->ifindex);
2686
6ab9ab6d 2687 return 0;
eb14c10a
DL
2688
2689out_delete:
b84f58b9 2690 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2691 if (!netdev->priv.veth_attr.pair && veth1)
2692 free(veth1);
2693 if(veth2)
2694 free(veth2);
6ab9ab6d 2695 return -1;
13954cce 2696}
d957ae2d 2697
74a2b586
JK
2698static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2699{
2700 char *veth1;
2701 int err;
2702
2703 if (netdev->priv.veth_attr.pair)
2704 veth1 = netdev->priv.veth_attr.pair;
2705 else
2706 veth1 = netdev->priv.veth_attr.veth1;
2707
2708 if (netdev->downscript) {
2709 err = run_script(handler->name, "net", netdev->downscript,
2710 "down", "veth", veth1, (char*) NULL);
2711 if (err)
2712 return -1;
2713 }
2714 return 0;
2715}
2716
e3b4c4c4 2717static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2718{
0e391e57 2719 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2720 int err;
d957ae2d
MT
2721
2722 if (!netdev->link) {
2723 ERROR("no link specified for macvlan netdev");
2724 return -1;
2725 }
13954cce 2726
9ba8130c
SH
2727 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2728 if (err >= sizeof(peerbuf))
2729 return -1;
82d5ae15 2730
a0265685 2731 peer = lxc_mkifname(peerbuf);
ad40563e 2732 if (!peer) {
82d5ae15
DL
2733 ERROR("failed to make a temporary name");
2734 return -1;
0ad19a3f 2735 }
2736
3cfc0f3a
MN
2737 err = lxc_macvlan_create(netdev->link, peer,
2738 netdev->priv.macvlan_attr.mode);
2739 if (err) {
2740 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2741 peer, netdev->link, strerror(-err));
ad40563e 2742 goto out;
0ad19a3f 2743 }
2744
82d5ae15
DL
2745 netdev->ifindex = if_nametoindex(peer);
2746 if (!netdev->ifindex) {
36eb9bde 2747 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2748 goto out;
22ebac19 2749 }
2750
e3b4c4c4 2751 if (netdev->upscript) {
751d9dcd
DL
2752 err = run_script(handler->name, "net", netdev->upscript, "up",
2753 "macvlan", netdev->link, (char*) NULL);
2754 if (err)
ad40563e 2755 goto out;
e3b4c4c4
ST
2756 }
2757
e892973e
DL
2758 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2759 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2760
d957ae2d 2761 return 0;
ad40563e
ÇO
2762out:
2763 lxc_netdev_delete_by_name(peer);
2764 free(peer);
2765 return -1;
0ad19a3f 2766}
2767
74a2b586
JK
2768static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2769{
2770 int err;
2771
2772 if (netdev->downscript) {
2773 err = run_script(handler->name, "net", netdev->downscript,
2774 "down", "macvlan", netdev->link,
2775 (char*) NULL);
2776 if (err)
2777 return -1;
2778 }
2779 return 0;
2780}
2781
26c39028 2782/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2783static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2784{
2785 char peer[IFNAMSIZ];
3cfc0f3a 2786 int err;
26c39028
JHS
2787
2788 if (!netdev->link) {
2789 ERROR("no link specified for vlan netdev");
2790 return -1;
2791 }
2792
9ba8130c
SH
2793 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2794 if (err >= sizeof(peer)) {
2795 ERROR("peer name too long");
2796 return -1;
2797 }
26c39028 2798
3cfc0f3a
MN
2799 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2800 if (err) {
2801 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2802 peer, netdev->link, strerror(-err));
26c39028
JHS
2803 return -1;
2804 }
2805
2806 netdev->ifindex = if_nametoindex(peer);
2807 if (!netdev->ifindex) {
2808 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2809 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2810 return -1;
2811 }
2812
e892973e
DL
2813 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2814 netdev->ifindex);
2815
26c39028
JHS
2816 return 0;
2817}
2818
74a2b586
JK
/* Nothing to undo for a vlan device; always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2823
e3b4c4c4 2824static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2825{
6168e99f
DL
2826 if (!netdev->link) {
2827 ERROR("no link specified for the physical interface");
2828 return -1;
2829 }
2830
9d083402 2831 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2832 if (!netdev->ifindex) {
9d083402 2833 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2834 return -1;
2835 }
2836
e3b4c4c4
ST
2837 if (netdev->upscript) {
2838 int err;
751d9dcd
DL
2839 err = run_script(handler->name, "net", netdev->upscript,
2840 "up", "phys", netdev->link, (char*) NULL);
2841 if (err)
e3b4c4c4 2842 return -1;
e3b4c4c4
ST
2843 }
2844
82d5ae15 2845 return 0;
0ad19a3f 2846}
2847
74a2b586
JK
2848static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2849{
2850 int err;
2851
2852 if (netdev->downscript) {
2853 err = run_script(handler->name, "net", netdev->downscript,
2854 "down", "phys", netdev->link, (char*) NULL);
2855 if (err)
2856 return -1;
2857 }
2858 return 0;
2859}
2860
26b797f3
SH
2861static int instanciate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
2862{
2863 netdev->ifindex = 0;
2864 return 0;
2865}
2866
e3b4c4c4 2867static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2868{
82d5ae15 2869 netdev->ifindex = 0;
e3b4c4c4
ST
2870 if (netdev->upscript) {
2871 int err;
751d9dcd
DL
2872 err = run_script(handler->name, "net", netdev->upscript,
2873 "up", "empty", (char*) NULL);
2874 if (err)
e3b4c4c4 2875 return -1;
e3b4c4c4 2876 }
82d5ae15 2877 return 0;
0ad19a3f 2878}
2879
74a2b586
JK
2880static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2881{
2882 int err;
2883
2884 if (netdev->downscript) {
2885 err = run_script(handler->name, "net", netdev->downscript,
2886 "down", "empty", (char*) NULL);
2887 if (err)
2888 return -1;
2889 }
2890 return 0;
2891}
2892
26b797f3
SH
/* Nothing to undo for a "none" device; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2897
2898int lxc_requests_empty_network(struct lxc_handler *handler)
2899{
2900 struct lxc_list *network = &handler->conf->network;
2901 struct lxc_list *iterator;
2902 struct lxc_netdev *netdev;
2903 bool found_none = false, found_nic = false;
2904
2905 if (lxc_list_empty(network))
2906 return 0;
2907
2908 lxc_list_for_each(iterator, network) {
2909
2910 netdev = iterator->elem;
2911
2912 if (netdev->type == LXC_NET_NONE)
2913 found_none = true;
2914 else
2915 found_nic = true;
2916 }
2917 if (found_none && !found_nic)
2918 return 1;
2919 return 0;
2920}
2921
e3b4c4c4 2922int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2923{
e3b4c4c4 2924 struct lxc_list *network = &handler->conf->network;
82d5ae15 2925 struct lxc_list *iterator;
82d5ae15 2926 struct lxc_netdev *netdev;
cbef6c52
SH
2927 int am_root = (getuid() == 0);
2928
2929 if (!am_root)
2930 return 0;
0ad19a3f 2931
5f4535a3 2932 lxc_list_for_each(iterator, network) {
0ad19a3f 2933
5f4535a3 2934 netdev = iterator->elem;
13954cce 2935
24654103 2936 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2937 ERROR("invalid network configuration type '%d'",
5f4535a3 2938 netdev->type);
82d5ae15
DL
2939 return -1;
2940 }
0ad19a3f 2941
e3b4c4c4 2942 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2943 ERROR("failed to create netdev");
2944 return -1;
2945 }
e3b4c4c4 2946
0ad19a3f 2947 }
2948
2949 return 0;
2950}
2951
74a2b586 2952void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2953{
74a2b586 2954 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2955 struct lxc_list *iterator;
2956 struct lxc_netdev *netdev;
2957
2958 lxc_list_for_each(iterator, network) {
2959 netdev = iterator->elem;
d472214b 2960
74a2b586 2961 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2962 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2963 WARN("failed to rename to the initial name the " \
2964 "netdev '%s'", netdev->link);
d472214b 2965 continue;
d8f8e352 2966 }
d472214b 2967
74a2b586
JK
2968 if (netdev_deconf[netdev->type](handler, netdev)) {
2969 WARN("failed to destroy netdev");
2970 }
2971
d8f8e352
DL
2972 /* Recent kernel remove the virtual interfaces when the network
2973 * namespace is destroyed but in case we did not moved the
2974 * interface to the network namespace, we have to destroy it
2975 */
74a2b586
JK
2976 if (netdev->ifindex != 0 &&
2977 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2978 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2979 }
2980}
2981
74a3920a 2982static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
2983{
2984 pid_t child;
2985
2986 if (netdev->type != LXC_NET_VETH) {
2987 ERROR("nic type %d not support for unprivileged use",
2988 netdev->type);
2989 return -1;
2990 }
2991
2992 if ((child = fork()) < 0) {
2993 SYSERROR("fork");
2994 return -1;
2995 }
2996
2997 if (child > 0)
2998 return wait_for_pid(child);
2999
3000 // Call lxc-user-nic pid type bridge
3001 char pidstr[20];
4119204e 3002 char *args[] = { "lxc-user-nic", pidstr, "veth", netdev->link, netdev->name, NULL };
cbef6c52
SH
3003 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3004 pidstr[19] = '\0';
3005 execvp("lxc-user-nic", args);
3006 SYSERROR("execvp lxc-user-nic");
3007 exit(1);
3008}
3009
5f4535a3 3010int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3011{
82d5ae15 3012 struct lxc_list *iterator;
82d5ae15 3013 struct lxc_netdev *netdev;
cbef6c52 3014 int am_root = (getuid() == 0);
3cfc0f3a 3015 int err;
0ad19a3f 3016
5f4535a3 3017 lxc_list_for_each(iterator, network) {
82d5ae15 3018
5f4535a3 3019 netdev = iterator->elem;
82d5ae15 3020
fbb16259 3021 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3022 if (unpriv_assign_nic(netdev, pid))
3023 return -1;
3024 // TODO fill in netdev->ifindex and name
3025 continue;
3026 }
236087a6 3027
fbb16259
SH
3028 /* empty network namespace, nothing to move */
3029 if (!netdev->ifindex)
3030 continue;
3031
d472214b 3032 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
3033 if (err) {
3034 ERROR("failed to move '%s' to the container : %s",
3035 netdev->link, strerror(-err));
82d5ae15
DL
3036 return -1;
3037 }
3038
c1c75c04 3039 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3040 }
3041
3042 return 0;
3043}
3044
251d0d2a
DE
3045static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3046 size_t buf_size)
f6d3e3e4
SH
3047{
3048 char path[PATH_MAX];
e4ccd113 3049 int ret, closeret;
f6d3e3e4
SH
3050 FILE *f;
3051
3052 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3053 if (ret < 0 || ret >= PATH_MAX) {
3054 fprintf(stderr, "%s: path name too long", __func__);
3055 return -E2BIG;
3056 }
3057 f = fopen(path, "w");
3058 if (!f) {
3059 perror("open");
3060 return -EINVAL;
3061 }
251d0d2a 3062 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3063 if (ret < 0)
e4ccd113
SH
3064 SYSERROR("writing id mapping");
3065 closeret = fclose(f);
3066 if (closeret)
3067 SYSERROR("writing id mapping");
3068 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3069}
3070
3071int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3072{
3073 struct lxc_list *iterator;
3074 struct id_map *map;
3075 int ret = 0;
251d0d2a 3076 enum idtype type;
4f7521b4 3077 char *buf = NULL, *pos;
cf3ef16d 3078 int am_root = (getuid() == 0);
251d0d2a
DE
3079
3080 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3081 int left, fill;
cf3ef16d
SH
3082 int had_entry = 0;
3083 if (!buf) {
3084 buf = pos = malloc(4096);
4f7521b4
SH
3085 if (!buf)
3086 return -ENOMEM;
cf3ef16d
SH
3087 }
3088 pos = buf;
3089 if (!am_root)
3090 pos += sprintf(buf, "new%cidmap %d ",
3091 type == ID_TYPE_UID ? 'u' : 'g',
3092 pid);
4f7521b4 3093
cf3ef16d
SH
3094 lxc_list_for_each(iterator, idmap) {
3095 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3096 map = iterator->elem;
cf3ef16d
SH
3097 if (map->idtype != type)
3098 continue;
3099
3100 had_entry = 1;
3101 left = 4096 - (pos - buf);
3102 fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
3103 map->hostid, map->range);
3104 if (fill <= 0 || fill >= left)
3105 SYSERROR("snprintf failed, too many mappings");
3106 pos += fill;
251d0d2a 3107 }
cf3ef16d 3108 if (!had_entry)
4f7521b4 3109 continue;
cf3ef16d
SH
3110 left = 4096 - (pos - buf);
3111 fill = snprintf(pos, left, "\n");
3112 if (fill <= 0 || fill >= left)
3113 SYSERROR("snprintf failed, too many mappings");
3114 pos += fill;
3115
3116 if (am_root)
3117 ret = write_id_mapping(type, pid, buf, pos-buf);
3118 else
3119 ret = system(buf);
3120
f6d3e3e4
SH
3121 if (ret)
3122 break;
3123 }
251d0d2a 3124
4f7521b4
SH
3125 if (buf)
3126 free(buf);
f6d3e3e4
SH
3127 return ret;
3128}
3129
cf3ef16d
SH
3130/*
3131 * return the host uid to which the container root is mapped, or -1 on
3132 * error
3133 */
74a3920a 3134static uid_t get_mapped_rootid(struct lxc_conf *conf)
cf3ef16d
SH
3135{
3136 struct lxc_list *it;
3137 struct id_map *map;
3138
3139 lxc_list_for_each(it, &conf->id_map) {
3140 map = it->elem;
3141 if (map->idtype != ID_TYPE_UID)
3142 continue;
3143 if (map->nsid != 0)
3144 continue;
c4d10a05 3145 return (uid_t) map->hostid;
cf3ef16d 3146 }
c4d10a05 3147 return (uid_t)-1;
cf3ef16d
SH
3148}
3149
57d116ab 3150int mapped_hostid(int id, struct lxc_conf *conf)
cf3ef16d
SH
3151{
3152 struct lxc_list *it;
3153 struct id_map *map;
3154 lxc_list_for_each(it, &conf->id_map) {
3155 map = it->elem;
3156 if (map->idtype != ID_TYPE_UID)
3157 continue;
3158 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3159 return (id - map->hostid) + map->nsid;
cf3ef16d 3160 }
57d116ab 3161 return -1;
cf3ef16d
SH
3162}
3163
3164int find_unmapped_nsuid(struct lxc_conf *conf)
3165{
3166 struct lxc_list *it;
3167 struct id_map *map;
3168 uid_t freeid = 0;
3169again:
3170 lxc_list_for_each(it, &conf->id_map) {
3171 map = it->elem;
3172 if (map->idtype != ID_TYPE_UID)
3173 continue;
3174 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3175 freeid = map->nsid + map->range;
3176 goto again;
3177 }
3178 }
3179 return freeid;
3180}
3181
19a26f82
MK
3182int lxc_find_gateway_addresses(struct lxc_handler *handler)
3183{
3184 struct lxc_list *network = &handler->conf->network;
3185 struct lxc_list *iterator;
3186 struct lxc_netdev *netdev;
3187 int link_index;
3188
3189 lxc_list_for_each(iterator, network) {
3190 netdev = iterator->elem;
3191
3192 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3193 continue;
3194
3195 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3196 ERROR("gateway = auto only supported for "
3197 "veth and macvlan");
3198 return -1;
3199 }
3200
3201 if (!netdev->link) {
3202 ERROR("gateway = auto needs a link interface");
3203 return -1;
3204 }
3205
3206 link_index = if_nametoindex(netdev->link);
3207 if (!link_index)
3208 return -EINVAL;
3209
3210 if (netdev->ipv4_gateway_auto) {
3211 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3212 ERROR("failed to automatically find ipv4 gateway "
3213 "address from link interface '%s'", netdev->link);
3214 return -1;
3215 }
3216 }
3217
3218 if (netdev->ipv6_gateway_auto) {
3219 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3220 ERROR("failed to automatically find ipv6 gateway "
3221 "address from link interface '%s'", netdev->link);
3222 return -1;
3223 }
3224 }
3225 }
3226
3227 return 0;
3228}
3229
5e4a62bf 3230int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3231{
5e4a62bf 3232 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3233 int i, ret;
b0a33c1e 3234
5e4a62bf
DL
3235 /* no tty in the configuration */
3236 if (!conf->tty)
b0a33c1e 3237 return 0;
3238
13954cce 3239 tty_info->pty_info =
e4e7d59d 3240 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3241 if (!tty_info->pty_info) {
36eb9bde 3242 SYSERROR("failed to allocate pty_info");
985d15b1 3243 return -1;
b0a33c1e 3244 }
3245
985d15b1 3246 for (i = 0; i < conf->tty; i++) {
13954cce 3247
b0a33c1e 3248 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3249
025ed0f3
SH
3250 process_lock();
3251 ret = openpty(&pty_info->master, &pty_info->slave,
3252 pty_info->name, NULL, NULL);
3253 process_unlock();
3254 if (ret) {
36eb9bde 3255 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3256 tty_info->nbtty = i;
3257 lxc_delete_tty(tty_info);
3258 return -1;
b0a33c1e 3259 }
3260
5332bb84
DL
3261 DEBUG("allocated pty '%s' (%d/%d)",
3262 pty_info->name, pty_info->master, pty_info->slave);
3263
b035ad62
MS
3264 /* Prevent leaking the file descriptors to the container */
3265 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3266 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3267
b0a33c1e 3268 pty_info->busy = 0;
3269 }
3270
985d15b1 3271 tty_info->nbtty = conf->tty;
1ac470c0
DL
3272
3273 INFO("tty's configured");
3274
985d15b1 3275 return 0;
b0a33c1e 3276}
3277
3278void lxc_delete_tty(struct lxc_tty_info *tty_info)
3279{
3280 int i;
3281
3282 for (i = 0; i < tty_info->nbtty; i++) {
3283 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3284
3285 close(pty_info->master);
3286 close(pty_info->slave);
3287 }
3288
3289 free(tty_info->pty_info);
3290 tty_info->nbtty = 0;
3291}
3292
/*
 * chown_mapped_root: for an unprivileged user with uid X to chown a dir
 * to subuid Y, he needs to run chown as root in a userns where
 * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
 * X.  That way, the container root is privileged with respect to
 * hostuid X, allowing him to do the chown.
 *
 * When already running as root the chown is done directly. Otherwise a
 * child is forked which executes "lxc-usernsexec ... -- chown 0 <path>".
 *
 * Returns 0 on success, -1 if there is no usable mapping for container
 * root, if the direct chown or the fork fails; otherwise the result of
 * wait_for_pid() on the helper child.
 */
int chown_mapped_root(char *path, struct lxc_conf *conf)
{
	int mapped;
	uid_t rootid;
	pid_t pid;

	/*
	 * Compare in a signed type: storing the result straight into a
	 * uid_t would turn a negative "not found" value into a large
	 * positive id and let it slip past the check.
	 */
	mapped = get_mapped_rootid(conf);
	if (mapped <= 0) {
		ERROR("No mapping for container root");
		return -1;
	}
	rootid = (uid_t)mapped;

	if (geteuid() == 0) {
		if (chown(path, rootid, -1) < 0) {
			ERROR("Error chowning %s", path);
			return -1;
		}
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		SYSERROR("Failed forking");
		return -1;
	}

	if (!pid) {
		uid_t hostuid = geteuid();
		int ret;
		char map1[100], map2[100], map3[100];
		char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "-m",
				map3, "--", "chown", "0", path, NULL};

		/*
		 * We are in the forked child: every failure below must
		 * terminate the process. Returning would let the child
		 * carry on executing the caller's code.
		 */

		// "u:0:rootid:1"
		ret = snprintf(map1, sizeof(map1), "u:0:%u:1", (unsigned int)rootid);
		if (ret < 0 || (size_t)ret >= sizeof(map1)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		// "u:hostuid:hostuid:1"
		ret = snprintf(map2, sizeof(map2), "u:%u:%u:1",
			       (unsigned int)hostuid, (unsigned int)hostuid);
		if (ret < 0 || (size_t)ret >= sizeof(map2)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		// "g:0:hostgid:1"
		ret = snprintf(map3, sizeof(map3), "g:0:%u:1", (unsigned int)getgid());
		if (ret < 0 || (size_t)ret >= sizeof(map3)) {
			ERROR("Error gid printing map string");
			exit(1);
		}

		/* only returns on failure */
		execvp("lxc-usernsexec", args);
		SYSERROR("Failed executing usernsexec");
		exit(1);
	}

	return wait_for_pid(pid);
}
3354
c4d10a05 3355int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3356{
c4d10a05 3357 int i;
f6d3e3e4 3358
c4d10a05 3359 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3360 return 0;
c4d10a05
SH
3361
3362 for (i = 0; i < c->tty_info.nbtty; i++) {
3363 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3364
3365 if (chown_mapped_root(pty_info->name, c) < 0) {
3366 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3367 return -1;
3368 }
3369 }
3370
29b10e4f 3371 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3372 ERROR("Failed to chown %s", c->console.name);
3373 return -1;
3374 }
3375
f6d3e3e4
SH
3376 return 0;
3377}
3378
bc6928ff
MW
/* Start arguments handed to lxc_setup() as opaque data; argv[0] is the init command. */
struct start_args {
	char *const *argv;
};

/* Upper bound on the number of symlinks followed by check_autodev(). */
#define MAX_SYMLINK_DEPTH 32

/*
 * This routine is called when the configuration does not already specify a value
 * for autodev (mounting a file system on /dev and populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container has an /etc/systemd/system directory then it
 * is probably running systemd as the init process and it needs the autodev
 * mount to prevent it from mounting devtmpfs on /dev on its own, causing
 * conflicts in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev as this then enables us to use subdirectories under /dev for the
 * container /dev directories and we can fake udev devices.
 *
 * The only heuristic implemented so far: starting from the init command
 * (argv[0] of @data, "/sbin/init" by default), follow the chain of symlinks
 * below @rootfs and report autodev as soon as any path in that chain
 * contains "systemd".
 *
 * Returns  1 if autodev should be enabled,
 *          0 if autodev is not required,
 *         -2 if the answer could not be determined (bad rootfs, command
 *            not accessible, path too long).
 */
static int check_autodev( const char *rootfs, void *data )
{
	struct start_args *arg = data;
	int ret;
	ssize_t linklen;
	int loop_count = 0;
	struct stat s;
	char absrootfs[MAXPATHLEN];
	char path[MAXPATHLEN];
	char abs_path[MAXPATHLEN];
	char *command = "/sbin/init";

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if( arg && arg->argv && arg->argv[0] ) {
		command = arg->argv[0];
		DEBUG("Set exec command to %s\n", command );
	}

	/*
	 * snprintf always NUL-terminates; strncpy would leave path
	 * unterminated for a command of MAXPATHLEN-1 bytes or more.
	 */
	ret = snprintf(path, sizeof(path), "%s", command);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return -2;

	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) )
		return -2;

	/* Dereference down the symlink merry path testing as we go. */
	/* If anything references systemd in the path - set autodev! */
	/* Renormalize to the rootfs before each dereference */
	/* Relative symlinks should fall out in the wash even with .. */
	while( 1 ) {
		if ( strstr( path, "systemd" ) ) {
			INFO("Container with systemd init detected - enabling autodev!");
			return 1;
		}

		ret = snprintf(abs_path, sizeof(abs_path), "%s/%s", absrootfs, path);
		if (ret < 0 || (size_t)ret >= sizeof(abs_path))
			return -2;

		/* readlink does not terminate the result; leave room for it */
		linklen = readlink( abs_path, path, sizeof(path) - 1 );

		if ( ( linklen <= 0 ) || ( ++loop_count > MAX_SYMLINK_DEPTH ) ) {
			break; /* Break out for other tests */
		}
		path[linklen] = '\0';
	}

	/*
	 * Add future checks here.
	 * Return positive if we should go autodev
	 * Return 0 if we should NOT go autodev
	 * Return negative if we encounter an error or can not determine...
	 */

	/* All else fails, we don't need autodev */
	INFO("Autodev not required.");
	return 0;
}
3460
3461int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath, struct cgroup_process_info *cgroup_info, void *data)
0ad19a3f 3462{
6c544cb3
MM
3463 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3464 if (setup_utsname(lxc_conf->utsname)) {
3465 ERROR("failed to setup the utsname for '%s'", name);
3466 return -1;
3467 }
0ad19a3f 3468 }
3469
5f4535a3 3470 if (setup_network(&lxc_conf->network)) {
36eb9bde 3471 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3472 return -1;
0ad19a3f 3473 }
3474
283678ed 3475 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3476 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3477 return -1;
3478 }
5ea6163a 3479
cc28d0b0 3480 if (setup_rootfs(lxc_conf)) {
ac778708 3481 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3482 return -1;
0ad19a3f 3483 }
3484
bc6928ff
MW
3485 if (lxc_conf->autodev < 0) {
3486 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
3487 }
3488
3489 if (lxc_conf->autodev > 0) {
3490 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3491 ERROR("failed to mount /dev in the container");
c6883f38
SH
3492 return -1;
3493 }
3494 }
3495
368bbc02
CS
3496 /* do automatic mounts (mainly /proc and /sys), but exclude
3497 * those that need to wait until other stuff has finished
3498 */
b06b8511 3499 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3500 ERROR("failed to setup the automatic mounts for '%s'", name);
3501 return -1;
3502 }
3503
80a881b2 3504 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3505 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3506 return -1;
576f946d 3507 }
3508
c1dc38c2 3509 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3510 ERROR("failed to setup the mount entries for '%s'", name);
3511 return -1;
3512 }
3513
368bbc02
CS
3514 /* now mount only cgroup, if wanted;
3515 * before, /sys could not have been mounted
3516 * (is either mounted automatically or via fstab entries)
3517 */
b06b8511 3518 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3519 ERROR("failed to setup the automatic mounts for '%s'", name);
3520 return -1;
3521 }
3522
283678ed 3523 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3524 ERROR("failed to run mount hooks for container '%s'.", name);
3525 return -1;
3526 }
3527
bc6928ff 3528 if (lxc_conf->autodev > 0) {
283678ed 3529 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3530 ERROR("failed to run autodev hooks for container '%s'.", name);
3531 return -1;
3532 }
91c3830e
SH
3533 if (setup_autodev(lxc_conf->rootfs.mount)) {
3534 ERROR("failed to populate /dev in the container");
3535 return -1;
3536 }
3537 }
368bbc02 3538
37903589 3539 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3540 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3541 return -1;
6e590161 3542 }
3543
7e0e1d94
AV
3544 if (lxc_conf->kmsg) {
3545 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3546 ERROR("failed to setup kmsg for '%s'", name);
3547 }
1bd051a6 3548
37903589 3549 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3550 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3551 return -1;
b0a33c1e 3552 }
3553
fe4de9a6
DE
3554 /* mount /proc if needed for LSM transition */
3555 if (lsm_proc_mount(lxc_conf) < 0) {
3556 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3557 return -1;
e075f5d9 3558 }
e075f5d9 3559
ac778708 3560 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3561 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3562 return -1;
ed502555 3563 }
3564
571e6ec8 3565 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3566 ERROR("failed to setup the new pts instance");
95b5ffaf 3567 return -1;
3c26f34e 3568 }
3569
cccc74b5
DL
3570 if (setup_personality(lxc_conf->personality)) {
3571 ERROR("failed to setup personality");
3572 return -1;
3573 }
3574
f6d3e3e4 3575 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3576 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3577 if (!lxc_list_empty(&lxc_conf->caps)) {
3578 ERROR("Simultaneously requested dropping and keeping caps");
3579 return -1;
3580 }
3581 if (dropcaps_except(&lxc_conf->keepcaps)) {
3582 ERROR("failed to keep requested caps\n");
3583 return -1;
3584 }
3585 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3586 ERROR("failed to drop capabilities");
3587 return -1;
3588 }
81810dd1
DL
3589 }
3590
cd54d859
DL
3591 NOTICE("'%s' is setup.", name);
3592
0ad19a3f 3593 return 0;
3594}
26ddeedd 3595
283678ed
SH
3596int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3597 const char *lxcpath, char *argv[])
26ddeedd
SH
3598{
3599 int which = -1;
3600 struct lxc_list *it;
3601
3602 if (strcmp(hook, "pre-start") == 0)
3603 which = LXCHOOK_PRESTART;
5ea6163a
SH
3604 else if (strcmp(hook, "pre-mount") == 0)
3605 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3606 else if (strcmp(hook, "mount") == 0)
3607 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3608 else if (strcmp(hook, "autodev") == 0)
3609 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3610 else if (strcmp(hook, "start") == 0)
3611 which = LXCHOOK_START;
3612 else if (strcmp(hook, "post-stop") == 0)
3613 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3614 else if (strcmp(hook, "clone") == 0)
3615 which = LXCHOOK_CLONE;
26ddeedd
SH
3616 else
3617 return -1;
3618 lxc_list_for_each(it, &conf->hooks[which]) {
3619 int ret;
3620 char *hookname = it->elem;
283678ed 3621 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3622 if (ret)
3623 return ret;
3624 }
3625 return 0;
3626}
72d0e1cb 3627
427b3a21 3628static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3629{
3630 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3631 struct lxc_list *it2,*next;
72d0e1cb
SG
3632
3633 lxc_list_del(it);
3634
3635 if (netdev->link)
3636 free(netdev->link);
3637 if (netdev->name)
3638 free(netdev->name);
c9bb9a85
DE
3639 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3640 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3641 if (netdev->upscript)
3642 free(netdev->upscript);
3643 if (netdev->hwaddr)
3644 free(netdev->hwaddr);
3645 if (netdev->mtu)
3646 free(netdev->mtu);
3647 if (netdev->ipv4_gateway)
3648 free(netdev->ipv4_gateway);
3649 if (netdev->ipv6_gateway)
3650 free(netdev->ipv6_gateway);
9ebb03ad 3651 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3652 lxc_list_del(it2);
3653 free(it2->elem);
3654 free(it2);
3655 }
9ebb03ad 3656 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3657 lxc_list_del(it2);
3658 free(it2->elem);
3659 free(it2);
3660 }
d95db067 3661 free(netdev);
72d0e1cb
SG
3662 free(it);
3663}
3664
3665/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3666int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3667{
3668 char *p1;
3669 int ret, idx, i;
3670 struct lxc_list *it;
3671 struct lxc_netdev *netdev;
3672
3673 p1 = index(key, '.');
3674 if (!p1 || *(p1+1) == '\0')
3675 p1 = NULL;
3676
3677 ret = sscanf(key, "%d", &idx);
3678 if (ret != 1) return -1;
3679 if (idx < 0)
3680 return -1;
3681
3682 i = 0;
3683 lxc_list_for_each(it, &c->network) {
3684 if (i == idx)
3685 break;
3686 i++;
3687 }
3688 if (i < idx) // we don't have that many nics defined
3689 return -1;
3690
3691 if (!it || !it->elem)
3692 return -1;
3693
3694 netdev = it->elem;
3695
3696 if (!p1) {
3697 lxc_remove_nic(it);
3698 } else if (strcmp(p1, "ipv4") == 0) {
9ebb03ad
DE
3699 struct lxc_list *it2,*next;
3700 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3701 lxc_list_del(it2);
3702 free(it2->elem);
3703 free(it2);
3704 }
3705 } else if (strcmp(p1, "ipv6") == 0) {
9ebb03ad
DE
3706 struct lxc_list *it2,*next;
3707 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3708 lxc_list_del(it2);
3709 free(it2->elem);
3710 free(it2);
3711 }
3712 } else if (strcmp(p1, "link") == 0) {
3713 if (netdev->link) {
3714 free(netdev->link);
3715 netdev->link = NULL;
3716 }
3717 } else if (strcmp(p1, "name") == 0) {
3718 if (netdev->name) {
3719 free(netdev->name);
3720 netdev->name = NULL;
3721 }
3722 } else if (strcmp(p1, "script.up") == 0) {
3723 if (netdev->upscript) {
3724 free(netdev->upscript);
3725 netdev->upscript = NULL;
3726 }
3727 } else if (strcmp(p1, "hwaddr") == 0) {
3728 if (netdev->hwaddr) {
3729 free(netdev->hwaddr);
3730 netdev->hwaddr = NULL;
3731 }
3732 } else if (strcmp(p1, "mtu") == 0) {
3733 if (netdev->mtu) {
3734 free(netdev->mtu);
3735 netdev->mtu = NULL;
3736 }
3737 } else if (strcmp(p1, "ipv4_gateway") == 0) {
3738 if (netdev->ipv4_gateway) {
3739 free(netdev->ipv4_gateway);
3740 netdev->ipv4_gateway = NULL;
3741 }
3742 } else if (strcmp(p1, "ipv6_gateway") == 0) {
3743 if (netdev->ipv6_gateway) {
3744 free(netdev->ipv6_gateway);
3745 netdev->ipv6_gateway = NULL;
3746 }
3747 }
3748 else return -1;
3749
3750 return 0;
3751}
3752
3753int lxc_clear_config_network(struct lxc_conf *c)
3754{
9ebb03ad
DE
3755 struct lxc_list *it,*next;
3756 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3757 lxc_remove_nic(it);
3758 }
3759 return 0;
3760}
3761
3762int lxc_clear_config_caps(struct lxc_conf *c)
3763{
9ebb03ad 3764 struct lxc_list *it,*next;
72d0e1cb 3765
9ebb03ad 3766 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3767 lxc_list_del(it);
3768 free(it->elem);
3769 free(it);
3770 }
3771 return 0;
3772}
3773
74a3920a 3774static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
3775 struct lxc_list *it, *next;
3776
4355ab5f 3777 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
3778 lxc_list_del(it);
3779 free(it->elem);
3780 free(it);
3781 }
3782 return 0;
3783}
3784
4355ab5f
SH
3785int lxc_clear_idmaps(struct lxc_conf *c)
3786{
3787 return lxc_free_idmap(&c->id_map);
3788}
3789
1fb86a7c
SH
3790int lxc_clear_config_keepcaps(struct lxc_conf *c)
3791{
3792 struct lxc_list *it,*next;
3793
3794 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3795 lxc_list_del(it);
3796 free(it->elem);
3797 free(it);
3798 }
3799 return 0;
3800}
3801
12a50cc6 3802int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 3803{
9ebb03ad 3804 struct lxc_list *it,*next;
72d0e1cb 3805 bool all = false;
12a50cc6 3806 const char *k = key + 11;
72d0e1cb
SG
3807
3808 if (strcmp(key, "lxc.cgroup") == 0)
3809 all = true;
3810
9ebb03ad 3811 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
3812 struct lxc_cgroup *cg = it->elem;
3813 if (!all && strcmp(cg->subsystem, k) != 0)
3814 continue;
3815 lxc_list_del(it);
3816 free(cg->subsystem);
3817 free(cg->value);
3818 free(cg);
3819 free(it);
3820 }
3821 return 0;
3822}
3823
ee1e7aa0
SG
3824int lxc_clear_groups(struct lxc_conf *c)
3825{
3826 struct lxc_list *it,*next;
3827
3828 lxc_list_for_each_safe(it, &c->groups, next) {
3829 lxc_list_del(it);
3830 free(it->elem);
3831 free(it);
3832 }
3833 return 0;
3834}
3835
72d0e1cb
SG
3836int lxc_clear_mount_entries(struct lxc_conf *c)
3837{
9ebb03ad 3838 struct lxc_list *it,*next;
72d0e1cb 3839
9ebb03ad 3840 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
3841 lxc_list_del(it);
3842 free(it->elem);
3843 free(it);
3844 }
3845 return 0;
3846}
3847
12a50cc6 3848int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3849{
9ebb03ad 3850 struct lxc_list *it,*next;
17ed13a3 3851 bool all = false, done = false;
12a50cc6 3852 const char *k = key + 9;
72d0e1cb
SG
3853 int i;
3854
17ed13a3
SH
3855 if (strcmp(key, "lxc.hook") == 0)
3856 all = true;
3857
72d0e1cb 3858 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 3859 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 3860 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
3861 lxc_list_del(it);
3862 free(it->elem);
3863 free(it);
3864 }
3865 done = true;
72d0e1cb
SG
3866 }
3867 }
17ed13a3
SH
3868
3869 if (!done) {
3870 ERROR("Invalid hook key: %s", key);
3871 return -1;
3872 }
72d0e1cb
SG
3873 return 0;
3874}
8eb5694b 3875
74a3920a 3876static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
3877{
3878 int i;
3879
3880 if (!conf->num_savednics)
3881 return;
3882 for (i=0; i < conf->num_savednics; i++)
3883 free(conf->saved_nics[i].orig_name);
3884 conf->saved_nics = 0;
3885 free(conf->saved_nics);
3886}
3887
8eb5694b
SH
3888void lxc_conf_free(struct lxc_conf *conf)
3889{
3890 if (!conf)
3891 return;
3892 if (conf->console.path)
3893 free(conf->console.path);
54c30e29 3894 if (conf->rootfs.mount)
8eb5694b 3895 free(conf->rootfs.mount);
d95db067
DE
3896 if (conf->rootfs.path)
3897 free(conf->rootfs.path);
a58878d6
SH
3898 if (conf->rootfs.pivot)
3899 free(conf->rootfs.pivot);
3900 if (conf->logfile)
3901 free(conf->logfile);
d95db067
DE
3902 if (conf->utsname)
3903 free(conf->utsname);
3904 if (conf->ttydir)
3905 free(conf->ttydir);
3906 if (conf->fstab)
3907 free(conf->fstab);
fc7e8864
WM
3908 if (conf->rcfile)
3909 free(conf->rcfile);
8eb5694b 3910 lxc_clear_config_network(conf);
fe4de9a6
DE
3911 if (conf->lsm_aa_profile)
3912 free(conf->lsm_aa_profile);
3913 if (conf->lsm_se_context)
3914 free(conf->lsm_se_context);
769872f9 3915 lxc_seccomp_free(conf);
8eb5694b 3916 lxc_clear_config_caps(conf);
1fb86a7c 3917 lxc_clear_config_keepcaps(conf);
8eb5694b 3918 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 3919 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3920 lxc_clear_mount_entries(conf);
7b35f3d6 3921 lxc_clear_saved_nics(conf);
27c27d73 3922 lxc_clear_idmaps(conf);
ee1e7aa0 3923 lxc_clear_groups(conf);
8eb5694b
SH
3924 free(conf);
3925}
4355ab5f
SH
3926
/*
 * Argument bundle for run_userns_fn(): the function to call, its
 * argument, and the pipe used by the parent to release the child.
 */
struct userns_fn_data {
	int (*fn)(void *);
	void *arg;
	int p[2];
};

/*
 * Child entry point: close the write end of the pipe, block until one
 * byte arrives on the read end, then run the wrapped function.
 *
 * Returns -1 if that byte cannot be read, otherwise whatever the
 * wrapped function returns.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *ctx = data;
	char kick;
	ssize_t got;

	// we're not sharing with the parent any more, if it was a thread

	close(ctx->p[1]);

	got = read(ctx->p[0], &kick, 1);
	if (got != 1)
		return -1;

	close(ctx->p[0]);

	return ctx->fn(ctx->arg);
}
3945
3946/*
3947 * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
3948 * alread there.
3949 * We may want to generalize this to do gids as well as uids, but right now
3950 * it's not necessary.
3951 */
3952static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
3953{
3954 int hostid_mapped = mapped_hostid(uid, conf);
3955 struct lxc_list *new = NULL, *tmp, *it, *next;
3956 struct id_map *entry;
3957
3958 if (hostid_mapped < 0) {
3959 hostid_mapped = find_unmapped_nsuid(conf);
3960 if (hostid_mapped < 0) {
3961 ERROR("Could not find free uid to map");
3962 return NULL;
3963 }
3964 new = malloc(sizeof(*new));
3965 if (!new) {
3966 ERROR("Out of memory building id map");
3967 return NULL;
3968 }
3969 entry = malloc(sizeof(*entry));
3970 if (!entry) {
3971 free(new);
3972 ERROR("Out of memory building idmap entry");
3973 return NULL;
3974 }
3975 new->elem = entry;
3976 entry->idtype = ID_TYPE_UID;
3977 entry->nsid = hostid_mapped;
3978 entry->hostid = (unsigned long)uid;
3979 entry->range = 1;
3980 lxc_list_init(new);
3981 }
3982 lxc_list_for_each_safe(it, &conf->id_map, next) {
3983 tmp = malloc(sizeof(*tmp));
3984 if (!tmp)
3985 goto err;
3986 entry = malloc(sizeof(*entry));
3987 if (!entry) {
3988 free(tmp);
3989 goto err;
3990 }
3991 memset(entry, 0, sizeof(*entry));
3992 memcpy(entry, it->elem, sizeof(*entry));
3993 tmp->elem = entry;
3994 if (!new) {
3995 new = tmp;
3996 lxc_list_init(new);
3997 } else
3998 lxc_list_add_tail(new, tmp);
3999 }
4000
4001 return new;
4002
4003err:
4004 ERROR("Out of memory building a new uid map");
908fde6a
SH
4005 if (new)
4006 lxc_free_idmap(new);
c30ac545 4007 free(new);
4355ab5f
SH
4008 return NULL;
4009}
4010
4011/*
4012 * Run a function in a new user namespace.
4013 * The caller's euid will be mapped in if it is not already.
4014 */
4015int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4016{
4017 int ret, pid;
4018 struct userns_fn_data d;
4019 char c = '1';
4020 int p[2];
4021 struct lxc_list *idmap;
4022
4355ab5f 4023 ret = pipe(p);
4355ab5f
SH
4024 if (ret < 0) {
4025 SYSERROR("opening pipe");
4026 return -1;
4027 }
4028 d.fn = fn;
4029 d.arg = data;
4030 d.p[0] = p[0];
4031 d.p[1] = p[1];
4032 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4033 if (pid < 0)
4034 goto err;
4355ab5f 4035 close(p[0]);
4355ab5f
SH
4036 p[0] = -1;
4037
4038 if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
4039 ERROR("Error adding self to container uid map");
4040 goto err;
4041 }
4042
4043 ret = lxc_map_ids(idmap, pid);
4044 lxc_free_idmap(idmap);
88dd66fc 4045 free(idmap);
4355ab5f
SH
4046 if (ret < 0) {
4047 ERROR("Error setting up child mappings");
4048 goto err;
4049 }
4050
4051 // kick the child
4052 if (write(p[1], &c, 1) != 1) {
4053 SYSERROR("writing to pipe to child");
4054 goto err;
4055 }
4056
4057 if ((ret = wait_for_pid(pid)) < 0) {
4058 ERROR("Child returned an error: %d\n", ret);
4059 goto err;
4060 }
4061err:
4355ab5f
SH
4062 if (p[0] != -1)
4063 close(p[0]);
4064 close(p[1]);
4355ab5f
SH
4065 return -1;
4066}