]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
lxc_info: Fix -H with -c
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
4a0ba80d 35#include <time.h>
e827ff7e
SG
36
37#if HAVE_PTY_H
b0a33c1e 38#include <pty.h>
e827ff7e
SG
39#else
40#include <../include/openpty.h>
41#endif
0ad19a3f 42
b3ecde1e
DL
43#include <linux/loop.h>
44
0ad19a3f 45#include <sys/types.h>
46#include <sys/utsname.h>
47#include <sys/param.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
50#include <sys/mount.h>
51#include <sys/mman.h>
81810dd1 52#include <sys/prctl.h>
0ad19a3f 53
54#include <arpa/inet.h>
55#include <fcntl.h>
56#include <netinet/in.h>
57#include <net/if.h>
6f4a3756 58#include <libgen.h>
0ad19a3f 59
e5bda9ee 60#include "network.h"
61#include "error.h"
b2718c72 62#include "parse.h"
1b09f2c0
DL
63#include "utils.h"
64#include "conf.h"
65#include "log.h"
66#include "lxc.h" /* for lxc_cgroup_set() */
d55bc1ad 67#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 68#include "bdev.h"
368bbc02 69#include "cgroup.h"
025ed0f3 70#include "lxclock.h"
4355ab5f 71#include "namespace.h"
fe4de9a6 72#include "lsm/lsm.h"
d0a36f2c 73
495d2046
SG
74#if HAVE_SYS_CAPABILITY_H
75#include <sys/capability.h>
76#endif
77
6ff05e18
SG
78#if HAVE_SYS_PERSONALITY_H
79#include <sys/personality.h>
80#endif
81
edaf8b1b
SG
82#if IS_BIONIC
83#include <../include/lxcmntent.h>
84#else
85#include <mntent.h>
86#endif
87
769872f9
SH
88#include "lxcseccomp.h"
89
36eb9bde 90lxc_log_define(lxc_conf, lxc);
e5bda9ee 91
0ad19a3f 92#define MAXHWLEN 18
93#define MAXINDEXLEN 20
442cbbe6 94#define MAXMTULEN 16
0ad19a3f 95#define MAXLINELEN 128
96
495d2046 97#if HAVE_SYS_CAPABILITY_H
b09094da
MN
98#ifndef CAP_SETFCAP
99#define CAP_SETFCAP 31
100#endif
101
102#ifndef CAP_MAC_OVERRIDE
103#define CAP_MAC_OVERRIDE 32
104#endif
105
106#ifndef CAP_MAC_ADMIN
107#define CAP_MAC_ADMIN 33
108#endif
495d2046 109#endif
b09094da
MN
110
111#ifndef PR_CAPBSET_DROP
112#define PR_CAPBSET_DROP 24
113#endif
114
9818cae4
SG
115#ifndef LO_FLAGS_AUTOCLEAR
116#define LO_FLAGS_AUTOCLEAR 4
117#endif
118
2d76d1d7
SG
/*
 * pivot_root(): use the C library's version when the build detected one,
 * otherwise provide a local wrapper around the raw system call.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	/* No syscall number available at build time: report "not implemented". */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
133
/*
 * sethostname() replacement, compiled only when the build did not find one
 * in the C library. Forwards to the raw system call, or fails with ENOSYS
 * when the syscall number is not known at build time.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
146
72f919c4
SG
147/* Define __S_ISTYPE if missing from the C library */
148#ifndef __S_ISTYPE
149#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
150#endif
151
72d0e1cb 152char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 153 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 154
/* Callback signature used by the netdev_conf[] and netdev_deconf[] tables. */
typedef int (*instanciate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
0ad19a3f 156
998ac676
RT
/* One row of the mount_opt[] keyword table. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* 0 or 1 in the table; presumably 1 = "unset the flag" - confirm against the parser */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
162
81810dd1
DL
/* One row of the caps_opt[] table: a capability name and its CAP_* number. */
struct caps_opt {
	char *name;	/* lower-case name without the "CAP_" prefix */
	int value;	/* matching CAP_* constant */
};
167
e3b4c4c4
ST
168static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
169static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
170static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
171static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
172static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 173static int instanciate_none(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 174
24654103
DL
175static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
176 [LXC_NET_VETH] = instanciate_veth,
177 [LXC_NET_MACVLAN] = instanciate_macvlan,
178 [LXC_NET_VLAN] = instanciate_vlan,
179 [LXC_NET_PHYS] = instanciate_phys,
180 [LXC_NET_EMPTY] = instanciate_empty,
26b797f3 181 [LXC_NET_NONE] = instanciate_none,
0ad19a3f 182};
183
74a2b586
JK
184static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
185static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
186static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
187static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
188static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 189static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586
JK
190
191static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
192 [LXC_NET_VETH] = shutdown_veth,
193 [LXC_NET_MACVLAN] = shutdown_macvlan,
194 [LXC_NET_VLAN] = shutdown_vlan,
195 [LXC_NET_PHYS] = shutdown_phys,
196 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 197 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
198};
199
998ac676 200static struct mount_opt mount_opt[] = {
88d413d5
SW
201 { "defaults", 0, 0 },
202 { "ro", 0, MS_RDONLY },
203 { "rw", 1, MS_RDONLY },
204 { "suid", 1, MS_NOSUID },
205 { "nosuid", 0, MS_NOSUID },
206 { "dev", 1, MS_NODEV },
207 { "nodev", 0, MS_NODEV },
208 { "exec", 1, MS_NOEXEC },
209 { "noexec", 0, MS_NOEXEC },
210 { "sync", 0, MS_SYNCHRONOUS },
211 { "async", 1, MS_SYNCHRONOUS },
212 { "dirsync", 0, MS_DIRSYNC },
213 { "remount", 0, MS_REMOUNT },
214 { "mand", 0, MS_MANDLOCK },
215 { "nomand", 1, MS_MANDLOCK },
216 { "atime", 1, MS_NOATIME },
217 { "noatime", 0, MS_NOATIME },
218 { "diratime", 1, MS_NODIRATIME },
219 { "nodiratime", 0, MS_NODIRATIME },
220 { "bind", 0, MS_BIND },
221 { "rbind", 0, MS_BIND|MS_REC },
222 { "relatime", 0, MS_RELATIME },
223 { "norelatime", 1, MS_RELATIME },
224 { "strictatime", 0, MS_STRICTATIME },
225 { "nostrictatime", 1, MS_STRICTATIME },
226 { NULL, 0, 0 },
998ac676
RT
227};
228
495d2046 229#if HAVE_SYS_CAPABILITY_H
81810dd1 230static struct caps_opt caps_opt[] = {
a6afdde9 231 { "chown", CAP_CHOWN },
1e11be34
DL
232 { "dac_override", CAP_DAC_OVERRIDE },
233 { "dac_read_search", CAP_DAC_READ_SEARCH },
234 { "fowner", CAP_FOWNER },
235 { "fsetid", CAP_FSETID },
81810dd1
DL
236 { "kill", CAP_KILL },
237 { "setgid", CAP_SETGID },
238 { "setuid", CAP_SETUID },
239 { "setpcap", CAP_SETPCAP },
240 { "linux_immutable", CAP_LINUX_IMMUTABLE },
241 { "net_bind_service", CAP_NET_BIND_SERVICE },
242 { "net_broadcast", CAP_NET_BROADCAST },
243 { "net_admin", CAP_NET_ADMIN },
244 { "net_raw", CAP_NET_RAW },
245 { "ipc_lock", CAP_IPC_LOCK },
246 { "ipc_owner", CAP_IPC_OWNER },
247 { "sys_module", CAP_SYS_MODULE },
248 { "sys_rawio", CAP_SYS_RAWIO },
249 { "sys_chroot", CAP_SYS_CHROOT },
250 { "sys_ptrace", CAP_SYS_PTRACE },
251 { "sys_pacct", CAP_SYS_PACCT },
252 { "sys_admin", CAP_SYS_ADMIN },
253 { "sys_boot", CAP_SYS_BOOT },
254 { "sys_nice", CAP_SYS_NICE },
255 { "sys_resource", CAP_SYS_RESOURCE },
256 { "sys_time", CAP_SYS_TIME },
257 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
258 { "mknod", CAP_MKNOD },
259 { "lease", CAP_LEASE },
9527e566 260#ifdef CAP_AUDIT_WRITE
81810dd1 261 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
262#endif
263#ifdef CAP_AUDIT_CONTROL
81810dd1 264 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 265#endif
81810dd1
DL
266 { "setfcap", CAP_SETFCAP },
267 { "mac_override", CAP_MAC_OVERRIDE },
268 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
269#ifdef CAP_SYSLOG
270 { "syslog", CAP_SYSLOG },
271#endif
272#ifdef CAP_WAKE_ALARM
273 { "wake_alarm", CAP_WAKE_ALARM },
274#endif
81810dd1 275};
495d2046
SG
276#else
277static struct caps_opt caps_opt[] = {};
278#endif
81810dd1 279
91c3830e
SH
280static int run_buffer(char *buffer)
281{
ebec9176 282 struct lxc_popen_FILE *f;
91c3830e 283 char *output;
8e7da691 284 int ret;
91c3830e 285
ebec9176 286 f = lxc_popen(buffer);
91c3830e
SH
287 if (!f) {
288 SYSERROR("popen failed");
289 return -1;
290 }
291
292 output = malloc(LXC_LOG_BUFFER_SIZE);
293 if (!output) {
294 ERROR("failed to allocate memory for script output");
ebec9176 295 lxc_pclose(f);
91c3830e
SH
296 return -1;
297 }
298
ebec9176 299 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
300 DEBUG("script output: %s", output);
301
302 free(output);
303
ebec9176 304 ret = lxc_pclose(f);
8e7da691 305 if (ret == -1) {
91c3830e
SH
306 SYSERROR("Script exited on error");
307 return -1;
8e7da691
DE
308 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
309 ERROR("Script exited with status %d", WEXITSTATUS(ret));
310 return -1;
311 } else if (WIFSIGNALED(ret)) {
312 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
313 strsignal(WTERMSIG(ret)));
314 return -1;
91c3830e
SH
315 }
316
317 return 0;
318}
319
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * @name:    container name (2nd word of the command line)
 * @section: config section (3rd word)
 * @script:  script to execute (1st word)
 * @hook:    hook name (4th word)
 * @lxcpath: not used by this function
 * @argsin:  optional NULL-terminated array of extra arguments, may be NULL
 *
 * The command line lives on the heap rather than in an alloca() buffer:
 * its length is derived from caller-supplied strings and is only bounded
 * by INT_MAX, which is far too much to place on the stack.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument is preceded by one blank */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	/* hook plus the terminating NUL */
	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* the three blanks of "%s %s %s %s" */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto err;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto err;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);
	return ret;

err:
	free(buffer);
	return -1;
}
369
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" from a
 * NULL-terminated list of extra string arguments and execute it with
 * run_buffer().
 *
 * @name:    container name (2nd word of the command line)
 * @section: config section (3rd word)
 * @script:  script to execute (1st word)
 * @...:     extra "char *" arguments, terminated by a NULL pointer
 *
 * Every va_start() is paired with a va_end() on all paths, including the
 * "args too long" error path. The command line is heap allocated because
 * its size depends on caller-supplied strings and is only bounded by
 * INT_MAX.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: every extra argument is preceded by one blank */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two blanks of "%s %s %s" plus the terminating NUL */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
421
/*
 * Per-line callback for mount_unknown_fs(): treat @buffer as a filesystem
 * type name and try to mount cbarg->rootfs on cbarg->target with it.
 *
 * Lines containing "nodev" are skipped. The line is cut down with
 * lxc_char_left_gc()/lxc_char_right_gc() before use (presumably trimming
 * surrounding whitespace - confirm in parse.h).
 *
 * Returns 1 once a mount succeeded, 0 when this line did not lead to a
 * mount.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *arg = data;
	char *type;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	type = buffer + lxc_char_left_gc(buffer, strlen(buffer));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      arg->rootfs, arg->target, type);

	if (mount(arg->rootfs, arg->target, type, arg->mntopt, NULL) != 0) {
		DEBUG("mount failed with error: %s", strerror(errno));
		return 0;
	}

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     arg->rootfs, arg->target, type);

	return 1;
}
453
8ddf877b 454static int mount_unknown_fs(const char *rootfs, const char *target, int mntopt)
78ae2fcc 455{
a6afdde9 456 int i;
78ae2fcc 457
458 struct cbarg {
459 const char *rootfs;
a6afdde9 460 const char *target;
78ae2fcc 461 int mntopt;
462 } cbarg = {
463 .rootfs = rootfs,
a6afdde9 464 .target = target,
78ae2fcc 465 .mntopt = mntopt,
466 };
467
a6afdde9
DL
468 /*
469 * find the filesystem type with brute force:
470 * first we check with /etc/filesystems, in case the modules
78ae2fcc 471 * are auto-loaded and fall back to the supported kernel fs
472 */
473 char *fsfile[] = {
474 "/etc/filesystems",
475 "/proc/filesystems",
476 };
477
a6afdde9
DL
478 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
479
480 int ret;
481
482 if (access(fsfile[i], F_OK))
483 continue;
484
485 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
486 if (ret < 0) {
487 ERROR("failed to parse '%s'", fsfile[i]);
488 return -1;
489 }
490
491 if (ret)
492 return 0;
78ae2fcc 493 }
494
a6afdde9
DL
495 ERROR("failed to determine fs type for '%s'", rootfs);
496 return -1;
497}
498
/* Directory rootfs: recursively bind mount @rootfs on @target. */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
503
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @rootfs: path of the backing file, opened read-write
 * @fd:     open descriptor of the loop device
 * @loinfo: caller-provided status block; zeroed here and submitted with
 *          LO_FLAGS_AUTOCLEAR set
 *
 * If the status cannot be set after the backing file was attached, the
 * device is detached again with LOOP_CLR_FD: the autoclear flag never took
 * effect in that case, so the binding would otherwise outlive the close of
 * @fd.
 *
 * Returns 0 on success, -1 on failure. The descriptor of the backing file
 * is closed in both cases.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD so the device does not stay bound */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			WARN("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
535
/*
 * Regular-file rootfs: find a free loop device, attach @rootfs to it and
 * mount the device on @target.
 *
 * /dev is scanned for entries whose name starts with "loop". A device
 * counts as free when LOOP_GET_STATUS64 fails with ENXIO. Only the first
 * free device is tried; the scan stops there whatever the outcome.
 *
 * The directory is read with readdir(): readdir_r() is deprecated, and
 * this DIR stream is private to the function.
 *
 * Returns 0 on success, -1 if /dev cannot be opened, no free loop device
 * was found, or attaching/mounting failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((direntp = readdir(dir))) {

		/* also filters out "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a status could be read: the device is already in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, 0);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
599
/* Block-device rootfs: probe the filesystem type and mount with no flags. */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int no_mount_flags = 0;

	return mount_unknown_fs(rootfs, target, no_mount_flags);
}
604
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing for the
 * duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. The file is unlinked right away
 * so that no name is left behind in the rootfs.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, the path
 *           cannot be resolved, or it is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	/* reject output errors as well as truncation */
	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return -1;

	/* the open descriptor keeps the file alive after the unlink */
	(void)unlink(absrootfspin);
	return fd;
}
647
d4ef7c50 648static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_cgroup_info *cgroup_info)
368bbc02 649{
368bbc02 650 int r;
b06b8511
CS
651 size_t i;
652 static struct {
653 int match_mask;
654 int match_flag;
655 const char *source;
656 const char *destination;
657 const char *fstype;
658 unsigned long flags;
659 const char *options;
660 } default_mounts[] = {
661 /* Read-only bind-mounting... In older kernels, doing that required
662 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
663 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
664 * kernel 2.6.26 onwards. However, this apparently does not work on
665 * kernel 3.8. Unfortunately, on that very same kernel, doing the
666 * same trick as above doesn't seem to work either, there one needs
667 * to ALSO specify MS_BIND for the remount, otherwise the entire
668 * fs is remounted read-only or the mount fails because it's busy...
669 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
670 * 2.6.32...
368bbc02 671 */
b06b8511
CS
672 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
673 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
674 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
675 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
676 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
677 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
678 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
679 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
680 { 0, 0, NULL, NULL, NULL, 0, NULL }
681 };
368bbc02 682
b06b8511
CS
683 for (i = 0; default_mounts[i].match_mask; i++) {
684 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
685 char *source = NULL;
686 char *destination = NULL;
687 int saved_errno;
688
689 if (default_mounts[i].source) {
690 /* will act like strdup if %r is not present */
691 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
692 if (!source) {
693 SYSERROR("memory allocation error");
694 return -1;
695 }
696 }
697 if (default_mounts[i].destination) {
698 /* will act like strdup if %r is not present */
699 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
700 if (!destination) {
701 saved_errno = errno;
702 SYSERROR("memory allocation error");
703 free(source);
704 errno = saved_errno;
705 return -1;
706 }
707 }
708 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
709 saved_errno = errno;
c414be25
DE
710 if (r < 0)
711 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
712 free(source);
713 free(destination);
714 if (r < 0) {
b06b8511
CS
715 errno = saved_errno;
716 return -1;
717 }
368bbc02 718 }
368bbc02
CS
719 }
720
b06b8511 721 if (flags & LXC_AUTO_CGROUP_MASK) {
c476bdce
SH
722 if (!lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info,
723 flags & LXC_AUTO_CGROUP_MASK)) {
368bbc02 724 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 725 return -1;
368bbc02
CS
726 }
727 }
728
368bbc02 729 return 0;
368bbc02
CS
730}
731
2656d231 732static int mount_rootfs(const char *rootfs, const char *target)
0ad19a3f 733{
b09ef133 734 char absrootfs[MAXPATHLEN];
78ae2fcc 735 struct stat s;
a6afdde9 736 int i;
78ae2fcc 737
a6afdde9 738 typedef int (*rootfs_cb)(const char *, const char *);
78ae2fcc 739
740 struct rootfs_type {
741 int type;
742 rootfs_cb cb;
743 } rtfs_type[] = {
2656d231
DL
744 { S_IFDIR, mount_rootfs_dir },
745 { S_IFBLK, mount_rootfs_block },
746 { S_IFREG, mount_rootfs_file },
78ae2fcc 747 };
0ad19a3f 748
4c8ab83b 749 if (!realpath(rootfs, absrootfs)) {
36eb9bde 750 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 751 return -1;
752 }
b09ef133 753
b09ef133 754 if (access(absrootfs, F_OK)) {
36eb9bde 755 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 756 return -1;
757 }
758
78ae2fcc 759 if (stat(absrootfs, &s)) {
36eb9bde 760 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 761 return -1;
762 }
763
78ae2fcc 764 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 765
78ae2fcc 766 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
767 continue;
9b0f0477 768
a6afdde9 769 return rtfs_type[i].cb(absrootfs, target);
78ae2fcc 770 }
9b0f0477 771
36eb9bde 772 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 773 return -1;
0ad19a3f 774}
775
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and yields 0.
 * Returns -1 if sethostname() fails, 0 otherwise.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;

	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", host);
		return -1;
	}

	INFO("'%s' hostname has been setup", host);

	return 0;
}
790
33fcb7a0 791static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 792 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 793{
7c6ef2a2
SH
794 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
795 int i, ret;
b0a33c1e 796
bc9bd0e3
DL
797 if (!rootfs->path)
798 return 0;
799
b0a33c1e 800 for (i = 0; i < tty_info->nbtty; i++) {
801
802 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
803
7c6ef2a2 804 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 805 rootfs->mount, i + 1);
7c6ef2a2
SH
806 if (ret >= sizeof(path)) {
807 ERROR("pathname too long for ttys");
808 return -1;
809 }
810 if (ttydir) {
811 /* create dev/lxc/tty%d" */
9ba8130c 812 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
813 rootfs->mount, ttydir, i + 1);
814 if (ret >= sizeof(lxcpath)) {
815 ERROR("pathname too long for ttys");
816 return -1;
817 }
818 ret = creat(lxcpath, 0660);
819 if (ret==-1 && errno != EEXIST) {
820 SYSERROR("error creating %s\n", lxcpath);
821 return -1;
822 }
4d44e274
SH
823 if (ret >= 0)
824 close(ret);
7c6ef2a2
SH
825 ret = unlink(path);
826 if (ret && errno != ENOENT) {
827 SYSERROR("error unlinking %s\n", path);
828 return -1;
829 }
b0a33c1e 830
7c6ef2a2
SH
831 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
832 WARN("failed to mount '%s'->'%s'",
833 pty_info->name, path);
834 continue;
835 }
13954cce 836
9ba8130c
SH
837 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
838 if (ret >= sizeof(lxcpath)) {
839 ERROR("tty pathname too long");
840 return -1;
841 }
7c6ef2a2
SH
842 ret = symlink(lxcpath, path);
843 if (ret) {
844 SYSERROR("failed to create symlink for tty %d\n", i+1);
845 return -1;
846 }
847 } else {
c6883f38
SH
848 /* If we populated /dev, then we need to create /dev/ttyN */
849 if (access(path, F_OK)) {
850 ret = creat(path, 0660);
851 if (ret==-1) {
852 SYSERROR("error creating %s\n", path);
853 /* this isn't fatal, continue */
025ed0f3 854 } else {
c6883f38 855 close(ret);
025ed0f3 856 }
c6883f38 857 }
7c6ef2a2
SH
858 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
859 WARN("failed to mount '%s'->'%s'",
860 pty_info->name, path);
861 continue;
862 }
b0a33c1e 863 }
864 }
865
cd54d859
DL
866 INFO("%d tty(s) has been setup", tty_info->nbtty);
867
b0a33c1e 868 return 0;
869}
870
7a7ff0c6 871static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
872{
873 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 874 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
875 int found;
876 void **cbparm;
877
878 mountentry = buffer;
879 cbparm = (void **)data;
880
881 mountlist = cbparm[0];
882 pivotdir = cbparm[1];
883
884 /* parse entry, first field is mountname, ignore */
2796cf79 885 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
886 if (!mountpoint)
887 return -1;
888
889 /* second field is mountpoint */
2796cf79 890 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
891 if (!mountpoint)
892 return -1;
893
894 /* only consider mountpoints below old root fs */
895 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
896 return 0;
897
898 /* filter duplicate mountpoints */
899 found = 0;
900 lxc_list_for_each(iterator, mountlist) {
901 if (!strcmp(iterator->elem, mountpoint)) {
902 found = 1;
903 break;
904 }
905 }
906 if (found)
907 return 0;
908
909 /* add entry to list */
910 listentry = malloc(sizeof(*listentry));
911 if (!listentry) {
912 SYSERROR("malloc for mountpoint listentry failed");
913 return -1;
914 }
915
916 listentry->elem = strdup(mountpoint);
917 if (!listentry->elem) {
918 SYSERROR("strdup failed");
00b6be44 919 free(listentry);
bf601689
MH
920 return -1;
921 }
922 lxc_list_add_tail(mountlist, listentry);
923
924 return 0;
925}
926
cc6f6dd7 927static int umount_oldrootfs(const char *oldrootfs)
bf601689 928{
2382ecff 929 char path[MAXPATHLEN];
bf601689 930 void *cbparm[2];
9ebb03ad 931 struct lxc_list mountlist, *iterator, *next;
bf601689 932 int ok, still_mounted, last_still_mounted;
9ba8130c 933 int rc;
bf601689
MH
934
935 /* read and parse /proc/mounts in old root fs */
936 lxc_list_init(&mountlist);
937
cc6f6dd7 938 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
939 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
940 if (rc >= sizeof(path)) {
941 ERROR("rootfs name too long");
942 return -1;
943 }
bf601689 944 cbparm[0] = &mountlist;
bf601689 945
cc6f6dd7 946 cbparm[1] = strdup(path);
bf601689
MH
947 if (!cbparm[1]) {
948 SYSERROR("strdup failed");
949 return -1;
950 }
951
9ba8130c
SH
952 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
953 if (rc >= sizeof(path)) {
954 ERROR("container proc/mounts name too long");
955 return -1;
956 }
cc6f6dd7
DL
957
958 ok = lxc_file_for_each_line(path,
959 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
960 if (ok < 0) {
961 SYSERROR("failed to read or parse mount list '%s'", path);
962 return -1;
963 }
964
965 /* umount filesystems until none left or list no longer shrinks */
966 still_mounted = 0;
967 do {
968 last_still_mounted = still_mounted;
969 still_mounted = 0;
970
9ebb03ad 971 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 972
c08556c6 973 /* umount normally */
bf601689
MH
974 if (!umount(iterator->elem)) {
975 DEBUG("umounted '%s'", (char *)iterator->elem);
976 lxc_list_del(iterator);
977 continue;
978 }
979
bf601689
MH
980 still_mounted++;
981 }
7df119ee 982
bf601689
MH
983 } while (still_mounted > 0 && still_mounted != last_still_mounted);
984
7df119ee 985
c08556c6
DL
986 lxc_list_for_each(iterator, &mountlist) {
987
988 /* let's try a lazy umount */
989 if (!umount2(iterator->elem, MNT_DETACH)) {
990 INFO("lazy unmount of '%s'", (char *)iterator->elem);
991 continue;
992 }
993
994 /* be more brutal (nfs) */
995 if (!umount2(iterator->elem, MNT_FORCE)) {
996 INFO("forced unmount of '%s'", (char *)iterator->elem);
997 continue;
998 }
999
7df119ee 1000 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1001 }
bf601689 1002
cc6f6dd7
DL
1003 return 0;
1004}
1005
/*
 * Make @rootfs the new root with pivot_root(), parking the old root on
 * <rootfs>/<pivotdir>, then unmount the old root.
 *
 * @rootfs:   directory to become "/"
 * @pivotdir: directory below @rootfs receiving the old root; defaults to
 *            "lxc_putold" when NULL. It is created if missing and, in that
 *            case only, removed again afterwards (best effort).
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		created = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* we switch from absolute path to relative path */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* removing the temporary mount point is not considered fatal */
	if (created && rmdir(pivotdir) != 0)
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
1065
bc6928ff
MW
1066
1067/*
1068 * Note: This is a verbatim copy of what is in monitor.c. We're just
1069 * using it here to generate a safe subdirectory in /dev/ for the
1070 * container's /dev/
1071 */
1072
1073/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
1074 * FNV has good anti collision properties and we're not worried
1075 * about pre-image resistance or one-way-ness, we're just trying to make
1076 * the name unique in the 108 bytes of space we have.
1077 */
1078#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
/*
 * 64 bit FNV-1a hash over the first len bytes of buf.
 *
 * hval is the running hash value: pass FNV1A_64_INIT to start a new hash,
 * or the result of a previous call to continue hashing more data.
 */
static uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *cur = buf;
	const unsigned char *end = cur + len;

	while (cur < end) {
		/* fold the next octet into the low bits of the hash */
		hval ^= (uint64_t)*cur++;

		/* multiply by the 64 bit FNV prime (2^40 + 0x1b3) mod 2^64 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
1097
1098/*
1099 * Check to see if a directory has something mounted on it and,
1100 * if it does, return the fstype.
1101 *
1102 * Code largely based on detect_shared_rootfs below
1103 *
1104 * Returns: # of matching entries in /proc/self/mounts
1105 * if != 0 fstype is filled with the last filesystem value.
1106 * if == 0 no matches found, fstype unchanged.
1107 *
1108 * ToDo: Maybe return the mount options in another parameter...
1109 */
1110
1111#define LINELEN 4096
1112#define MAX_FSTYPE_LEN 128
74a3920a 1113static int mount_check_fs( const char *dir, char *fstype )
bc6928ff
MW
1114{
1115 char buf[LINELEN], *p;
1116 struct stat s;
1117 FILE *f;
1118 int found_fs = 0;
1119 char *p2;
1120
1121 DEBUG("entering mount_check_fs for %s\n", dir);
1122
1123 if ( 0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode) ) {
1124 return 0;
1125 }
1126
bc6928ff 1127 f = fopen("/proc/self/mounts", "r");
bc6928ff
MW
1128 if (!f)
1129 return 0;
1130 while ((p = fgets(buf, LINELEN, f))) {
1131 p = index(buf, ' ');
1132 if( !p )
1133 continue;
1134 *p = '\0';
1135 p2 = p + 1;
1136
1137 p = index(p2, ' ');
1138 if( !p )
1139 continue;
1140 *p = '\0';
1141
1142 /* Compare the directory in the entry to desired */
1143 if( strcmp( p2, dir ) ) {
1144 continue;
1145 }
1146
1147 p2 = p + 1;
1148 p = index( p2, ' ');
1149 if( !p )
1150 continue;
1151 *p = '\0';
1152
1153 ++found_fs;
1154
1155 if( fstype ) {
1156 strncpy( fstype, p2, MAX_FSTYPE_LEN - 1 );
1157 fstype [ MAX_FSTYPE_LEN - 1 ] = '\0';
1158 }
1159 }
1160
bc6928ff 1161 fclose(f);
bc6928ff
MW
1162
1163 DEBUG("mount_check_fs returning %d last %s\n", found_fs, fstype);
1164
1165 return found_fs;
1166}
1167
1168/*
1169 * Locate a devtmpfs mount (should be on /dev) and create a container
1170 * subdirectory on it which we can then bind mount to the container
1171 * /dev instead of mounting a tmpfs there.
1172 * If we fail, return NULL.
1173 * Else return the pointer to the name buffer with the string to
1174 * the devtmpfs subdirectory.
1175 */
1176
74a3920a 1177static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1178{
1179 int ret;
1180 struct stat s;
1181 char tmp_path[MAXPATHLEN];
1182 char fstype[MAX_FSTYPE_LEN];
1183 char *base_path = "/dev/.lxc";
1184 char *user_path = "/dev/.lxc/user";
1185 uint64_t hash;
1186
1187 if ( 0 != access(base_path, F_OK) || 0 != stat(base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1188 /* This is just making /dev/.lxc it better work or we're done */
1189 ret = mkdir(base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1190 if ( ret ) {
1191 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1192 return NULL;
1193 }
1194 }
1195
1196 /*
1197 * Programmers notes:
1198 * We can not do mounts in this area of code that we want
1199 * to be visible in the host. Consequently, /dev/.lxc must
1200 * be set up earlier if we need a tmpfs mounted there.
1201 * That only affects the rare cases where autodev is enabled
1202 * for a container and devtmpfs is not mounted on /dev in the
1203 * host. In that case, we'll fall back to the old method
1204 * of mounting a tmpfs in the container and have no visibility
1205 * into the container /dev.
1206 */
1207 if( ! mount_check_fs( "/dev", fstype )
1208 || strcmp( "devtmpfs", fstype ) ) {
1209 /* Either /dev was not mounted or was not devtmpfs */
1210
1211 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1212 /*
1213 * /dev/.lxc is not already mounted
1214 * Doing a mount here does no good, since
1215 * it's not visible in the host.
1216 */
1217
1218 ERROR("/dev/.lxc is not setup - taking fallback" );
1219 return NULL;
1220 }
1221 }
1222
1223 if ( 0 != access(user_path, F_OK) || 0 != stat(user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1224 /*
1225 * This is making /dev/.lxc/user path for non-priv users.
1226 * If this doesn't work, we'll have to fall back in the
1227 * case of non-priv users. It's mode 1777 like /tmp.
1228 */
1229 ret = mkdir(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1230 if ( ret ) {
1231 /* Issue an error but don't fail yet! */
1232 ERROR("Unable to create /dev/.lxc/user");
1233 }
1234 /* Umask tends to screw us up here */
1235 chmod(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1236 }
1237
1238 /*
1239 * Since the container name must be unique within a given
1240 * lxcpath, we're going to use a hash of the path
1241 * /lxcpath/name as our hash name in /dev/.lxc/
1242 */
1243
1244 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1245 if (ret < 0 || ret >= MAXPATHLEN)
1246 return NULL;
1247
1248 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1249
1250 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, base_path, name, hash);
1251 if (ret < 0 || ret >= MAXPATHLEN)
1252 return NULL;
1253
1254 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1255 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1256 if ( ret ) {
1257 /* Something must have failed with the base_path...
1258 * Maybe unpriv user. Try user_path now... */
1259 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1260
1261 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, user_path, name, hash);
1262 if (ret < 0 || ret >= MAXPATHLEN)
1263 return NULL;
1264
1265 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1266 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1267 if ( ret ) {
1268 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1269 return NULL;
1270 }
1271 }
1272 }
1273 }
1274
1275 strcpy( path, tmp_path );
1276 return path;
1277}
1278
1279
91c3830e
SH
1280/*
1281 * Do we want to add options for max size of /dev and a file to
1282 * specify which devices to create?
1283 */
/*
 * Provide a /dev for the container at <root>/dev and make sure that
 * <root>/dev/pts exists afterwards.
 *
 * name:    container name
 * root:    path of the container root
 * lxcpath: lxcpath the container lives in
 *
 * When mk_devtmpfs() succeeds, the directory it created is bind mounted on
 * <root>/dev and <lxcpath>/<name>/rootfs.dev is made a symlink to it.
 * Otherwise, whatever is already mounted on rootfs.dev is bind mounted,
 * or a fresh tmpfs is mounted when nothing is.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);

	/* snprintf() returning exactly MAXPATHLEN already means truncation */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs(name, devtmpfs_path, lxcpath)) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink(host_path);
		rmdir(host_path);
		ret = symlink(devtmpfs_path, host_path);

		if (ret < 0) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'\n", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0);
	} else {
		/* Only mount a tmpfs here if rootfs.dev is not already a mount */
		if (!mount_check_fs(host_path, NULL)) {
			/* the tmpfs goes on the container /dev, so log that path */
			DEBUG("Mounting tmpfs to %s", path);
			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path );
			ret = mount(host_path, path, NULL, MS_BIND, 0);
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if (access(path, F_OK) != 0 || stat(path, &s) != 0 || !S_ISDIR(s.st_mode)) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
1353
/* One device node: name below /dev, mknod() mode, and device numbers. */
struct lxc_devs {
	const char *name;
	mode_t mode;
	int maj;
	int min;
};

/* Table of device nodes, in the order they are defined. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1370
74a3920a 1371static int setup_autodev(const char *root)
c6883f38
SH
1372{
1373 int ret;
c6883f38
SH
1374 char path[MAXPATHLEN];
1375 int i;
3a32201c 1376 mode_t cmask;
c6883f38 1377
91c3830e
SH
1378 INFO("Creating initial consoles under %s/dev\n", root);
1379
c6883f38 1380 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1381 if (ret < 0 || ret >= MAXPATHLEN) {
1382 ERROR("Error calculating container /dev location");
c6883f38 1383 return -1;
f7bee6c6 1384 }
91c3830e
SH
1385
1386 INFO("Populating /dev under %s\n", root);
3a32201c 1387 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1388 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1389 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1390 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1391 if (ret < 0 || ret >= MAXPATHLEN)
1392 return -1;
1393 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1394 if (ret && errno != EEXIST) {
c6883f38
SH
1395 SYSERROR("Error creating %s\n", d->name);
1396 return -1;
1397 }
1398 }
3a32201c 1399 umask(cmask);
c6883f38
SH
1400
1401 INFO("Populated /dev under %s\n", root);
1402 return 0;
1403}
1404
cc28d0b0
SH
1405/*
1406 * Detect whether / is mounted MS_SHARED. The only way I know of to
1407 * check that is through /proc/self/mountinfo.
1408 * I'm only checking for /. If the container rootfs or mount location
1409 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1410 * out would be too much work to be worth it.
1411 */
#define LINELEN 4096
/*
 * Scan /proc/self/mountinfo for the entry whose mount point is "/" and
 * report whether the text following its mount options contains "shared:".
 *
 * Returns 1 when such an entry is found, 0 otherwise (also when the file
 * cannot be opened).
 */
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	FILE *mountinfo;
	int shared = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo)
		return 0;

	while (!shared && fgets(line, LINELEN, mountinfo)) {
		char *field = line;
		char *field_end;
		char *rest;
		int skipped = 0;

		/* walk to the separator in front of the mount point field */
		while (field && skipped < 4) {
			field = strchr(field + 1, ' ');
			skipped++;
		}
		if (!field)
			continue;

		/* terminate the mount point field */
		field_end = strchr(field + 1, ' ');
		if (!field_end)
			continue;
		*field_end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* this is '/'. is it shared? */
		rest = strchr(field_end + 1, ' ');
		if (rest && strstr(rest, "shared:"))
			shared = 1;
	}

	fclose(mountinfo);
	return shared;
}
1444
1445/*
1446 * I'll forgive you for asking whether all of this is needed :) The
1447 * answer is yes.
1448 * pivot_root will fail if the new root, the put_old dir, or the parent
1449 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1450 * or may not be current->fs_root - if we assumed it always was, we could
1451 * just mount --make-rslave /). So,
1452 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1453 * 2. make that MS_SLAVE
1454 * 3. make a 'root' directory under that
1455 * 4. mount --rbind / under the $tinyroot/root.
1456 * 5. make that rslave
1457 * 6. chdir and chroot into $tinyroot/root
1458 * 7. $tinyroot will be unmounted by our parent in start.c
1459 */
1460static int chroot_into_slave(struct lxc_conf *conf)
1461{
1462 char path[MAXPATHLEN];
1463 const char *destpath = conf->rootfs.mount;
1464 int ret;
1465
1466 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1467 SYSERROR("failed to mount %s bind", destpath);
1468 return -1;
1469 }
1470 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1471 SYSERROR("failed to make %s slave", destpath);
1472 return -1;
1473 }
58ab99ae 1474 if (mount("none", destpath, "tmpfs", 0, "size=10000,mode=755")) {
cc28d0b0
SH
1475 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1476 return -1;
1477 }
1478 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1479 if (ret < 0 || ret >= MAXPATHLEN) {
1480 ERROR("out of memory making root path");
1481 return -1;
1482 }
1483 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1484 SYSERROR("Failed to create /dev/pts in container");
1485 return -1;
1486 }
1487 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1488 SYSERROR("Failed to rbind mount / to %s", path);
1489 return -1;
1490 }
1491 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1492 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1493 return -1;
1494 }
1495 if (chdir(path)) {
1496 SYSERROR("Failed to chdir into tmp-/");
1497 return -1;
1498 }
1499 if (chroot(path)) {
1500 SYSERROR("Failed to chroot into tmp-/");
1501 return -1;
1502 }
1503 INFO("Chrooted into tmp-/ at %s\n", path);
1504 return 0;
1505}
1506
1507static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1508{
cc28d0b0
SH
1509 const struct lxc_rootfs *rootfs = &conf->rootfs;
1510
a0f379bf
DW
1511 if (!rootfs->path) {
1512 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1513 SYSERROR("Failed to make / rslave");
1514 return -1;
1515 }
c69bd12f 1516 return 0;
a0f379bf 1517 }
0ad19a3f 1518
12297168 1519 if (access(rootfs->mount, F_OK)) {
b1789442 1520 SYSERROR("failed to access to '%s', check it is present",
12297168 1521 rootfs->mount);
b1789442
DL
1522 return -1;
1523 }
1524
cc28d0b0
SH
1525 if (detect_shared_rootfs()) {
1526 if (chroot_into_slave(conf)) {
1527 ERROR("Failed to chroot into slave /");
1528 return -1;
1529 }
1530 }
1531
9be53773
SH
1532 // First try mounting rootfs using a bdev
1533 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1534 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1535 bdev_put(bdev);
9be53773
SH
1536 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1537 return 0;
1538 }
59d66af2
SH
1539 if (bdev)
1540 bdev_put(bdev);
2656d231 1541 if (mount_rootfs(rootfs->path, rootfs->mount)) {
a6afdde9 1542 ERROR("failed to mount rootfs");
c3f0a28c 1543 return -1;
1544 }
0ad19a3f 1545
12297168 1546 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1547
ac778708
DL
1548 return 0;
1549}
1550
74a3920a 1551static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1552{
ac778708
DL
1553 if (!rootfs->path)
1554 return 0;
1555
12297168 1556 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1557 ERROR("failed to setup pivot root");
25368b52 1558 return -1;
c69bd12f
DL
1559 }
1560
25368b52 1561 return 0;
0ad19a3f 1562}
1563
d852c78c 1564static int setup_pts(int pts)
3c26f34e 1565{
77890c6d
SW
1566 char target[PATH_MAX];
1567
d852c78c
DL
1568 if (!pts)
1569 return 0;
3c26f34e 1570
1571 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1572 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1573 return -1;
1574 }
1575
a6afdde9 1576 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1577 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1578 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1579 return -1;
1580 }
1581
3c26f34e 1582 if (access("/dev/ptmx", F_OK)) {
1583 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1584 goto out;
36eb9bde 1585 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1586 return -1;
1587 }
1588
77890c6d
SW
1589 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1590 goto out;
1591
3c26f34e 1592 /* fallback here, /dev/pts/ptmx exists just mount bind */
1593 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1594 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1595 return -1;
1596 }
cd54d859
DL
1597
1598 INFO("created new pts instance");
d852c78c 1599
3c26f34e 1600out:
1601 return 0;
1602}
1603
cccc74b5
DL
/*
 * Apply the requested personality to the calling process.
 *
 * persona: personality value; -1 means "leave unchanged".
 *
 * Compiles to a no-op returning 0 unless HAVE_SYS_PERSONALITY_H is set.
 * Returns 0 on success, -1 when personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1620
7c6ef2a2 1621static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1622 const struct lxc_console *console)
6e590161 1623{
63376d7d
DL
1624 char path[MAXPATHLEN];
1625 struct stat s;
7c6ef2a2 1626 int ret;
52e35957 1627
7c6ef2a2
SH
1628 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1629 if (ret >= sizeof(path)) {
1630 ERROR("console path too long\n");
1631 return -1;
1632 }
52e35957 1633
63376d7d 1634 if (access(path, F_OK)) {
466978b0 1635 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1636 return 0;
52e35957
DL
1637 }
1638
b5159817
DE
1639 if (console->master < 0) {
1640 INFO("no console");
f78a1f32
DL
1641 return 0;
1642 }
ed502555 1643
63376d7d
DL
1644 if (stat(path, &s)) {
1645 SYSERROR("failed to stat '%s'", path);
1646 return -1;
1647 }
1648
1649 if (chmod(console->name, s.st_mode)) {
1650 SYSERROR("failed to set mode '0%o' to '%s'",
1651 s.st_mode, console->name);
1652 return -1;
1653 }
13954cce 1654
63376d7d
DL
1655 if (mount(console->name, path, "none", MS_BIND, 0)) {
1656 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1657 return -1;
1658 }
1659
63376d7d 1660 INFO("console has been setup");
7c6ef2a2
SH
1661 return 0;
1662}
1663
1664static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1665 const struct lxc_console *console,
1666 char *ttydir)
1667{
1668 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1669 int ret;
1670
1671 /* create rootfs/dev/<ttydir> directory */
1672 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1673 ttydir);
1674 if (ret >= sizeof(path))
1675 return -1;
1676 ret = mkdir(path, 0755);
1677 if (ret && errno != EEXIST) {
1678 SYSERROR("failed with errno %d to create %s\n", errno, path);
1679 return -1;
1680 }
1681 INFO("created %s\n", path);
1682
1683 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1684 rootfs->mount, ttydir);
1685 if (ret >= sizeof(lxcpath)) {
1686 ERROR("console path too long\n");
1687 return -1;
1688 }
1689
1690 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1691 ret = unlink(path);
1692 if (ret && errno != ENOENT) {
1693 SYSERROR("error unlinking %s\n", path);
1694 return -1;
1695 }
1696
1697 ret = creat(lxcpath, 0660);
1698 if (ret==-1 && errno != EEXIST) {
1699 SYSERROR("error %d creating %s\n", errno, lxcpath);
1700 return -1;
1701 }
4d44e274
SH
1702 if (ret >= 0)
1703 close(ret);
7c6ef2a2 1704
b5159817
DE
1705 if (console->master < 0) {
1706 INFO("no console");
7c6ef2a2
SH
1707 return 0;
1708 }
1709
1710 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1711 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1712 return -1;
1713 }
1714
1715 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1716 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1717 if (ret >= sizeof(lxcpath)) {
1718 ERROR("lxc/console path too long");
1719 return -1;
1720 }
7c6ef2a2
SH
1721 ret = symlink(lxcpath, path);
1722 if (ret) {
1723 SYSERROR("failed to create symlink for console");
1724 return -1;
1725 }
1726
1727 INFO("console has been setup on %s", lxcpath);
cd54d859 1728
6e590161 1729 return 0;
1730}
1731
7c6ef2a2
SH
1732static int setup_console(const struct lxc_rootfs *rootfs,
1733 const struct lxc_console *console,
1734 char *ttydir)
1735{
1736 /* We don't have a rootfs, /dev/console will be shared */
1737 if (!rootfs->path)
1738 return 0;
1739 if (!ttydir)
1740 return setup_dev_console(rootfs, console);
1741
1742 return setup_ttydir_console(rootfs, console, ttydir);
1743}
1744
1bd051a6
SH
1745static int setup_kmsg(const struct lxc_rootfs *rootfs,
1746 const struct lxc_console *console)
1747{
1748 char kpath[MAXPATHLEN];
1749 int ret;
1750
222fea5a
DE
1751 if (!rootfs->path)
1752 return 0;
1bd051a6
SH
1753 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1754 if (ret < 0 || ret >= sizeof(kpath))
1755 return -1;
1756
1757 ret = unlink(kpath);
1758 if (ret && errno != ENOENT) {
1759 SYSERROR("error unlinking %s\n", kpath);
1760 return -1;
1761 }
1762
1763 ret = symlink("console", kpath);
1764 if (ret) {
1765 SYSERROR("failed to create symlink for kmsg");
1766 return -1;
1767 }
1768
1769 return 0;
1770}
1771
998ac676
RT
1772static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1773{
1774 struct mount_opt *mo;
1775
1776 /* If opt is found in mount_opt, set or clear flags.
1777 * Otherwise append it to data. */
1778
1779 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1780 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1781 if (mo->clear)
1782 *flags &= ~mo->flag;
1783 else
1784 *flags |= mo->flag;
1785 return;
1786 }
1787 }
1788
1789 if (strlen(*data))
1790 strcat(*data, ",");
1791 strcat(*data, opt);
1792}
1793
/*
 * Split a comma separated mount option string into mount flags and a
 * filesystem specific data string.
 *
 * mntopts:  option string, may be NULL
 * mntflags: receives the flags (0 when there are none)
 * mntdata:  receives a malloc'ed string with the options that are not
 *           flags, or NULL when there are none; the caller frees it
 *
 * Returns 0 on success, -1 when memory cannot be allocated.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the leftover options can never be longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	*extra = 0;

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (*extra)
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1832
911324ef
DL
/*
 * Mount fsname on target, then issue a second mount() with MS_REMOUNT
 * when MS_REMOUNT or MS_BIND was requested. The first mount() is always
 * made without MS_REMOUNT.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long first_flags = mountflags & ~MS_REMOUNT;

	if (mount(fsname, target, fstype, first_flags, data) != 0) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1859
/*
 * Mount one fstab entry using its mount point exactly as written.
 *
 * Options handled here in addition to the regular mount options:
 *   create=dir   create the mount point as a directory first
 *   create=file  create the mount point as an empty file first
 *   optional     a failing mount is not reported as an error
 * A failure to create the mount point only produces a warning; the
 * result of the mount itself decides the return value.
 *
 * Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(const struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathdup = NULL;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		/* mkdir_p() returns non-zero on failure */
		if (mkdir_p(mntent->mnt_dir, 0755))
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * that must not be freed, so keep the strdup() result in its
		 * own variable and free that one.
		 */
		pathdup = strdup(mntent->mnt_dir);
		if (pathdup)
			mkdir_p(dirname(pathdup), 0755);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

	free(pathdup);
	free(mntdata);

	return ret;
}
1904
aaf901be 1905static int mount_entry_on_absolute_rootfs(const struct mntent *mntent,
80a881b2
SH
1906 const struct lxc_rootfs *rootfs,
1907 const char *lxc_name)
911324ef 1908{
013bd428 1909 char *aux;
59760f5d 1910 char path[MAXPATHLEN];
911324ef
DL
1911 unsigned long mntflags;
1912 char *mntdata;
80a881b2 1913 int r, ret = 0, offset;
67e571de 1914 const char *lxcpath;
34cfffb3
SG
1915 FILE *pathfile = NULL;
1916 char *pathdirname = NULL;
0ad19a3f 1917
911324ef
DL
1918 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1919 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1920 return -1;
1921 }
1bc60a65 1922
593e8478 1923 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1924 if (!lxcpath) {
1925 ERROR("Out of memory");
1926 return -1;
1927 }
1928
80a881b2 1929 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1930 * use $lxcpath/CN/rootfs as the target prefix */
1931 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1932 if (r < 0 || r >= MAXPATHLEN)
1933 goto skipvarlib;
1934
1935 aux = strstr(mntent->mnt_dir, path);
1936 if (aux) {
1937 offset = strlen(path);
1938 goto skipabs;
1939 }
1940
1941skipvarlib:
013bd428
DL
1942 aux = strstr(mntent->mnt_dir, rootfs->path);
1943 if (!aux) {
1944 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1945 goto out;
1946 }
80a881b2
SH
1947 offset = strlen(rootfs->path);
1948
1949skipabs:
013bd428 1950
9ba8130c 1951 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1952 aux + offset);
1953 if (r < 0 || r >= MAXPATHLEN) {
1954 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1955 ret = -1;
1956 goto out;
1957 }
1958
34cfffb3
SG
1959 if (hasmntopt(mntent, "create=dir")) {
1960 if (!mkdir_p(path, 0755)) {
1961 WARN("Failed to create mount target '%s'", path);
1962 ret = -1;
1963 }
1964 }
1965
1966 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
1967 pathdirname = strdup(path);
1968 pathdirname = dirname(pathdirname);
1969 mkdir_p(pathdirname, 0755);
1970 pathfile = fopen(path, "wb");
1971 if (!pathfile) {
1972 WARN("Failed to create mount target '%s'", path);
1973 ret = -1;
1974 }
1975 else
1976 fclose(pathfile);
1977 }
d330fe7b 1978
013bd428 1979 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 1980 mntflags, mntdata);
0ad19a3f 1981
68c152ef
SH
1982 if (hasmntopt(mntent, "optional") != NULL)
1983 ret = 0;
1984
013bd428 1985out:
34cfffb3 1986 free(pathdirname);
911324ef
DL
1987 free(mntdata);
1988 return ret;
1989}
d330fe7b 1990
/*
 * Mount one fstab entry whose mount point is relative; the mount point is
 * taken relative to rootfs.
 *
 * Options create=dir, create=file and optional are honoured. A failure to
 * create the mount point only produces a warning; the result of the mount
 * itself decides the return value.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(const struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathdup = NULL;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* mntdata was allocated by parse_mntopts() */
		free(mntdata);
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		/* mkdir_p() returns non-zero on failure */
		if (mkdir_p(path, 0755))
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * that must not be freed, so keep the strdup() result in its
		 * own variable and free that one.
		 */
		pathdup = strdup(path);
		if (pathdup)
			mkdir_p(dirname(pathdup), 0755);

		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

	free(pathdup);
	free(mntdata);

	return ret;
}
2044
80a881b2
SH
2045static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2046 const char *lxc_name)
911324ef 2047{
aaf901be
AM
2048 struct mntent mntent;
2049 char buf[4096];
911324ef 2050 int ret = -1;
e76b8764 2051
aaf901be 2052 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2053
911324ef 2054 if (!rootfs->path) {
aaf901be 2055 if (mount_entry_on_systemfs(&mntent))
e76b8764 2056 goto out;
911324ef 2057 continue;
e76b8764
CDC
2058 }
2059
911324ef 2060 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2061 if (mntent.mnt_dir[0] != '/') {
2062 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2063 rootfs->mount))
2064 goto out;
2065 continue;
2066 }
cd54d859 2067
aaf901be 2068 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2069 goto out;
0ad19a3f 2070 }
cd54d859 2071
0ad19a3f 2072 ret = 0;
cd54d859
DL
2073
2074 INFO("mount points have been setup");
0ad19a3f 2075out:
e7938e9e
MN
2076 return ret;
2077}
2078
80a881b2
SH
/*
 * Mount all entries of the fstab file named by fstab.
 *
 * Nothing is done when fstab is NULL.
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (!fstab)
		return 0;

	entries = setmntent(fstab, "r");
	if (!entries) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);
	endmntent(entries);

	return rc;
}
2099
80a881b2
SH
2100static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2101 const char *lxc_name)
e7938e9e
MN
2102{
2103 FILE *file;
2104 struct lxc_list *iterator;
2105 char *mount_entry;
2106 int ret;
2107
2108 file = tmpfile();
2109 if (!file) {
2110 ERROR("tmpfile error: %m");
2111 return -1;
2112 }
2113
2114 lxc_list_for_each(iterator, mount) {
2115 mount_entry = iterator->elem;
1d6b1976 2116 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2117 }
2118
2119 rewind(file);
2120
80a881b2 2121 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2122
2123 fclose(file);
2124 return ret;
2125}
2126
81810dd1
DL
2127static int setup_caps(struct lxc_list *caps)
2128{
2129 struct lxc_list *iterator;
2130 char *drop_entry;
d55bc1ad 2131 char *ptr;
81810dd1
DL
2132 int i, capid;
2133
2134 lxc_list_for_each(iterator, caps) {
2135
2136 drop_entry = iterator->elem;
2137
2138 capid = -1;
2139
2140 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2141
2142 if (strcmp(drop_entry, caps_opt[i].name))
2143 continue;
2144
2145 capid = caps_opt[i].value;
2146 break;
2147 }
2148
d55bc1ad
CS
2149 if (capid < 0) {
2150 /* try to see if it's numeric, so the user may specify
2151 * capabilities that the running kernel knows about but
2152 * we don't */
09bbd745 2153 errno = 0;
d55bc1ad 2154 capid = strtol(drop_entry, &ptr, 10);
09bbd745 2155 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
2156 /* not a valid number */
2157 capid = -1;
2158 else if (capid > lxc_caps_last_cap())
2159 /* we have a number but it's not a valid
2160 * capability */
2161 capid = -1;
2162 }
2163
81810dd1 2164 if (capid < 0) {
1e11be34
DL
2165 ERROR("unknown capability %s", drop_entry);
2166 return -1;
81810dd1
DL
2167 }
2168
2169 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2170
2171 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2172 SYSERROR("failed to remove %s capability", drop_entry);
2173 return -1;
2174 }
81810dd1
DL
2175
2176 }
2177
1fb86a7c
SH
2178 DEBUG("capabilities have been setup");
2179
2180 return 0;
2181}
2182
2183static int dropcaps_except(struct lxc_list *caps)
2184{
2185 struct lxc_list *iterator;
2186 char *keep_entry;
2187 char *ptr;
2188 int i, capid;
2189 int numcaps = lxc_caps_last_cap() + 1;
2190 INFO("found %d capabilities\n", numcaps);
2191
2caf9a97
SH
2192 if (numcaps <= 0 || numcaps > 200)
2193 return -1;
2194
1fb86a7c
SH
2195 // caplist[i] is 1 if we keep capability i
2196 int *caplist = alloca(numcaps * sizeof(int));
2197 memset(caplist, 0, numcaps * sizeof(int));
2198
2199 lxc_list_for_each(iterator, caps) {
2200
2201 keep_entry = iterator->elem;
2202
2203 capid = -1;
2204
2205 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2206
2207 if (strcmp(keep_entry, caps_opt[i].name))
2208 continue;
2209
2210 capid = caps_opt[i].value;
2211 break;
2212 }
2213
2214 if (capid < 0) {
2215 /* try to see if it's numeric, so the user may specify
2216 * capabilities that the running kernel knows about but
2217 * we don't */
2218 capid = strtol(keep_entry, &ptr, 10);
2219 if (!ptr || *ptr != '\0' ||
f371aca9 2220 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2221 /* not a valid number */
2222 capid = -1;
2223 else if (capid > lxc_caps_last_cap())
2224 /* we have a number but it's not a valid
2225 * capability */
2226 capid = -1;
2227 }
2228
2229 if (capid < 0) {
2230 ERROR("unknown capability %s", keep_entry);
2231 return -1;
2232 }
2233
2234 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2235
2236 caplist[capid] = 1;
2237 }
2238 for (i=0; i<numcaps; i++) {
2239 if (caplist[i])
2240 continue;
2241 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2242 SYSERROR("failed to remove capability %d", i);
2243 return -1;
2244 }
1fb86a7c
SH
2245 }
2246
2247 DEBUG("capabilities have been setup");
81810dd1
DL
2248
2249 return 0;
2250}
2251
0ad19a3f 2252static int setup_hw_addr(char *hwaddr, const char *ifname)
2253{
2254 struct sockaddr sockaddr;
2255 struct ifreq ifr;
2256 int ret, fd;
2257
3cfc0f3a
MN
2258 ret = lxc_convert_mac(hwaddr, &sockaddr);
2259 if (ret) {
2260 ERROR("mac address '%s' conversion failed : %s",
2261 hwaddr, strerror(-ret));
0ad19a3f 2262 return -1;
2263 }
2264
2265 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2266 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2267 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2268
2269 fd = socket(AF_INET, SOCK_DGRAM, 0);
2270 if (fd < 0) {
3ab87b66 2271 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2272 return -1;
2273 }
2274
2275 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2276 close(fd);
2277 if (ret)
3ab87b66 2278 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2279
5da6aa8c 2280 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2281
0ad19a3f 2282 return ret;
2283}
2284
82d5ae15 2285static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2286{
82d5ae15
DL
2287 struct lxc_list *iterator;
2288 struct lxc_inetdev *inetdev;
3cfc0f3a 2289 int err;
0ad19a3f 2290
82d5ae15
DL
2291 lxc_list_for_each(iterator, ip) {
2292
2293 inetdev = iterator->elem;
2294
0093bb8c
DL
2295 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2296 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2297 if (err) {
2298 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2299 ifindex, strerror(-err));
82d5ae15
DL
2300 return -1;
2301 }
2302 }
2303
2304 return 0;
0ad19a3f 2305}
2306
82d5ae15 2307static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2308{
82d5ae15 2309 struct lxc_list *iterator;
7fa9074f 2310 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2311 int err;
0ad19a3f 2312
82d5ae15
DL
2313 lxc_list_for_each(iterator, ip) {
2314
2315 inet6dev = iterator->elem;
2316
b3df193c 2317 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2318 &inet6dev->mcast, &inet6dev->acast,
2319 inet6dev->prefix);
3cfc0f3a
MN
2320 if (err) {
2321 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2322 ifindex, strerror(-err));
82d5ae15 2323 return -1;
3cfc0f3a 2324 }
82d5ae15
DL
2325 }
2326
2327 return 0;
0ad19a3f 2328}
2329
82d5ae15 2330static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2331{
0ad19a3f 2332 char ifname[IFNAMSIZ];
0ad19a3f 2333 char *current_ifname = ifname;
3cfc0f3a 2334 int err;
0ad19a3f 2335
82d5ae15
DL
2336 /* empty network namespace */
2337 if (!netdev->ifindex) {
b0efbac4 2338 if (netdev->flags & IFF_UP) {
d472214b 2339 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2340 if (err) {
2341 ERROR("failed to set the loopback up : %s",
2342 strerror(-err));
82d5ae15
DL
2343 return -1;
2344 }
82d5ae15 2345 }
7b57e8b6 2346 return 0;
0ad19a3f 2347 }
13954cce 2348
b466dc33
BP
2349 /* get the new ifindex in case of physical netdev */
2350 if (netdev->type == LXC_NET_PHYS)
2351 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2352 ERROR("failed to get ifindex for %s",
2353 netdev->link);
2354 return -1;
2355 }
2356
82d5ae15
DL
2357 /* retrieve the name of the interface */
2358 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2359 ERROR("no interface corresponding to index '%d'",
82d5ae15 2360 netdev->ifindex);
0ad19a3f 2361 return -1;
2362 }
13954cce 2363
018ef520 2364 /* default: let the system to choose one interface name */
9d083402 2365 if (!netdev->name)
fb6d9b2f
DL
2366 netdev->name = netdev->type == LXC_NET_PHYS ?
2367 netdev->link : "eth%d";
018ef520 2368
82d5ae15 2369 /* rename the interface name */
b84f58b9 2370 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3cfc0f3a
MN
2371 if (err) {
2372 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2373 strerror(-err));
018ef520
DL
2374 return -1;
2375 }
2376
2377 /* Re-read the name of the interface because its name has changed
2378 * and would be automatically allocated by the system
2379 */
82d5ae15 2380 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2381 ERROR("no interface corresponding to index '%d'",
82d5ae15 2382 netdev->ifindex);
018ef520 2383 return -1;
0ad19a3f 2384 }
2385
82d5ae15
DL
2386 /* set a mac address */
2387 if (netdev->hwaddr) {
2388 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2389 ERROR("failed to setup hw address for '%s'",
82d5ae15 2390 current_ifname);
0ad19a3f 2391 return -1;
2392 }
2393 }
2394
82d5ae15
DL
2395 /* setup ipv4 addresses on the interface */
2396 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2397 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2398 ifname);
2399 return -1;
2400 }
2401
82d5ae15
DL
2402 /* setup ipv6 addresses on the interface */
2403 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2404 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2405 ifname);
2406 return -1;
2407 }
2408
82d5ae15 2409 /* set the network device up */
b0efbac4 2410 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2411 int err;
2412
d472214b 2413 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2414 if (err) {
2415 ERROR("failed to set '%s' up : %s", current_ifname,
2416 strerror(-err));
0ad19a3f 2417 return -1;
2418 }
2419
2420 /* the network is up, make the loopback up too */
d472214b 2421 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2422 if (err) {
2423 ERROR("failed to set the loopback up : %s",
2424 strerror(-err));
0ad19a3f 2425 return -1;
2426 }
2427 }
2428
f8fee0e2
MK
2429 /* We can only set up the default routes after bringing
2430 * up the interface, sine bringing up the interface adds
2431 * the link-local routes and we can't add a default
2432 * route if the gateway is not reachable. */
2433
2434 /* setup ipv4 gateway on the interface */
2435 if (netdev->ipv4_gateway) {
2436 if (!(netdev->flags & IFF_UP)) {
2437 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2438 return -1;
2439 }
2440
2441 if (lxc_list_empty(&netdev->ipv4)) {
2442 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2443 return -1;
2444 }
2445
2446 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2447 if (err) {
fc739df5
SG
2448 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2449 if (err) {
2450 ERROR("failed to add ipv4 dest for '%s': %s",
2451 ifname, strerror(-err));
2452 }
2453
2454 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2455 if (err) {
2456 ERROR("failed to setup ipv4 gateway for '%s': %s",
2457 ifname, strerror(-err));
2458 if (netdev->ipv4_gateway_auto) {
2459 char buf[INET_ADDRSTRLEN];
2460 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2461 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2462 }
2463 return -1;
19a26f82 2464 }
f8fee0e2
MK
2465 }
2466 }
2467
2468 /* setup ipv6 gateway on the interface */
2469 if (netdev->ipv6_gateway) {
2470 if (!(netdev->flags & IFF_UP)) {
2471 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2472 return -1;
2473 }
2474
2475 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2476 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2477 return -1;
2478 }
2479
2480 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2481 if (err) {
fc739df5
SG
2482 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2483 if (err) {
2484 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2485 ifname, strerror(-err));
19a26f82 2486 }
fc739df5
SG
2487
2488 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2489 if (err) {
2490 ERROR("failed to setup ipv6 gateway for '%s': %s",
2491 ifname, strerror(-err));
2492 if (netdev->ipv6_gateway_auto) {
2493 char buf[INET6_ADDRSTRLEN];
2494 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2495 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2496 }
2497 return -1;
2498 }
f8fee0e2
MK
2499 }
2500 }
2501
cd54d859
DL
2502 DEBUG("'%s' has been setup", current_ifname);
2503
0ad19a3f 2504 return 0;
2505}
2506
5f4535a3 2507static int setup_network(struct lxc_list *network)
0ad19a3f 2508{
82d5ae15 2509 struct lxc_list *iterator;
82d5ae15 2510 struct lxc_netdev *netdev;
0ad19a3f 2511
5f4535a3 2512 lxc_list_for_each(iterator, network) {
cd54d859 2513
5f4535a3 2514 netdev = iterator->elem;
82d5ae15
DL
2515
2516 if (setup_netdev(netdev)) {
2517 ERROR("failed to setup netdev");
2518 return -1;
2519 }
2520 }
cd54d859 2521
5f4535a3
DL
2522 if (!lxc_list_empty(network))
2523 INFO("network has been setup");
cd54d859
DL
2524
2525 return 0;
0ad19a3f 2526}
2527
7b35f3d6
SH
2528void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2529{
2530 int i;
2531
2532 INFO("running to reset %d nic names", conf->num_savednics);
2533 for (i=0; i<conf->num_savednics; i++) {
2534 struct saved_nic *s = &conf->saved_nics[i];
2535 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2536 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2537 free(s->orig_name);
2538 }
2539 conf->num_savednics = 0;
2540 free(conf->saved_nics);
2541}
2542
ae9242c8
SH
2543static char *default_rootfs_mount = LXCROOTFSMOUNT;
2544
7b379ab3 2545struct lxc_conf *lxc_conf_init(void)
089cd8b8 2546{
7b379ab3 2547 struct lxc_conf *new;
26ddeedd 2548 int i;
7b379ab3
MN
2549
2550 new = malloc(sizeof(*new));
2551 if (!new) {
2552 ERROR("lxc_conf_init : %m");
2553 return NULL;
2554 }
2555 memset(new, 0, sizeof(*new));
2556
b40a606e 2557 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2558 new->personality = -1;
bc6928ff 2559 new->autodev = -1;
596a818d
DE
2560 new->console.log_path = NULL;
2561 new->console.log_fd = -1;
28a4b0e5 2562 new->console.path = NULL;
63376d7d 2563 new->console.peer = -1;
b5159817
DE
2564 new->console.peerpty.busy = -1;
2565 new->console.peerpty.master = -1;
2566 new->console.peerpty.slave = -1;
63376d7d
DL
2567 new->console.master = -1;
2568 new->console.slave = -1;
2569 new->console.name[0] = '\0';
d2e30e99 2570 new->maincmd_fd = -1;
54c30e29 2571 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2572 if (!new->rootfs.mount) {
2573 ERROR("lxc_conf_init : %m");
2574 free(new);
2575 return NULL;
2576 }
2f3f41d0 2577 new->kmsg = 1;
7b379ab3
MN
2578 lxc_list_init(&new->cgroup);
2579 lxc_list_init(&new->network);
2580 lxc_list_init(&new->mount_list);
81810dd1 2581 lxc_list_init(&new->caps);
1fb86a7c 2582 lxc_list_init(&new->keepcaps);
f6d3e3e4 2583 lxc_list_init(&new->id_map);
26ddeedd
SH
2584 for (i=0; i<NUM_LXC_HOOKS; i++)
2585 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2586 lxc_list_init(&new->groups);
fe4de9a6
DE
2587 new->lsm_aa_profile = NULL;
2588 new->lsm_se_context = NULL;
e075f5d9 2589 new->lsm_umount_proc = 0;
7b379ab3 2590
9f30a190
MM
2591 for (i = 0; i < LXC_NS_MAX; i++)
2592 new->inherit_ns_fd[i] = -1;
2593
7b379ab3 2594 return new;
089cd8b8
DL
2595}
2596
e3b4c4c4 2597static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2598{
8634bc19 2599 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2600 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2601 int err;
13954cce 2602
e892973e
DL
2603 if (netdev->priv.veth_attr.pair)
2604 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2605 else {
9ba8130c
SH
2606 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2607 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2608 ERROR("veth1 name too long");
2609 return -1;
2610 }
a0265685 2611 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2612 if (!veth1) {
2613 ERROR("failed to allocate a temporary name");
2614 return -1;
2615 }
74a2b586
JK
2616 /* store away for deconf */
2617 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2618 }
82d5ae15 2619
0e391e57 2620 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2621 veth2 = lxc_mkifname(veth2buf);
ad40563e 2622 if (!veth2) {
82d5ae15 2623 ERROR("failed to allocate a temporary name");
ad40563e 2624 goto out_delete;
0ad19a3f 2625 }
2626
3cfc0f3a
MN
2627 err = lxc_veth_create(veth1, veth2);
2628 if (err) {
2629 ERROR("failed to create %s-%s : %s", veth1, veth2,
2630 strerror(-err));
ad40563e 2631 goto out_delete;
0ad19a3f 2632 }
13954cce 2633
49684c0b
CS
2634 /* changing the high byte of the mac address to 0xfe, the bridge interface
2635 * will always keep the host's mac address and not take the mac address
2636 * of a container */
2637 err = setup_private_host_hw_addr(veth1);
2638 if (err) {
2639 ERROR("failed to change mac address of host interface '%s' : %s",
2640 veth1, strerror(-err));
2641 goto out_delete;
2642 }
2643
82d5ae15 2644 if (netdev->mtu) {
d472214b 2645 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2646 if (!err)
d472214b 2647 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2648 if (err) {
2649 ERROR("failed to set mtu '%s' for %s-%s : %s",
2650 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2651 goto out_delete;
75d09f83
DL
2652 }
2653 }
2654
3cfc0f3a
MN
2655 if (netdev->link) {
2656 err = lxc_bridge_attach(netdev->link, veth1);
2657 if (err) {
2658 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2659 veth1, netdev->link, strerror(-err));
2660 goto out_delete;
2661 }
eb14c10a
DL
2662 }
2663
82d5ae15
DL
2664 netdev->ifindex = if_nametoindex(veth2);
2665 if (!netdev->ifindex) {
36eb9bde 2666 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2667 goto out_delete;
2668 }
2669
d472214b 2670 err = lxc_netdev_up(veth1);
6e35af2e
DL
2671 if (err) {
2672 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2673 goto out_delete;
0ad19a3f 2674 }
2675
e3b4c4c4 2676 if (netdev->upscript) {
751d9dcd
DL
2677 err = run_script(handler->name, "net", netdev->upscript, "up",
2678 "veth", veth1, (char*) NULL);
2679 if (err)
e3b4c4c4 2680 goto out_delete;
e3b4c4c4
ST
2681 }
2682
82d5ae15
DL
2683 DEBUG("instanciated veth '%s/%s', index is '%d'",
2684 veth1, veth2, netdev->ifindex);
2685
6ab9ab6d 2686 return 0;
eb14c10a
DL
2687
2688out_delete:
b84f58b9 2689 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2690 if (!netdev->priv.veth_attr.pair && veth1)
2691 free(veth1);
2692 if(veth2)
2693 free(veth2);
6ab9ab6d 2694 return -1;
13954cce 2695}
d957ae2d 2696
74a2b586
JK
2697static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2698{
2699 char *veth1;
2700 int err;
2701
2702 if (netdev->priv.veth_attr.pair)
2703 veth1 = netdev->priv.veth_attr.pair;
2704 else
2705 veth1 = netdev->priv.veth_attr.veth1;
2706
2707 if (netdev->downscript) {
2708 err = run_script(handler->name, "net", netdev->downscript,
2709 "down", "veth", veth1, (char*) NULL);
2710 if (err)
2711 return -1;
2712 }
2713 return 0;
2714}
2715
e3b4c4c4 2716static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2717{
0e391e57 2718 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2719 int err;
d957ae2d
MT
2720
2721 if (!netdev->link) {
2722 ERROR("no link specified for macvlan netdev");
2723 return -1;
2724 }
13954cce 2725
9ba8130c
SH
2726 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2727 if (err >= sizeof(peerbuf))
2728 return -1;
82d5ae15 2729
a0265685 2730 peer = lxc_mkifname(peerbuf);
ad40563e 2731 if (!peer) {
82d5ae15
DL
2732 ERROR("failed to make a temporary name");
2733 return -1;
0ad19a3f 2734 }
2735
3cfc0f3a
MN
2736 err = lxc_macvlan_create(netdev->link, peer,
2737 netdev->priv.macvlan_attr.mode);
2738 if (err) {
2739 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2740 peer, netdev->link, strerror(-err));
ad40563e 2741 goto out;
0ad19a3f 2742 }
2743
82d5ae15
DL
2744 netdev->ifindex = if_nametoindex(peer);
2745 if (!netdev->ifindex) {
36eb9bde 2746 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2747 goto out;
22ebac19 2748 }
2749
e3b4c4c4 2750 if (netdev->upscript) {
751d9dcd
DL
2751 err = run_script(handler->name, "net", netdev->upscript, "up",
2752 "macvlan", netdev->link, (char*) NULL);
2753 if (err)
ad40563e 2754 goto out;
e3b4c4c4
ST
2755 }
2756
e892973e
DL
2757 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2758 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2759
d957ae2d 2760 return 0;
ad40563e
ÇO
2761out:
2762 lxc_netdev_delete_by_name(peer);
2763 free(peer);
2764 return -1;
0ad19a3f 2765}
2766
74a2b586
JK
2767static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2768{
2769 int err;
2770
2771 if (netdev->downscript) {
2772 err = run_script(handler->name, "net", netdev->downscript,
2773 "down", "macvlan", netdev->link,
2774 (char*) NULL);
2775 if (err)
2776 return -1;
2777 }
2778 return 0;
2779}
2780
26c39028 2781/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2782static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2783{
2784 char peer[IFNAMSIZ];
3cfc0f3a 2785 int err;
26c39028
JHS
2786
2787 if (!netdev->link) {
2788 ERROR("no link specified for vlan netdev");
2789 return -1;
2790 }
2791
9ba8130c
SH
2792 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2793 if (err >= sizeof(peer)) {
2794 ERROR("peer name too long");
2795 return -1;
2796 }
26c39028 2797
3cfc0f3a
MN
2798 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2799 if (err) {
2800 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2801 peer, netdev->link, strerror(-err));
26c39028
JHS
2802 return -1;
2803 }
2804
2805 netdev->ifindex = if_nametoindex(peer);
2806 if (!netdev->ifindex) {
2807 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2808 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2809 return -1;
2810 }
2811
e892973e
DL
2812 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2813 netdev->ifindex);
2814
26c39028
JHS
2815 return 0;
2816}
2817
74a2b586
JK
/* Shutdown handler for vlan devices: does nothing and always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2822
e3b4c4c4 2823static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2824{
6168e99f
DL
2825 if (!netdev->link) {
2826 ERROR("no link specified for the physical interface");
2827 return -1;
2828 }
2829
9d083402 2830 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2831 if (!netdev->ifindex) {
9d083402 2832 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2833 return -1;
2834 }
2835
e3b4c4c4
ST
2836 if (netdev->upscript) {
2837 int err;
751d9dcd
DL
2838 err = run_script(handler->name, "net", netdev->upscript,
2839 "up", "phys", netdev->link, (char*) NULL);
2840 if (err)
e3b4c4c4 2841 return -1;
e3b4c4c4
ST
2842 }
2843
82d5ae15 2844 return 0;
0ad19a3f 2845}
2846
74a2b586
JK
2847static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2848{
2849 int err;
2850
2851 if (netdev->downscript) {
2852 err = run_script(handler->name, "net", netdev->downscript,
2853 "down", "phys", netdev->link, (char*) NULL);
2854 if (err)
2855 return -1;
2856 }
2857 return 0;
2858}
2859
26b797f3
SH
2860static int instanciate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
2861{
2862 netdev->ifindex = 0;
2863 return 0;
2864}
2865
e3b4c4c4 2866static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2867{
82d5ae15 2868 netdev->ifindex = 0;
e3b4c4c4
ST
2869 if (netdev->upscript) {
2870 int err;
751d9dcd
DL
2871 err = run_script(handler->name, "net", netdev->upscript,
2872 "up", "empty", (char*) NULL);
2873 if (err)
e3b4c4c4 2874 return -1;
e3b4c4c4 2875 }
82d5ae15 2876 return 0;
0ad19a3f 2877}
2878
74a2b586
JK
2879static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2880{
2881 int err;
2882
2883 if (netdev->downscript) {
2884 err = run_script(handler->name, "net", netdev->downscript,
2885 "down", "empty", (char*) NULL);
2886 if (err)
2887 return -1;
2888 }
2889 return 0;
2890}
2891
26b797f3
SH
/* Shutdown handler for the "none" type: does nothing and always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2896
2897int lxc_requests_empty_network(struct lxc_handler *handler)
2898{
2899 struct lxc_list *network = &handler->conf->network;
2900 struct lxc_list *iterator;
2901 struct lxc_netdev *netdev;
2902 bool found_none = false, found_nic = false;
2903
2904 if (lxc_list_empty(network))
2905 return 0;
2906
2907 lxc_list_for_each(iterator, network) {
2908
2909 netdev = iterator->elem;
2910
2911 if (netdev->type == LXC_NET_NONE)
2912 found_none = true;
2913 else
2914 found_nic = true;
2915 }
2916 if (found_none && !found_nic)
2917 return 1;
2918 return 0;
2919}
2920
e3b4c4c4 2921int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2922{
e3b4c4c4 2923 struct lxc_list *network = &handler->conf->network;
82d5ae15 2924 struct lxc_list *iterator;
82d5ae15 2925 struct lxc_netdev *netdev;
cbef6c52
SH
2926 int am_root = (getuid() == 0);
2927
2928 if (!am_root)
2929 return 0;
0ad19a3f 2930
5f4535a3 2931 lxc_list_for_each(iterator, network) {
0ad19a3f 2932
5f4535a3 2933 netdev = iterator->elem;
13954cce 2934
24654103 2935 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2936 ERROR("invalid network configuration type '%d'",
5f4535a3 2937 netdev->type);
82d5ae15
DL
2938 return -1;
2939 }
0ad19a3f 2940
e3b4c4c4 2941 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2942 ERROR("failed to create netdev");
2943 return -1;
2944 }
e3b4c4c4 2945
0ad19a3f 2946 }
2947
2948 return 0;
2949}
2950
74a2b586 2951void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2952{
74a2b586 2953 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2954 struct lxc_list *iterator;
2955 struct lxc_netdev *netdev;
2956
2957 lxc_list_for_each(iterator, network) {
2958 netdev = iterator->elem;
d472214b 2959
74a2b586 2960 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2961 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2962 WARN("failed to rename to the initial name the " \
2963 "netdev '%s'", netdev->link);
d472214b 2964 continue;
d8f8e352 2965 }
d472214b 2966
74a2b586
JK
2967 if (netdev_deconf[netdev->type](handler, netdev)) {
2968 WARN("failed to destroy netdev");
2969 }
2970
d8f8e352
DL
2971 /* Recent kernel remove the virtual interfaces when the network
2972 * namespace is destroyed but in case we did not moved the
2973 * interface to the network namespace, we have to destroy it
2974 */
74a2b586
JK
2975 if (netdev->ifindex != 0 &&
2976 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2977 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2978 }
2979}
2980
45e854dc
SG
2981#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2982
74a3920a 2983static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
2984{
2985 pid_t child;
2986
2987 if (netdev->type != LXC_NET_VETH) {
2988 ERROR("nic type %d not support for unprivileged use",
2989 netdev->type);
2990 return -1;
2991 }
2992
2993 if ((child = fork()) < 0) {
2994 SYSERROR("fork");
2995 return -1;
2996 }
2997
2998 if (child > 0)
2999 return wait_for_pid(child);
3000
3001 // Call lxc-user-nic pid type bridge
45e854dc 3002
cbef6c52 3003 char pidstr[20];
45e854dc 3004 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
cbef6c52
SH
3005 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3006 pidstr[19] = '\0';
45e854dc 3007 execvp(args[0], args);
cbef6c52
SH
3008 SYSERROR("execvp lxc-user-nic");
3009 exit(1);
3010}
3011
5f4535a3 3012int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3013{
82d5ae15 3014 struct lxc_list *iterator;
82d5ae15 3015 struct lxc_netdev *netdev;
cbef6c52 3016 int am_root = (getuid() == 0);
3cfc0f3a 3017 int err;
0ad19a3f 3018
5f4535a3 3019 lxc_list_for_each(iterator, network) {
82d5ae15 3020
5f4535a3 3021 netdev = iterator->elem;
82d5ae15 3022
fbb16259 3023 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3024 if (unpriv_assign_nic(netdev, pid))
3025 return -1;
3026 // TODO fill in netdev->ifindex and name
3027 continue;
3028 }
236087a6 3029
fbb16259
SH
3030 /* empty network namespace, nothing to move */
3031 if (!netdev->ifindex)
3032 continue;
3033
d472214b 3034 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
3035 if (err) {
3036 ERROR("failed to move '%s' to the container : %s",
3037 netdev->link, strerror(-err));
82d5ae15
DL
3038 return -1;
3039 }
3040
c1c75c04 3041 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3042 }
3043
3044 return 0;
3045}
3046
251d0d2a
DE
3047static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3048 size_t buf_size)
f6d3e3e4
SH
3049{
3050 char path[PATH_MAX];
e4ccd113 3051 int ret, closeret;
f6d3e3e4
SH
3052 FILE *f;
3053
3054 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3055 if (ret < 0 || ret >= PATH_MAX) {
3056 fprintf(stderr, "%s: path name too long", __func__);
3057 return -E2BIG;
3058 }
3059 f = fopen(path, "w");
3060 if (!f) {
3061 perror("open");
3062 return -EINVAL;
3063 }
251d0d2a 3064 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3065 if (ret < 0)
e4ccd113
SH
3066 SYSERROR("writing id mapping");
3067 closeret = fclose(f);
3068 if (closeret)
3069 SYSERROR("writing id mapping");
3070 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3071}
3072
3073int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3074{
3075 struct lxc_list *iterator;
3076 struct id_map *map;
3077 int ret = 0;
251d0d2a 3078 enum idtype type;
4f7521b4 3079 char *buf = NULL, *pos;
cf3ef16d 3080 int am_root = (getuid() == 0);
251d0d2a
DE
3081
3082 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3083 int left, fill;
cf3ef16d
SH
3084 int had_entry = 0;
3085 if (!buf) {
3086 buf = pos = malloc(4096);
4f7521b4
SH
3087 if (!buf)
3088 return -ENOMEM;
cf3ef16d
SH
3089 }
3090 pos = buf;
3091 if (!am_root)
3092 pos += sprintf(buf, "new%cidmap %d ",
3093 type == ID_TYPE_UID ? 'u' : 'g',
3094 pid);
4f7521b4 3095
cf3ef16d
SH
3096 lxc_list_for_each(iterator, idmap) {
3097 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3098 map = iterator->elem;
cf3ef16d
SH
3099 if (map->idtype != type)
3100 continue;
3101
3102 had_entry = 1;
3103 left = 4096 - (pos - buf);
3104 fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
3105 map->hostid, map->range);
3106 if (fill <= 0 || fill >= left)
3107 SYSERROR("snprintf failed, too many mappings");
3108 pos += fill;
251d0d2a 3109 }
cf3ef16d 3110 if (!had_entry)
4f7521b4 3111 continue;
cf3ef16d
SH
3112 left = 4096 - (pos - buf);
3113 fill = snprintf(pos, left, "\n");
3114 if (fill <= 0 || fill >= left)
3115 SYSERROR("snprintf failed, too many mappings");
3116 pos += fill;
3117
3118 if (am_root)
3119 ret = write_id_mapping(type, pid, buf, pos-buf);
3120 else
3121 ret = system(buf);
3122
f6d3e3e4
SH
3123 if (ret)
3124 break;
3125 }
251d0d2a 3126
4f7521b4
SH
3127 if (buf)
3128 free(buf);
f6d3e3e4
SH
3129 return ret;
3130}
3131
cf3ef16d 3132/*
0b3a6504
SH
3133 * return the host uid to which the container root is mapped in *val.
3134 * Return true if id was found, false otherwise.
cf3ef16d 3135 */
2a9a80cb 3136bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3137 unsigned long *val)
cf3ef16d
SH
3138{
3139 struct lxc_list *it;
3140 struct id_map *map;
3141
3142 lxc_list_for_each(it, &conf->id_map) {
3143 map = it->elem;
3144 if (map->idtype != ID_TYPE_UID)
3145 continue;
3146 if (map->nsid != 0)
3147 continue;
2a9a80cb
SH
3148 *val = map->hostid;
3149 return true;
cf3ef16d 3150 }
2a9a80cb 3151 return false;
cf3ef16d
SH
3152}
3153
57d116ab 3154int mapped_hostid(int id, struct lxc_conf *conf)
cf3ef16d
SH
3155{
3156 struct lxc_list *it;
3157 struct id_map *map;
3158 lxc_list_for_each(it, &conf->id_map) {
3159 map = it->elem;
3160 if (map->idtype != ID_TYPE_UID)
3161 continue;
3162 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3163 return (id - map->hostid) + map->nsid;
cf3ef16d 3164 }
57d116ab 3165 return -1;
cf3ef16d
SH
3166}
3167
3168int find_unmapped_nsuid(struct lxc_conf *conf)
3169{
3170 struct lxc_list *it;
3171 struct id_map *map;
3172 uid_t freeid = 0;
3173again:
3174 lxc_list_for_each(it, &conf->id_map) {
3175 map = it->elem;
3176 if (map->idtype != ID_TYPE_UID)
3177 continue;
3178 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3179 freeid = map->nsid + map->range;
3180 goto again;
3181 }
3182 }
3183 return freeid;
3184}
3185
19a26f82
MK
3186int lxc_find_gateway_addresses(struct lxc_handler *handler)
3187{
3188 struct lxc_list *network = &handler->conf->network;
3189 struct lxc_list *iterator;
3190 struct lxc_netdev *netdev;
3191 int link_index;
3192
3193 lxc_list_for_each(iterator, network) {
3194 netdev = iterator->elem;
3195
3196 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3197 continue;
3198
3199 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3200 ERROR("gateway = auto only supported for "
3201 "veth and macvlan");
3202 return -1;
3203 }
3204
3205 if (!netdev->link) {
3206 ERROR("gateway = auto needs a link interface");
3207 return -1;
3208 }
3209
3210 link_index = if_nametoindex(netdev->link);
3211 if (!link_index)
3212 return -EINVAL;
3213
3214 if (netdev->ipv4_gateway_auto) {
3215 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3216 ERROR("failed to automatically find ipv4 gateway "
3217 "address from link interface '%s'", netdev->link);
3218 return -1;
3219 }
3220 }
3221
3222 if (netdev->ipv6_gateway_auto) {
3223 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3224 ERROR("failed to automatically find ipv6 gateway "
3225 "address from link interface '%s'", netdev->link);
3226 return -1;
3227 }
3228 }
3229 }
3230
3231 return 0;
3232}
3233
5e4a62bf 3234int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3235{
5e4a62bf 3236 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3237 int i, ret;
b0a33c1e 3238
5e4a62bf
DL
3239 /* no tty in the configuration */
3240 if (!conf->tty)
b0a33c1e 3241 return 0;
3242
13954cce 3243 tty_info->pty_info =
e4e7d59d 3244 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3245 if (!tty_info->pty_info) {
36eb9bde 3246 SYSERROR("failed to allocate pty_info");
985d15b1 3247 return -1;
b0a33c1e 3248 }
3249
985d15b1 3250 for (i = 0; i < conf->tty; i++) {
13954cce 3251
b0a33c1e 3252 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3253
025ed0f3
SH
3254 process_lock();
3255 ret = openpty(&pty_info->master, &pty_info->slave,
3256 pty_info->name, NULL, NULL);
3257 process_unlock();
3258 if (ret) {
36eb9bde 3259 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3260 tty_info->nbtty = i;
3261 lxc_delete_tty(tty_info);
3262 return -1;
b0a33c1e 3263 }
3264
5332bb84
DL
3265 DEBUG("allocated pty '%s' (%d/%d)",
3266 pty_info->name, pty_info->master, pty_info->slave);
3267
3ec1648d 3268 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3269 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3270 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3271
b0a33c1e 3272 pty_info->busy = 0;
3273 }
3274
985d15b1 3275 tty_info->nbtty = conf->tty;
1ac470c0
DL
3276
3277 INFO("tty's configured");
3278
985d15b1 3279 return 0;
b0a33c1e 3280}
3281
3282void lxc_delete_tty(struct lxc_tty_info *tty_info)
3283{
3284 int i;
3285
3286 for (i = 0; i < tty_info->nbtty; i++) {
3287 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3288
3289 close(pty_info->master);
3290 close(pty_info->slave);
3291 }
3292
3293 free(tty_info->pty_info);
3294 tty_info->nbtty = 0;
3295}
3296
f6d3e3e4 3297/*
c4d10a05
SH
3298 * chown_mapped_root: for an unprivileged user with uid X to chown a dir
3299 * to subuid Y, he needs to run chown as root in a userns where
3300 * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
3301 * X. That way, the container root is privileged with respect to
3302 * hostuid X, allowing him to do the chown.
f6d3e3e4 3303 */
c4d10a05 3304int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3305{
c4d10a05
SH
3306 uid_t rootid;
3307 pid_t pid;
2a9a80cb 3308 unsigned long val;
f6d3e3e4 3309
2a9a80cb 3310 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3311 ERROR("No mapping for container root");
3312 return -1;
f6d3e3e4 3313 }
2a9a80cb
SH
3314 rootid = (uid_t) val;
3315
c4d10a05
SH
3316 if (geteuid() == 0) {
3317 if (chown(path, rootid, -1) < 0) {
3318 ERROR("Error chowning %s", path);
3319 return -1;
3320 }
3321 return 0;
3322 }
3323 pid = fork();
3324 if (pid < 0) {
3325 SYSERROR("Failed forking");
f6d3e3e4
SH
3326 return -1;
3327 }
c4d10a05
SH
3328 if (!pid) {
3329 int hostuid = geteuid(), ret;
98e5ba51
SH
3330 char map1[100], map2[100], map3[100];
3331 char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "-m",
3332 map3, "--", "chown", "0", path, NULL};
f6d3e3e4 3333
98e5ba51
SH
3334 // "u:0:rootid:1"
3335 ret = snprintf(map1, 100, "u:0:%d:1", rootid);
c4d10a05
SH
3336 if (ret < 0 || ret >= 100) {
3337 ERROR("Error uid printing map string");
f6d3e3e4
SH
3338 return -1;
3339 }
c4d10a05 3340
98e5ba51
SH
3341 // "u:hostuid:hostuid:1"
3342 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3343 if (ret < 0 || ret >= 100) {
3344 ERROR("Error uid printing map string");
3345 return -1;
3346 }
3347
3348 // "g:0:hostgid:1"
3349 ret = snprintf(map3, 100, "g:0:%d:1", getgid());
c4d10a05
SH
3350 if (ret < 0 || ret >= 100) {
3351 ERROR("Error uid printing map string");
3352 return -1;
3353 }
3354
3355 ret = execvp("lxc-usernsexec", args);
3356 SYSERROR("Failed executing usernsexec");
3357 exit(1);
f6d3e3e4 3358 }
c4d10a05 3359 return wait_for_pid(pid);
f6d3e3e4
SH
3360}
3361
c4d10a05 3362int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3363{
c4d10a05 3364 int i;
f6d3e3e4 3365
c4d10a05 3366 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3367 return 0;
c4d10a05
SH
3368
3369 for (i = 0; i < c->tty_info.nbtty; i++) {
3370 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3371
3372 if (chown_mapped_root(pty_info->name, c) < 0) {
3373 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3374 return -1;
3375 }
3376 }
3377
29b10e4f 3378 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3379 ERROR("Failed to chown %s", c->console.name);
3380 return -1;
3381 }
3382
f6d3e3e4
SH
3383 return 0;
3384}
3385
bc6928ff
MW
3386/*
3387 * This routine is called when the configuration does not already specify a value
3388 * for autodev (mounting a file system on /dev and populating it in a container).
3389 * If a hard override value has not been specified, then we try to apply some
3390 * heuristics to determine if we should switch to autodev mode.
3391 *
3392 * For instance, if the container has an /etc/systemd/system directory then it
3393 * is probably running systemd as the init process and it needs the autodev
3394 * mount to prevent it from mounting devtmpfs on /dev on its own causing conflicts
3395 * in the host.
3396 *
3397 * We may also want to enable autodev if the host has devtmpfs mounted on its
3398 * /dev as this then enables us to use subdirectories under /dev for the container
3399 * /dev directories and we can fake udev devices.
3400 */
/*
 * Layout of the opaque data pointer as read by check_autodev():
 * argv[0], when set, is taken as the command whose path is inspected.
 */
struct start_args {
	char *const *argv;
};

/* Upper bound on the number of symlinks check_autodev() will follow. */
#define MAX_SYMLINK_DEPTH 32
3406
74a3920a 3407static int check_autodev( const char *rootfs, void *data )
bc6928ff
MW
3408{
3409 struct start_args *arg = data;
3410 int ret;
3411 int loop_count = 0;
3412 struct stat s;
3413 char absrootfs[MAXPATHLEN];
3414 char path[MAXPATHLEN];
3415 char abs_path[MAXPATHLEN];
3416 char *command = "/sbin/init";
3417
3418 if (rootfs == NULL || strlen(rootfs) == 0)
3419 return -2;
3420
3421 if (!realpath(rootfs, absrootfs))
3422 return -2;
3423
3424 if( arg && arg->argv[0] ) {
3425 command = arg->argv[0];
3426 DEBUG("Set exec command to %s\n", command );
3427 }
3428
3429 strncpy( path, command, MAXPATHLEN-1 );
3430
3431 if ( 0 != access(path, F_OK) || 0 != stat(path, &s) )
3432 return -2;
3433
3434 /* Dereference down the symlink merry path testing as we go. */
3435 /* If anything references systemd in the path - set autodev! */
3436 /* Renormalize to the rootfs before each dereference */
3437 /* Relative symlinks should fall out in the wash even with .. */
3438 while( 1 ) {
3439 if ( strstr( path, "systemd" ) ) {
3440 INFO("Container with systemd init detected - enabling autodev!");
3441 return 1;
3442 }
3443
3444 ret = snprintf(abs_path, MAXPATHLEN-1, "%s/%s", absrootfs, path);
3445 if (ret < 0 || ret > MAXPATHLEN)
3446 return -2;
3447
3448 ret = readlink( abs_path, path, MAXPATHLEN-1 );
3449
3450 if ( ( ret <= 0 ) || ( ++loop_count > MAX_SYMLINK_DEPTH ) ) {
3451 break; /* Break out for other tests */
3452 }
3453 path[ret] = '\0';
3454 }
3455
3456 /*
3457 * Add future checks here.
3458 * Return positive if we should go autodev
3459 * Return 0 if we should NOT go autodev
3460 * Return negative if we encounter an error or can not determine...
3461 */
3462
3463 /* All else fails, we don't need autodev */
3464 INFO("Autodev not required.");
3465 return 0;
3466}
3467
d4ef7c50 3468int lxc_setup(struct lxc_handler *handler)
0ad19a3f 3469{
d4ef7c50
SH
3470 const char *name = handler->name;
3471 struct lxc_conf *lxc_conf = handler->conf;
3472 const char *lxcpath = handler->lxcpath;
3473 void *data = handler->data;
3474 struct lxc_cgroup_info *cgroup_info = handler->cgroup_info;
3475
6c544cb3
MM
3476 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3477 if (setup_utsname(lxc_conf->utsname)) {
3478 ERROR("failed to setup the utsname for '%s'", name);
3479 return -1;
3480 }
0ad19a3f 3481 }
3482
5f4535a3 3483 if (setup_network(&lxc_conf->network)) {
36eb9bde 3484 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3485 return -1;
0ad19a3f 3486 }
3487
283678ed 3488 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3489 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3490 return -1;
3491 }
5ea6163a 3492
cc28d0b0 3493 if (setup_rootfs(lxc_conf)) {
ac778708 3494 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3495 return -1;
0ad19a3f 3496 }
3497
bc6928ff
MW
3498 if (lxc_conf->autodev < 0) {
3499 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
3500 }
3501
3502 if (lxc_conf->autodev > 0) {
3503 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3504 ERROR("failed to mount /dev in the container");
c6883f38
SH
3505 return -1;
3506 }
3507 }
3508
368bbc02
CS
3509 /* do automatic mounts (mainly /proc and /sys), but exclude
3510 * those that need to wait until other stuff has finished
3511 */
b06b8511 3512 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3513 ERROR("failed to setup the automatic mounts for '%s'", name);
3514 return -1;
3515 }
3516
80a881b2 3517 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3518 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3519 return -1;
576f946d 3520 }
3521
c1dc38c2 3522 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3523 ERROR("failed to setup the mount entries for '%s'", name);
3524 return -1;
3525 }
3526
368bbc02
CS
3527 /* now mount only cgroup, if wanted;
3528 * before, /sys could not have been mounted
3529 * (is either mounted automatically or via fstab entries)
3530 */
b06b8511 3531 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3532 ERROR("failed to setup the automatic mounts for '%s'", name);
3533 return -1;
3534 }
3535
283678ed 3536 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3537 ERROR("failed to run mount hooks for container '%s'.", name);
3538 return -1;
3539 }
3540
bc6928ff 3541 if (lxc_conf->autodev > 0) {
283678ed 3542 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3543 ERROR("failed to run autodev hooks for container '%s'.", name);
3544 return -1;
3545 }
91c3830e
SH
3546 if (setup_autodev(lxc_conf->rootfs.mount)) {
3547 ERROR("failed to populate /dev in the container");
3548 return -1;
3549 }
3550 }
368bbc02 3551
37903589 3552 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3553 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3554 return -1;
6e590161 3555 }
3556
7e0e1d94
AV
3557 if (lxc_conf->kmsg) {
3558 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3559 ERROR("failed to setup kmsg for '%s'", name);
3560 }
1bd051a6 3561
37903589 3562 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3563 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3564 return -1;
b0a33c1e 3565 }
3566
fe4de9a6
DE
3567 /* mount /proc if needed for LSM transition */
3568 if (lsm_proc_mount(lxc_conf) < 0) {
3569 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3570 return -1;
e075f5d9 3571 }
e075f5d9 3572
ac778708 3573 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3574 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3575 return -1;
ed502555 3576 }
3577
571e6ec8 3578 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3579 ERROR("failed to setup the new pts instance");
95b5ffaf 3580 return -1;
3c26f34e 3581 }
3582
cccc74b5
DL
3583 if (setup_personality(lxc_conf->personality)) {
3584 ERROR("failed to setup personality");
3585 return -1;
3586 }
3587
f6d3e3e4 3588 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3589 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3590 if (!lxc_list_empty(&lxc_conf->caps)) {
3591 ERROR("Simultaneously requested dropping and keeping caps");
3592 return -1;
3593 }
3594 if (dropcaps_except(&lxc_conf->keepcaps)) {
3595 ERROR("failed to keep requested caps\n");
3596 return -1;
3597 }
3598 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3599 ERROR("failed to drop capabilities");
3600 return -1;
3601 }
81810dd1
DL
3602 }
3603
cd54d859
DL
3604 NOTICE("'%s' is setup.", name);
3605
0ad19a3f 3606 return 0;
3607}
26ddeedd 3608
283678ed
SH
3609int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3610 const char *lxcpath, char *argv[])
26ddeedd
SH
3611{
3612 int which = -1;
3613 struct lxc_list *it;
3614
3615 if (strcmp(hook, "pre-start") == 0)
3616 which = LXCHOOK_PRESTART;
5ea6163a
SH
3617 else if (strcmp(hook, "pre-mount") == 0)
3618 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3619 else if (strcmp(hook, "mount") == 0)
3620 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3621 else if (strcmp(hook, "autodev") == 0)
3622 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3623 else if (strcmp(hook, "start") == 0)
3624 which = LXCHOOK_START;
3625 else if (strcmp(hook, "post-stop") == 0)
3626 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3627 else if (strcmp(hook, "clone") == 0)
3628 which = LXCHOOK_CLONE;
26ddeedd
SH
3629 else
3630 return -1;
3631 lxc_list_for_each(it, &conf->hooks[which]) {
3632 int ret;
3633 char *hookname = it->elem;
283678ed 3634 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3635 if (ret)
3636 return ret;
3637 }
3638 return 0;
3639}
72d0e1cb 3640
427b3a21 3641static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3642{
3643 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3644 struct lxc_list *it2,*next;
72d0e1cb
SG
3645
3646 lxc_list_del(it);
3647
3648 if (netdev->link)
3649 free(netdev->link);
3650 if (netdev->name)
3651 free(netdev->name);
c9bb9a85
DE
3652 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3653 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3654 if (netdev->upscript)
3655 free(netdev->upscript);
3656 if (netdev->hwaddr)
3657 free(netdev->hwaddr);
3658 if (netdev->mtu)
3659 free(netdev->mtu);
3660 if (netdev->ipv4_gateway)
3661 free(netdev->ipv4_gateway);
3662 if (netdev->ipv6_gateway)
3663 free(netdev->ipv6_gateway);
9ebb03ad 3664 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3665 lxc_list_del(it2);
3666 free(it2->elem);
3667 free(it2);
3668 }
9ebb03ad 3669 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3670 lxc_list_del(it2);
3671 free(it2->elem);
3672 free(it2);
3673 }
d95db067 3674 free(netdev);
72d0e1cb
SG
3675 free(it);
3676}
3677
3678/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3679int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3680{
3681 char *p1;
3682 int ret, idx, i;
3683 struct lxc_list *it;
3684 struct lxc_netdev *netdev;
3685
3686 p1 = index(key, '.');
3687 if (!p1 || *(p1+1) == '\0')
3688 p1 = NULL;
3689
3690 ret = sscanf(key, "%d", &idx);
3691 if (ret != 1) return -1;
3692 if (idx < 0)
3693 return -1;
3694
3695 i = 0;
3696 lxc_list_for_each(it, &c->network) {
3697 if (i == idx)
3698 break;
3699 i++;
3700 }
3701 if (i < idx) // we don't have that many nics defined
3702 return -1;
3703
3704 if (!it || !it->elem)
3705 return -1;
3706
3707 netdev = it->elem;
3708
3709 if (!p1) {
3710 lxc_remove_nic(it);
52d21d40 3711 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3712 struct lxc_list *it2,*next;
3713 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3714 lxc_list_del(it2);
3715 free(it2->elem);
3716 free(it2);
3717 }
52d21d40 3718 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
3719 struct lxc_list *it2,*next;
3720 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3721 lxc_list_del(it2);
3722 free(it2->elem);
3723 free(it2);
3724 }
52d21d40 3725 } else if (strcmp(p1, ".link") == 0) {
72d0e1cb
SG
3726 if (netdev->link) {
3727 free(netdev->link);
3728 netdev->link = NULL;
3729 }
52d21d40 3730 } else if (strcmp(p1, ".name") == 0) {
72d0e1cb
SG
3731 if (netdev->name) {
3732 free(netdev->name);
3733 netdev->name = NULL;
3734 }
52d21d40 3735 } else if (strcmp(p1, ".script.up") == 0) {
72d0e1cb
SG
3736 if (netdev->upscript) {
3737 free(netdev->upscript);
3738 netdev->upscript = NULL;
3739 }
52d21d40 3740 } else if (strcmp(p1, ".hwaddr") == 0) {
72d0e1cb
SG
3741 if (netdev->hwaddr) {
3742 free(netdev->hwaddr);
3743 netdev->hwaddr = NULL;
3744 }
52d21d40 3745 } else if (strcmp(p1, ".mtu") == 0) {
72d0e1cb
SG
3746 if (netdev->mtu) {
3747 free(netdev->mtu);
3748 netdev->mtu = NULL;
3749 }
52d21d40 3750 } else if (strcmp(p1, ".ipv4_gateway") == 0) {
72d0e1cb
SG
3751 if (netdev->ipv4_gateway) {
3752 free(netdev->ipv4_gateway);
3753 netdev->ipv4_gateway = NULL;
3754 }
52d21d40 3755 } else if (strcmp(p1, ".ipv6_gateway") == 0) {
72d0e1cb
SG
3756 if (netdev->ipv6_gateway) {
3757 free(netdev->ipv6_gateway);
3758 netdev->ipv6_gateway = NULL;
3759 }
3760 }
3761 else return -1;
3762
3763 return 0;
3764}
3765
3766int lxc_clear_config_network(struct lxc_conf *c)
3767{
9ebb03ad
DE
3768 struct lxc_list *it,*next;
3769 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3770 lxc_remove_nic(it);
3771 }
3772 return 0;
3773}
3774
3775int lxc_clear_config_caps(struct lxc_conf *c)
3776{
9ebb03ad 3777 struct lxc_list *it,*next;
72d0e1cb 3778
9ebb03ad 3779 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3780 lxc_list_del(it);
3781 free(it->elem);
3782 free(it);
3783 }
3784 return 0;
3785}
3786
74a3920a 3787static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
3788 struct lxc_list *it, *next;
3789
4355ab5f 3790 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
3791 lxc_list_del(it);
3792 free(it->elem);
3793 free(it);
3794 }
3795 return 0;
3796}
3797
4355ab5f
SH
3798int lxc_clear_idmaps(struct lxc_conf *c)
3799{
3800 return lxc_free_idmap(&c->id_map);
3801}
3802
1fb86a7c
SH
3803int lxc_clear_config_keepcaps(struct lxc_conf *c)
3804{
3805 struct lxc_list *it,*next;
3806
3807 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3808 lxc_list_del(it);
3809 free(it->elem);
3810 free(it);
3811 }
3812 return 0;
3813}
3814
12a50cc6 3815int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 3816{
9ebb03ad 3817 struct lxc_list *it,*next;
72d0e1cb 3818 bool all = false;
12a50cc6 3819 const char *k = key + 11;
72d0e1cb
SG
3820
3821 if (strcmp(key, "lxc.cgroup") == 0)
3822 all = true;
3823
9ebb03ad 3824 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
3825 struct lxc_cgroup *cg = it->elem;
3826 if (!all && strcmp(cg->subsystem, k) != 0)
3827 continue;
3828 lxc_list_del(it);
3829 free(cg->subsystem);
3830 free(cg->value);
3831 free(cg);
3832 free(it);
3833 }
3834 return 0;
3835}
3836
ee1e7aa0
SG
3837int lxc_clear_groups(struct lxc_conf *c)
3838{
3839 struct lxc_list *it,*next;
3840
3841 lxc_list_for_each_safe(it, &c->groups, next) {
3842 lxc_list_del(it);
3843 free(it->elem);
3844 free(it);
3845 }
3846 return 0;
3847}
3848
72d0e1cb
SG
3849int lxc_clear_mount_entries(struct lxc_conf *c)
3850{
9ebb03ad 3851 struct lxc_list *it,*next;
72d0e1cb 3852
9ebb03ad 3853 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
3854 lxc_list_del(it);
3855 free(it->elem);
3856 free(it);
3857 }
3858 return 0;
3859}
3860
12a50cc6 3861int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3862{
9ebb03ad 3863 struct lxc_list *it,*next;
17ed13a3 3864 bool all = false, done = false;
12a50cc6 3865 const char *k = key + 9;
72d0e1cb
SG
3866 int i;
3867
17ed13a3
SH
3868 if (strcmp(key, "lxc.hook") == 0)
3869 all = true;
3870
72d0e1cb 3871 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 3872 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 3873 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
3874 lxc_list_del(it);
3875 free(it->elem);
3876 free(it);
3877 }
3878 done = true;
72d0e1cb
SG
3879 }
3880 }
17ed13a3
SH
3881
3882 if (!done) {
3883 ERROR("Invalid hook key: %s", key);
3884 return -1;
3885 }
72d0e1cb
SG
3886 return 0;
3887}
8eb5694b 3888
74a3920a 3889static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
3890{
3891 int i;
3892
3893 if (!conf->num_savednics)
3894 return;
3895 for (i=0; i < conf->num_savednics; i++)
3896 free(conf->saved_nics[i].orig_name);
3897 conf->saved_nics = 0;
3898 free(conf->saved_nics);
3899}
3900
8eb5694b
SH
3901void lxc_conf_free(struct lxc_conf *conf)
3902{
3903 if (!conf)
3904 return;
3905 if (conf->console.path)
3906 free(conf->console.path);
54c30e29 3907 if (conf->rootfs.mount)
8eb5694b 3908 free(conf->rootfs.mount);
d95db067
DE
3909 if (conf->rootfs.path)
3910 free(conf->rootfs.path);
a58878d6
SH
3911 if (conf->rootfs.pivot)
3912 free(conf->rootfs.pivot);
3913 if (conf->logfile)
3914 free(conf->logfile);
d95db067
DE
3915 if (conf->utsname)
3916 free(conf->utsname);
3917 if (conf->ttydir)
3918 free(conf->ttydir);
3919 if (conf->fstab)
3920 free(conf->fstab);
fc7e8864
WM
3921 if (conf->rcfile)
3922 free(conf->rcfile);
8eb5694b 3923 lxc_clear_config_network(conf);
fe4de9a6
DE
3924 if (conf->lsm_aa_profile)
3925 free(conf->lsm_aa_profile);
3926 if (conf->lsm_se_context)
3927 free(conf->lsm_se_context);
769872f9 3928 lxc_seccomp_free(conf);
8eb5694b 3929 lxc_clear_config_caps(conf);
1fb86a7c 3930 lxc_clear_config_keepcaps(conf);
8eb5694b 3931 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 3932 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3933 lxc_clear_mount_entries(conf);
7b35f3d6 3934 lxc_clear_saved_nics(conf);
27c27d73 3935 lxc_clear_idmaps(conf);
ee1e7aa0 3936 lxc_clear_groups(conf);
8eb5694b
SH
3937 free(conf);
3938}
4355ab5f
SH
3939
/* Argument bundle handed to run_userns_fn() through lxc_clone(). */
struct userns_fn_data {
	int (*fn)(void *);	/* callback to run once the parent gives the go-ahead */
	void *arg;		/* opaque argument passed to fn */
	int p[2];		/* pipe: the child reads p[0], the parent writes p[1] */
};
3945
3946static int run_userns_fn(void *data)
3947{
3948 struct userns_fn_data *d = data;
3949 char c;
3950 // we're not sharing with the parent any more, if it was a thread
3951
3952 close(d->p[1]);
3953 if (read(d->p[0], &c, 1) != 1)
3954 return -1;
3955 close(d->p[0]);
3956 return d->fn(d->arg);
3957}
3958
3959/*
3960 * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
3961 * alread there.
3962 * We may want to generalize this to do gids as well as uids, but right now
3963 * it's not necessary.
3964 */
3965static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
3966{
3967 int hostid_mapped = mapped_hostid(uid, conf);
3968 struct lxc_list *new = NULL, *tmp, *it, *next;
3969 struct id_map *entry;
3970
3ec1648d
SH
3971 new = malloc(sizeof(*new));
3972 if (!new) {
3973 ERROR("Out of memory building id map");
3974 return NULL;
3975 }
3976 lxc_list_init(new);
3977
4355ab5f
SH
3978 if (hostid_mapped < 0) {
3979 hostid_mapped = find_unmapped_nsuid(conf);
3ec1648d
SH
3980 if (hostid_mapped < 0)
3981 goto err;
3982 tmp = malloc(sizeof(*tmp));
3983 if (!tmp)
3984 goto err;
4355ab5f
SH
3985 entry = malloc(sizeof(*entry));
3986 if (!entry) {
3ec1648d
SH
3987 free(tmp);
3988 goto err;
4355ab5f 3989 }
3ec1648d 3990 tmp->elem = entry;
4355ab5f
SH
3991 entry->idtype = ID_TYPE_UID;
3992 entry->nsid = hostid_mapped;
3993 entry->hostid = (unsigned long)uid;
3994 entry->range = 1;
3ec1648d 3995 lxc_list_add_tail(new, tmp);
4355ab5f
SH
3996 }
3997 lxc_list_for_each_safe(it, &conf->id_map, next) {
3998 tmp = malloc(sizeof(*tmp));
3999 if (!tmp)
4000 goto err;
4001 entry = malloc(sizeof(*entry));
4002 if (!entry) {
4003 free(tmp);
4004 goto err;
4005 }
4006 memset(entry, 0, sizeof(*entry));
4007 memcpy(entry, it->elem, sizeof(*entry));
4008 tmp->elem = entry;
3ec1648d 4009 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4010 }
4011
4012 return new;
4013
4014err:
4015 ERROR("Out of memory building a new uid map");
908fde6a
SH
4016 if (new)
4017 lxc_free_idmap(new);
c30ac545 4018 free(new);
4355ab5f
SH
4019 return NULL;
4020}
4021
4022/*
4023 * Run a function in a new user namespace.
4024 * The caller's euid will be mapped in if it is not already.
4025 */
4026int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4027{
4028 int ret, pid;
4029 struct userns_fn_data d;
4030 char c = '1';
4031 int p[2];
4032 struct lxc_list *idmap;
4033
4355ab5f 4034 ret = pipe(p);
4355ab5f
SH
4035 if (ret < 0) {
4036 SYSERROR("opening pipe");
4037 return -1;
4038 }
4039 d.fn = fn;
4040 d.arg = data;
4041 d.p[0] = p[0];
4042 d.p[1] = p[1];
4043 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4044 if (pid < 0)
4045 goto err;
4355ab5f 4046 close(p[0]);
4355ab5f
SH
4047 p[0] = -1;
4048
4049 if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
4050 ERROR("Error adding self to container uid map");
4051 goto err;
4052 }
4053
4054 ret = lxc_map_ids(idmap, pid);
4055 lxc_free_idmap(idmap);
88dd66fc 4056 free(idmap);
565e571c 4057 if (ret) {
4355ab5f
SH
4058 ERROR("Error setting up child mappings");
4059 goto err;
4060 }
4061
4062 // kick the child
4063 if (write(p[1], &c, 1) != 1) {
4064 SYSERROR("writing to pipe to child");
4065 goto err;
4066 }
4067
3139aead
SG
4068 ret = wait_for_pid(pid);
4069
4070 close(p[1]);
4071 return ret;
4072
4355ab5f 4073err:
4355ab5f
SH
4074 if (p[0] != -1)
4075 close(p[0]);
4076 close(p[1]);
4355ab5f
SH
4077 return -1;
4078}