]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
start: Fix print_top_failing_dir for /var/lib/lxc
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
4a0ba80d 35#include <time.h>
e827ff7e
SG
36
37#if HAVE_PTY_H
b0a33c1e 38#include <pty.h>
e827ff7e
SG
39#else
40#include <../include/openpty.h>
41#endif
0ad19a3f 42
b3ecde1e
DL
43#include <linux/loop.h>
44
0ad19a3f 45#include <sys/types.h>
46#include <sys/utsname.h>
47#include <sys/param.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
50#include <sys/mount.h>
51#include <sys/mman.h>
81810dd1 52#include <sys/prctl.h>
0ad19a3f 53
54#include <arpa/inet.h>
55#include <fcntl.h>
56#include <netinet/in.h>
57#include <net/if.h>
6f4a3756 58#include <libgen.h>
0ad19a3f 59
e5bda9ee 60#include "network.h"
61#include "error.h"
b2718c72 62#include "parse.h"
1b09f2c0
DL
63#include "utils.h"
64#include "conf.h"
65#include "log.h"
d55bc1ad 66#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 67#include "bdev.h"
368bbc02 68#include "cgroup.h"
025ed0f3 69#include "lxclock.h"
4355ab5f 70#include "namespace.h"
fe4de9a6 71#include "lsm/lsm.h"
d0a36f2c 72
495d2046
SG
73#if HAVE_SYS_CAPABILITY_H
74#include <sys/capability.h>
75#endif
76
6ff05e18
SG
77#if HAVE_SYS_PERSONALITY_H
78#include <sys/personality.h>
79#endif
80
edaf8b1b
SG
81#if IS_BIONIC
82#include <../include/lxcmntent.h>
83#else
84#include <mntent.h>
85#endif
86
769872f9
SH
87#include "lxcseccomp.h"
88
36eb9bde 89lxc_log_define(lxc_conf, lxc);
e5bda9ee 90
0ad19a3f 91#define MAXHWLEN 18
92#define MAXINDEXLEN 20
442cbbe6 93#define MAXMTULEN 16
0ad19a3f 94#define MAXLINELEN 128
95
495d2046 96#if HAVE_SYS_CAPABILITY_H
b09094da
MN
97#ifndef CAP_SETFCAP
98#define CAP_SETFCAP 31
99#endif
100
101#ifndef CAP_MAC_OVERRIDE
102#define CAP_MAC_OVERRIDE 32
103#endif
104
105#ifndef CAP_MAC_ADMIN
106#define CAP_MAC_ADMIN 33
107#endif
495d2046 108#endif
b09094da
MN
109
110#ifndef PR_CAPBSET_DROP
111#define PR_CAPBSET_DROP 24
112#endif
113
9818cae4
SG
114#ifndef LO_FLAGS_AUTOCLEAR
115#define LO_FLAGS_AUTOCLEAR 4
116#endif
117
2d76d1d7
SG
118/* Define pivot_root() if missing from the C library */
119#ifndef HAVE_PIVOT_ROOT
/*
 * Fallback pivot_root() used when the C library does not provide one.
 * Issues the raw system call when its number is known at build time,
 * otherwise fails with errno set to ENOSYS.
 */
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
129#else
130extern int pivot_root(const char * new_root, const char * put_old);
131#endif
132
133/* Define sethostname() if missing from the C library */
134#ifndef HAVE_SETHOSTNAME
/*
 * Fallback sethostname() used when the C library does not provide one.
 * Issues the raw system call when its number is known at build time,
 * otherwise fails with errno set to ENOSYS.
 */
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
144#endif
145
72f919c4
SG
146/* Define __S_ISTYPE if missing from the C library */
147#ifndef __S_ISTYPE
148#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
149#endif
150
72d0e1cb 151char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 152 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 153
e3b4c4c4 154typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 155
998ac676
RT
/* One textual mount option and the mount(2) flag it maps to. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* presumably non-zero when the option removes `flag` instead of setting it -- confirm against the option parser */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
161
81810dd1
DL
/* Maps a capability keyword to its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* capability keyword, e.g. "chown" */
	int value;	/* matching CAP_* constant */
};
166
e3b4c4c4
ST
167static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
168static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
169static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
170static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
171static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 172static int instanciate_none(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 173
24654103
DL
174static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
175 [LXC_NET_VETH] = instanciate_veth,
176 [LXC_NET_MACVLAN] = instanciate_macvlan,
177 [LXC_NET_VLAN] = instanciate_vlan,
178 [LXC_NET_PHYS] = instanciate_phys,
179 [LXC_NET_EMPTY] = instanciate_empty,
26b797f3 180 [LXC_NET_NONE] = instanciate_none,
0ad19a3f 181};
182
74a2b586
JK
183static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
184static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
185static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
186static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
187static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 188static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586
JK
189
190static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
191 [LXC_NET_VETH] = shutdown_veth,
192 [LXC_NET_MACVLAN] = shutdown_macvlan,
193 [LXC_NET_VLAN] = shutdown_vlan,
194 [LXC_NET_PHYS] = shutdown_phys,
195 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 196 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
197};
198
998ac676 199static struct mount_opt mount_opt[] = {
88d413d5
SW
200 { "defaults", 0, 0 },
201 { "ro", 0, MS_RDONLY },
202 { "rw", 1, MS_RDONLY },
203 { "suid", 1, MS_NOSUID },
204 { "nosuid", 0, MS_NOSUID },
205 { "dev", 1, MS_NODEV },
206 { "nodev", 0, MS_NODEV },
207 { "exec", 1, MS_NOEXEC },
208 { "noexec", 0, MS_NOEXEC },
209 { "sync", 0, MS_SYNCHRONOUS },
210 { "async", 1, MS_SYNCHRONOUS },
211 { "dirsync", 0, MS_DIRSYNC },
212 { "remount", 0, MS_REMOUNT },
213 { "mand", 0, MS_MANDLOCK },
214 { "nomand", 1, MS_MANDLOCK },
215 { "atime", 1, MS_NOATIME },
216 { "noatime", 0, MS_NOATIME },
217 { "diratime", 1, MS_NODIRATIME },
218 { "nodiratime", 0, MS_NODIRATIME },
219 { "bind", 0, MS_BIND },
220 { "rbind", 0, MS_BIND|MS_REC },
221 { "relatime", 0, MS_RELATIME },
222 { "norelatime", 1, MS_RELATIME },
223 { "strictatime", 0, MS_STRICTATIME },
224 { "nostrictatime", 1, MS_STRICTATIME },
225 { NULL, 0, 0 },
998ac676
RT
226};
227
495d2046 228#if HAVE_SYS_CAPABILITY_H
81810dd1 229static struct caps_opt caps_opt[] = {
a6afdde9 230 { "chown", CAP_CHOWN },
1e11be34
DL
231 { "dac_override", CAP_DAC_OVERRIDE },
232 { "dac_read_search", CAP_DAC_READ_SEARCH },
233 { "fowner", CAP_FOWNER },
234 { "fsetid", CAP_FSETID },
81810dd1
DL
235 { "kill", CAP_KILL },
236 { "setgid", CAP_SETGID },
237 { "setuid", CAP_SETUID },
238 { "setpcap", CAP_SETPCAP },
239 { "linux_immutable", CAP_LINUX_IMMUTABLE },
240 { "net_bind_service", CAP_NET_BIND_SERVICE },
241 { "net_broadcast", CAP_NET_BROADCAST },
242 { "net_admin", CAP_NET_ADMIN },
243 { "net_raw", CAP_NET_RAW },
244 { "ipc_lock", CAP_IPC_LOCK },
245 { "ipc_owner", CAP_IPC_OWNER },
246 { "sys_module", CAP_SYS_MODULE },
247 { "sys_rawio", CAP_SYS_RAWIO },
248 { "sys_chroot", CAP_SYS_CHROOT },
249 { "sys_ptrace", CAP_SYS_PTRACE },
250 { "sys_pacct", CAP_SYS_PACCT },
251 { "sys_admin", CAP_SYS_ADMIN },
252 { "sys_boot", CAP_SYS_BOOT },
253 { "sys_nice", CAP_SYS_NICE },
254 { "sys_resource", CAP_SYS_RESOURCE },
255 { "sys_time", CAP_SYS_TIME },
256 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
257 { "mknod", CAP_MKNOD },
258 { "lease", CAP_LEASE },
9527e566 259#ifdef CAP_AUDIT_WRITE
81810dd1 260 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
261#endif
262#ifdef CAP_AUDIT_CONTROL
81810dd1 263 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 264#endif
81810dd1
DL
265 { "setfcap", CAP_SETFCAP },
266 { "mac_override", CAP_MAC_OVERRIDE },
267 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
268#ifdef CAP_SYSLOG
269 { "syslog", CAP_SYSLOG },
270#endif
271#ifdef CAP_WAKE_ALARM
272 { "wake_alarm", CAP_WAKE_ALARM },
273#endif
81810dd1 274};
495d2046
SG
275#else
276static struct caps_opt caps_opt[] = {};
277#endif
81810dd1 278
91c3830e
SH
279static int run_buffer(char *buffer)
280{
ebec9176 281 struct lxc_popen_FILE *f;
91c3830e 282 char *output;
8e7da691 283 int ret;
91c3830e 284
ebec9176 285 f = lxc_popen(buffer);
91c3830e
SH
286 if (!f) {
287 SYSERROR("popen failed");
288 return -1;
289 }
290
291 output = malloc(LXC_LOG_BUFFER_SIZE);
292 if (!output) {
293 ERROR("failed to allocate memory for script output");
ebec9176 294 lxc_pclose(f);
91c3830e
SH
295 return -1;
296 }
297
ebec9176 298 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
299 DEBUG("script output: %s", output);
300
301 free(output);
302
ebec9176 303 ret = lxc_pclose(f);
8e7da691 304 if (ret == -1) {
91c3830e
SH
305 SYSERROR("Script exited on error");
306 return -1;
8e7da691
DE
307 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
308 ERROR("Script exited with status %d", WEXITSTATUS(ret));
309 return -1;
310 } else if (WIFSIGNALED(ret)) {
311 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
312 strsignal(WTERMSIG(ret)));
313 return -1;
91c3830e
SH
314 }
315
316 return 0;
317}
318
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * name, section, script and hook must be non-NULL strings.  argsin may be
 * NULL; otherwise it is a NULL-terminated array of extra arguments.
 * lxcpath is accepted for interface compatibility but not used here.
 *
 * The command line is built on the heap: its length depends on the
 * caller-supplied strings, so a stack allocation could overflow the stack.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument is emitted as " <arg>" */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	/* hook name plus the terminating NUL */
	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* three separating spaces */
	size += 3;

	/* snprintf() reports lengths as int */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			free(buffer);
			return -1;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
368
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" from a
 * NULL-terminated variadic list of extra string arguments and execute it
 * with run_buffer().
 *
 * The command line is built on the heap: its length depends on the
 * caller-supplied strings, so a stack allocation could overflow the stack.
 * Every va_start() is paired with a va_end(), including on error paths.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: every extra argument is emitted as " <arg>" */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two separating spaces and the terminating NUL */
	size += 3;

	/* snprintf() reports lengths as int */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
420
/*
 * Line callback for the filesystem list files: treats `buffer` as one
 * line naming a filesystem type and tries to mount cbarg->rootfs on
 * cbarg->target with that type.
 *
 * `data` points to a struct with the rootfs, target and options strings;
 * its layout must match the one built by the caller.
 *
 * Returns 1 once a mount succeeded, 0 when this line was skipped or the
 * mount attempt failed, and -1 if the mount options could not be parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	unsigned long mntflags = 0;
	/* initialised so the free() on the failure path is always defined */
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip the line using the lxc_char_left_gc/right_gc helpers */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
461
a17b1e65
SG
462static int mount_unknown_fs(const char *rootfs, const char *target,
463 const char *options)
78ae2fcc 464{
a6afdde9 465 int i;
78ae2fcc 466
467 struct cbarg {
468 const char *rootfs;
a6afdde9 469 const char *target;
a17b1e65 470 const char *options;
78ae2fcc 471 } cbarg = {
472 .rootfs = rootfs,
a6afdde9 473 .target = target,
a17b1e65 474 .options = options,
78ae2fcc 475 };
476
a6afdde9
DL
477 /*
478 * find the filesystem type with brute force:
479 * first we check with /etc/filesystems, in case the modules
78ae2fcc 480 * are auto-loaded and fall back to the supported kernel fs
481 */
482 char *fsfile[] = {
483 "/etc/filesystems",
484 "/proc/filesystems",
485 };
486
a6afdde9
DL
487 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
488
489 int ret;
490
491 if (access(fsfile[i], F_OK))
492 continue;
493
494 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
495 if (ret < 0) {
496 ERROR("failed to parse '%s'", fsfile[i]);
497 return -1;
498 }
499
500 if (ret)
501 return 0;
78ae2fcc 502 }
503
a6afdde9
DL
504 ERROR("failed to determine fs type for '%s'", rootfs);
505 return -1;
506}
507
a17b1e65
SG
/*
 * Mount a directory rootfs by recursively bind-mounting `rootfs` on
 * `target`, adding the flags and data parsed from `options`.
 *
 * Returns the mount(2) result, or -1 if the options could not be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			        const char *options)
{
	unsigned long mntflags = 0;
	/* initialised so the free() on the failure path is always defined */
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
525
/*
 * Attach the file `rootfs` to the loop device open on `fd`.
 *
 * `loinfo` is zeroed and LO_FLAGS_AUTOCLEAR is requested before the status
 * is applied.  If applying the status fails after the backing file was
 * attached, the device is detached again so it is not left bound to the
 * file.
 *
 * Returns 0 on success, -1 on failure.  The descriptor opened on `rootfs`
 * is always closed before returning; `fd` stays owned by the caller.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD: autoclear was never armed */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
557
a17b1e65
SG
/*
 * Mount a rootfs image file: scan /dev for the first free loop device,
 * attach `rootfs` to it with setup_lodev() and mount the device on
 * `target` with mount_unknown_fs().
 *
 * A loop device is considered free when LOOP_GET_STATUS64 fails with
 * ENXIO.  Only the first free device is tried.
 *
 * Returns 0 on success, -1 if /dev cannot be opened, no free device was
 * found, or attaching/mounting failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			         const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/* readdir() replaces the deprecated readdir_r(); `dir` is private
	 * to this call */
	while ((direntp = readdir(dir)) != NULL) {

		/* also filters out "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
622
a17b1e65
SG
/*
 * Mount a block-device rootfs.  The filesystem type is not known, so the
 * work is delegated to mount_unknown_fs() and its result is returned.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			          const char *options)
{
	int rc;

	rc = mount_unknown_fs(rootfs, target, options);
	return rc;
}
628
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens
 * return -1 on error.
 * return -2 if nothing needed to be pinned.
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	/* no rootfs configured: nothing to pin */
	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	/* an unresolvable path is treated as "nothing to pin" */
	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	/* only directory rootfs are pinned */
	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return -1;

	/* the open descriptor keeps the pin; the name is not needed */
	(void)unlink(absrootfspin);
	return fd;
}
671
4fb3cba5 672static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 673{
368bbc02 674 int r;
b06b8511
CS
675 size_t i;
676 static struct {
677 int match_mask;
678 int match_flag;
679 const char *source;
680 const char *destination;
681 const char *fstype;
682 unsigned long flags;
683 const char *options;
684 } default_mounts[] = {
685 /* Read-only bind-mounting... In older kernels, doing that required
686 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
687 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
688 * kernel 2.6.26 onwards. However, this apparently does not work on
689 * kernel 3.8. Unfortunately, on that very same kernel, doing the
690 * same trick as above doesn't seem to work either, there one needs
691 * to ALSO specify MS_BIND for the remount, otherwise the entire
692 * fs is remounted read-only or the mount fails because it's busy...
693 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
694 * 2.6.32...
368bbc02 695 */
b06b8511
CS
696 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
697 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
698 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
699 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
700 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
701 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
702 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
703 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
704 { 0, 0, NULL, NULL, NULL, 0, NULL }
705 };
368bbc02 706
b06b8511
CS
707 for (i = 0; default_mounts[i].match_mask; i++) {
708 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
709 char *source = NULL;
710 char *destination = NULL;
711 int saved_errno;
712
713 if (default_mounts[i].source) {
714 /* will act like strdup if %r is not present */
715 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
716 if (!source) {
717 SYSERROR("memory allocation error");
718 return -1;
719 }
720 }
721 if (default_mounts[i].destination) {
722 /* will act like strdup if %r is not present */
723 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
724 if (!destination) {
725 saved_errno = errno;
726 SYSERROR("memory allocation error");
727 free(source);
728 errno = saved_errno;
729 return -1;
730 }
731 }
732 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
733 saved_errno = errno;
c414be25
DE
734 if (r < 0)
735 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
736 free(source);
737 free(destination);
738 if (r < 0) {
b06b8511
CS
739 errno = saved_errno;
740 return -1;
741 }
368bbc02 742 }
368bbc02
CS
743 }
744
b06b8511 745 if (flags & LXC_AUTO_CGROUP_MASK) {
4fb3cba5
DE
746 if (!cgroup_mount(conf->rootfs.mount, handler,
747 flags & LXC_AUTO_CGROUP_MASK)) {
368bbc02 748 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 749 return -1;
368bbc02
CS
750 }
751 }
752
368bbc02 753 return 0;
368bbc02
CS
754}
755
a17b1e65 756static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 757{
b09ef133 758 char absrootfs[MAXPATHLEN];
78ae2fcc 759 struct stat s;
a6afdde9 760 int i;
78ae2fcc 761
a17b1e65 762 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 763
764 struct rootfs_type {
765 int type;
766 rootfs_cb cb;
767 } rtfs_type[] = {
2656d231
DL
768 { S_IFDIR, mount_rootfs_dir },
769 { S_IFBLK, mount_rootfs_block },
770 { S_IFREG, mount_rootfs_file },
78ae2fcc 771 };
0ad19a3f 772
4c8ab83b 773 if (!realpath(rootfs, absrootfs)) {
36eb9bde 774 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 775 return -1;
776 }
b09ef133 777
b09ef133 778 if (access(absrootfs, F_OK)) {
36eb9bde 779 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 780 return -1;
781 }
782
78ae2fcc 783 if (stat(absrootfs, &s)) {
36eb9bde 784 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 785 return -1;
786 }
787
78ae2fcc 788 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 789
78ae2fcc 790 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
791 continue;
9b0f0477 792
a17b1e65 793 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 794 }
9b0f0477 795
36eb9bde 796 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 797 return -1;
0ad19a3f 798}
799
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL `utsname` means there is nothing to configure and yields 0.
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
814
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* what the link points to */
	const char *name;	/* link name relative to /dev */
};

/* Standard descriptor links that point into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
826
827static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
828{
829 char path[MAXPATHLEN];
830 int ret,i;
831
832
833 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
834 const struct dev_symlinks *d = &dev_symlinks[i];
835 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
836 if (ret < 0 || ret >= MAXPATHLEN)
837 return -1;
838 ret = symlink(d->oldpath, path);
839 if (ret && errno != EEXIST) {
840 SYSERROR("Error creating %s", path);
841 return -1;
842 }
843 }
844 return 0;
845}
846
33fcb7a0 847static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 848 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 849{
7c6ef2a2
SH
850 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
851 int i, ret;
b0a33c1e 852
bc9bd0e3
DL
853 if (!rootfs->path)
854 return 0;
855
b0a33c1e 856 for (i = 0; i < tty_info->nbtty; i++) {
857
858 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
859
7c6ef2a2 860 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 861 rootfs->mount, i + 1);
7c6ef2a2
SH
862 if (ret >= sizeof(path)) {
863 ERROR("pathname too long for ttys");
864 return -1;
865 }
866 if (ttydir) {
867 /* create dev/lxc/tty%d" */
9ba8130c 868 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
869 rootfs->mount, ttydir, i + 1);
870 if (ret >= sizeof(lxcpath)) {
871 ERROR("pathname too long for ttys");
872 return -1;
873 }
874 ret = creat(lxcpath, 0660);
875 if (ret==-1 && errno != EEXIST) {
959aee9c 876 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
877 return -1;
878 }
4d44e274
SH
879 if (ret >= 0)
880 close(ret);
7c6ef2a2
SH
881 ret = unlink(path);
882 if (ret && errno != ENOENT) {
959aee9c 883 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
884 return -1;
885 }
b0a33c1e 886
7c6ef2a2
SH
887 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
888 WARN("failed to mount '%s'->'%s'",
889 pty_info->name, path);
890 continue;
891 }
13954cce 892
9ba8130c
SH
893 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
894 if (ret >= sizeof(lxcpath)) {
895 ERROR("tty pathname too long");
896 return -1;
897 }
7c6ef2a2
SH
898 ret = symlink(lxcpath, path);
899 if (ret) {
959aee9c 900 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
901 return -1;
902 }
903 } else {
c6883f38
SH
904 /* If we populated /dev, then we need to create /dev/ttyN */
905 if (access(path, F_OK)) {
906 ret = creat(path, 0660);
907 if (ret==-1) {
959aee9c 908 SYSERROR("error creating %s", path);
c6883f38 909 /* this isn't fatal, continue */
025ed0f3 910 } else {
c6883f38 911 close(ret);
025ed0f3 912 }
c6883f38 913 }
7c6ef2a2
SH
914 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
915 WARN("failed to mount '%s'->'%s'",
916 pty_info->name, path);
917 continue;
918 }
b0a33c1e 919 }
920 }
921
cd54d859
DL
922 INFO("%d tty(s) has been setup", tty_info->nbtty);
923
b0a33c1e 924 return 0;
925}
926
7a7ff0c6 927static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
928{
929 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 930 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
931 int found;
932 void **cbparm;
933
934 mountentry = buffer;
935 cbparm = (void **)data;
936
937 mountlist = cbparm[0];
938 pivotdir = cbparm[1];
939
940 /* parse entry, first field is mountname, ignore */
2796cf79 941 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
942 if (!mountpoint)
943 return -1;
944
945 /* second field is mountpoint */
2796cf79 946 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
947 if (!mountpoint)
948 return -1;
949
950 /* only consider mountpoints below old root fs */
951 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
952 return 0;
953
954 /* filter duplicate mountpoints */
955 found = 0;
956 lxc_list_for_each(iterator, mountlist) {
957 if (!strcmp(iterator->elem, mountpoint)) {
958 found = 1;
959 break;
960 }
961 }
962 if (found)
963 return 0;
964
965 /* add entry to list */
966 listentry = malloc(sizeof(*listentry));
967 if (!listentry) {
968 SYSERROR("malloc for mountpoint listentry failed");
969 return -1;
970 }
971
972 listentry->elem = strdup(mountpoint);
973 if (!listentry->elem) {
974 SYSERROR("strdup failed");
00b6be44 975 free(listentry);
bf601689
MH
976 return -1;
977 }
978 lxc_list_add_tail(mountlist, listentry);
979
980 return 0;
981}
982
cc6f6dd7 983static int umount_oldrootfs(const char *oldrootfs)
bf601689 984{
2382ecff 985 char path[MAXPATHLEN];
bf601689 986 void *cbparm[2];
9ebb03ad 987 struct lxc_list mountlist, *iterator, *next;
bf601689 988 int ok, still_mounted, last_still_mounted;
9ba8130c 989 int rc;
bf601689
MH
990
991 /* read and parse /proc/mounts in old root fs */
992 lxc_list_init(&mountlist);
993
cc6f6dd7 994 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
995 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
996 if (rc >= sizeof(path)) {
997 ERROR("rootfs name too long");
998 return -1;
999 }
bf601689 1000 cbparm[0] = &mountlist;
bf601689 1001
cc6f6dd7 1002 cbparm[1] = strdup(path);
bf601689
MH
1003 if (!cbparm[1]) {
1004 SYSERROR("strdup failed");
1005 return -1;
1006 }
1007
9ba8130c
SH
1008 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
1009 if (rc >= sizeof(path)) {
1010 ERROR("container proc/mounts name too long");
1011 return -1;
1012 }
cc6f6dd7
DL
1013
1014 ok = lxc_file_for_each_line(path,
1015 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
1016 if (ok < 0) {
1017 SYSERROR("failed to read or parse mount list '%s'", path);
1018 return -1;
1019 }
1020
1021 /* umount filesystems until none left or list no longer shrinks */
1022 still_mounted = 0;
1023 do {
1024 last_still_mounted = still_mounted;
1025 still_mounted = 0;
1026
9ebb03ad 1027 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 1028
c08556c6 1029 /* umount normally */
bf601689
MH
1030 if (!umount(iterator->elem)) {
1031 DEBUG("umounted '%s'", (char *)iterator->elem);
1032 lxc_list_del(iterator);
1033 continue;
1034 }
1035
bf601689
MH
1036 still_mounted++;
1037 }
7df119ee 1038
bf601689
MH
1039 } while (still_mounted > 0 && still_mounted != last_still_mounted);
1040
7df119ee 1041
c08556c6
DL
1042 lxc_list_for_each(iterator, &mountlist) {
1043
1044 /* let's try a lazy umount */
1045 if (!umount2(iterator->elem, MNT_DETACH)) {
1046 INFO("lazy unmount of '%s'", (char *)iterator->elem);
1047 continue;
1048 }
1049
1050 /* be more brutal (nfs) */
1051 if (!umount2(iterator->elem, MNT_FORCE)) {
1052 INFO("forced unmount of '%s'", (char *)iterator->elem);
1053 continue;
1054 }
1055
7df119ee 1056 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1057 }
bf601689 1058
cc6f6dd7
DL
1059 return 0;
1060}
1061
/*
 * Make `rootfs` the new root with pivot_root(), putting the old root on
 * "<rootfs>/<pivotdir>" ("lxc_putold" when `pivotdir` is NULL), then
 * unmount the old root tree with umount_oldrootfs().
 *
 * The put-old directory is created when missing and removed again
 * afterwards; a failed removal is only logged.  Returns 0 on success, -1
 * on any other failure.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold_path[MAXPATHLEN];
	const char *putold;
	int created = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	putold = pivotdir ? pivotdir : "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold_path, sizeof(putold_path), "%s/%s", rootfs, putold);
	if (len >= sizeof(putold_path)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold_path, F_OK) != 0) {
		if (mkdir_p(putold_path, 0755) < 0) {
			SYSERROR("failed to create pivotdir '%s'", putold_path);
			return -1;
		}

		created = 1;
		DEBUG("created '%s' directory", putold_path);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold_path);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold_path) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* we switch from absolute path to relative path */
	if (umount_oldrootfs(putold) != 0)
		return -1;

	/* remove temporary mount point, we don't consider the removing
	 * as fatal */
	if (created && rmdir(putold) != 0)
		WARN("can't remove mountpoint '%s': %m", putold);

	return 0;
}
1121
bc6928ff
MW
/*
 * Check to see if a directory has something mounted on it and,
 * if it does, return the fstype.
 *
 * Code largely based on detect_shared_rootfs below
 *
 * dir:    mount point to look for; must exist and be a directory.
 * fstype: optional output buffer of at least MAX_FSTYPE_LEN bytes; may be
 *         NULL when the caller only needs the count.
 *
 * Returns: # of matching entries in /proc/self/mounts
 *	if != 0 fstype is filled with the last filesystem value.
 *	if == 0 no matches found (or dir is not an accessible directory, or
 *	/proc/self/mounts could not be opened), fstype unchanged.
 *
 * ToDo: Maybe return the mount options in another parameter...
 */

#define LINELEN 4096
#define MAX_FSTYPE_LEN 128
static int mount_check_fs( const char *dir, char *fstype )
{
	char buf[LINELEN];
	struct stat s;
	FILE *f;
	int found_fs = 0;

	DEBUG("entering mount_check_fs for %s", dir);

	if (access(dir, F_OK) != 0 || stat(dir, &s) != 0 || !S_ISDIR(s.st_mode))
		return 0;

	f = fopen("/proc/self/mounts", "r");
	if (!f)
		return 0;

	/* each line is "<device> <mount point> <fstype> <options> ..." */
	while (fgets(buf, LINELEN, f)) {
		char *mntpoint, *type, *sep;

		/* skip the device field */
		sep = strchr(buf, ' ');
		if (!sep)
			continue;
		mntpoint = sep + 1;

		sep = strchr(mntpoint, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		/* Compare the directory in the entry to desired */
		if (strcmp(mntpoint, dir) != 0)
			continue;

		type = sep + 1;
		sep = strchr(type, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		++found_fs;

		/* later entries overwrite earlier ones: the last match wins */
		if (fstype)
			snprintf(fstype, MAX_FSTYPE_LEN, "%s", type);
	}

	fclose(f);

	/*
	 * fstype may be NULL, or left untouched (possibly uninitialised) when
	 * nothing matched, so only hand it to the logger when it was filled.
	 */
	DEBUG("mount_check_fs returning %d last %s", found_fs,
	      (found_fs && fstype) ? fstype : "(none)");

	return found_fs;
}
1191
1192/*
1193 * Locate a devtmpfs mount (should be on /dev) and create a container
1194 * subdirectory on it which we can then bind mount to the container
1195 * /dev instead of mounting a tmpfs there.
1196 * If we fail, return NULL.
1197 * Else return the pointer to the name buffer with the string to
1198 * the devtmpfs subdirectory.
1199 */
1200
74a3920a 1201static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1202{
1203 int ret;
1204 struct stat s;
1205 char tmp_path[MAXPATHLEN];
1206 char fstype[MAX_FSTYPE_LEN];
1207 char *base_path = "/dev/.lxc";
1208 char *user_path = "/dev/.lxc/user";
1209 uint64_t hash;
1210
1211 if ( 0 != access(base_path, F_OK) || 0 != stat(base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1212 /* This is just making /dev/.lxc it better work or we're done */
1213 ret = mkdir(base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1214 if ( ret ) {
1215 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1216 return NULL;
1217 }
1218 }
1219
1220 /*
1221 * Programmers notes:
1222 * We can not do mounts in this area of code that we want
1223 * to be visible in the host. Consequently, /dev/.lxc must
1224 * be set up earlier if we need a tmpfs mounted there.
1225 * That only affects the rare cases where autodev is enabled
1226 * for a container and devtmpfs is not mounted on /dev in the
1227 * host. In that case, we'll fall back to the old method
1228 * of mounting a tmpfs in the container and have no visibility
1229 * into the container /dev.
1230 */
1231 if( ! mount_check_fs( "/dev", fstype )
1232 || strcmp( "devtmpfs", fstype ) ) {
1233 /* Either /dev was not mounted or was not devtmpfs */
1234
1235 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1236 /*
1237 * /dev/.lxc is not already mounted
1238 * Doing a mount here does no good, since
1239 * it's not visible in the host.
1240 */
1241
1242 ERROR("/dev/.lxc is not setup - taking fallback" );
1243 return NULL;
1244 }
1245 }
1246
1247 if ( 0 != access(user_path, F_OK) || 0 != stat(user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1248 /*
1249 * This is making /dev/.lxc/user path for non-priv users.
1250 * If this doesn't work, we'll have to fall back in the
1251 * case of non-priv users. It's mode 1777 like /tmp.
1252 */
1253 ret = mkdir(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1254 if ( ret ) {
1255 /* Issue an error but don't fail yet! */
1256 ERROR("Unable to create /dev/.lxc/user");
1257 }
1258 /* Umask tends to screw us up here */
1259 chmod(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1260 }
1261
1262 /*
1263 * Since the container name must be unique within a given
1264 * lxcpath, we're going to use a hash of the path
1265 * /lxcpath/name as our hash name in /dev/.lxc/
1266 */
1267
1268 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1269 if (ret < 0 || ret >= MAXPATHLEN)
1270 return NULL;
1271
1272 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1273
1274 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, base_path, name, hash);
1275 if (ret < 0 || ret >= MAXPATHLEN)
1276 return NULL;
1277
1278 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1279 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1280 if ( ret ) {
1281 /* Something must have failed with the base_path...
1282 * Maybe unpriv user. Try user_path now... */
1283 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1284
1285 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, user_path, name, hash);
1286 if (ret < 0 || ret >= MAXPATHLEN)
1287 return NULL;
1288
1289 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1290 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1291 if ( ret ) {
1292 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1293 return NULL;
1294 }
1295 }
1296 }
1297 }
1298
1299 strcpy( path, tmp_path );
1300 return path;
1301}
1302
1303
91c3830e
SH
/*
 * Mount a /dev for the container under 'root' and make sure it contains a
 * 'pts' directory.
 *
 * name:    container name.
 * root:    path of the container root; /dev is mounted at <root>/dev.
 * lxcpath: lxcpath the container lives in.
 *
 * If mk_devtmpfs() provides a directory, <lxcpath>/<name>/rootfs.dev is
 * (re)created as a symlink to it and the directory is bind mounted on
 * <root>/dev.  Otherwise a tmpfs is mounted there, unless something is
 * already mounted on rootfs.dev, in which case that is bind mounted.
 *
 * Returns 0 on success, -1 on failure.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s", root);

	/* a return value equal to the buffer size already means truncation */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs(name, devtmpfs_path, lxcpath)) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink(host_path);
		rmdir(host_path);
		ret = symlink(devtmpfs_path, host_path);

		if (ret < 0) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path, path);
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0);
	} else {
		/* Only mount a tmpfs here if we don't already have a mount */
		if (!mount_check_fs(host_path, NULL)) {
			/* the tmpfs goes on the container /dev, so log that path */
			DEBUG("Mounting tmpfs to %s", path);
			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path);
			ret = mount(host_path, path, NULL, MS_BIND, 0);
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if (access(path, F_OK) != 0 || stat(path, &s) != 0 || !S_ISDIR(s.st_mode)) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1377
/* One device node to create in an auto-populated container /dev. */
struct lxc_devs {
	const char *name;	/* node name, relative to /dev */
	mode_t mode;		/* file type and permission bits handed to mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes created by setup_autodev(). */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1394
74a3920a 1395static int setup_autodev(const char *root)
c6883f38
SH
1396{
1397 int ret;
c6883f38
SH
1398 char path[MAXPATHLEN];
1399 int i;
3a32201c 1400 mode_t cmask;
c6883f38 1401
959aee9c 1402 INFO("Creating initial consoles under %s/dev", root);
91c3830e 1403
c6883f38 1404 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1405 if (ret < 0 || ret >= MAXPATHLEN) {
1406 ERROR("Error calculating container /dev location");
c6883f38 1407 return -1;
f7bee6c6 1408 }
91c3830e 1409
959aee9c 1410 INFO("Populating /dev under %s", root);
3a32201c 1411 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1412 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1413 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1414 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1415 if (ret < 0 || ret >= MAXPATHLEN)
1416 return -1;
1417 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1418 if (ret && errno != EEXIST) {
959aee9c 1419 SYSERROR("Error creating %s", d->name);
c6883f38
SH
1420 return -1;
1421 }
1422 }
3a32201c 1423 umask(cmask);
c6883f38 1424
959aee9c 1425 INFO("Populated /dev under %s", root);
c6883f38
SH
1426 return 0;
1427}
1428
cc28d0b0
SH
1429/*
1430 * I'll forgive you for asking whether all of this is needed :) The
1431 * answer is yes.
1432 * pivot_root will fail if the new root, the put_old dir, or the parent
1433 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1434 * or may not be current->fs_root - if we assumed it always was, we could
1435 * just mount --make-rslave /). So,
1436 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1437 * 2. make that MS_SLAVE
1438 * 3. make a 'root' directory under that
1439 * 4. mount --rbind / under the $tinyroot/root.
1440 * 5. make that rslave
1441 * 6. chdir and chroot into $tinyroot/root
1442 * 7. $tinyroot will be unmounted by our parent in start.c
1443 */
1444static int chroot_into_slave(struct lxc_conf *conf)
1445{
1446 char path[MAXPATHLEN];
1447 const char *destpath = conf->rootfs.mount;
1448 int ret;
1449
1450 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1451 SYSERROR("failed to mount %s bind", destpath);
1452 return -1;
1453 }
1454 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1455 SYSERROR("failed to make %s slave", destpath);
1456 return -1;
1457 }
58ab99ae 1458 if (mount("none", destpath, "tmpfs", 0, "size=10000,mode=755")) {
cc28d0b0
SH
1459 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1460 return -1;
1461 }
1462 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1463 if (ret < 0 || ret >= MAXPATHLEN) {
1464 ERROR("out of memory making root path");
1465 return -1;
1466 }
1467 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1468 SYSERROR("Failed to create /dev/pts in container");
1469 return -1;
1470 }
1471 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1472 SYSERROR("Failed to rbind mount / to %s", path);
1473 return -1;
1474 }
1475 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1476 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1477 return -1;
1478 }
cc28d0b0
SH
1479 if (chroot(path)) {
1480 SYSERROR("Failed to chroot into tmp-/");
1481 return -1;
1482 }
6b9324bd
SG
1483 if (chdir("/")) {
1484 SYSERROR("Failed to chdir into tmp-/");
1485 return -1;
1486 }
959aee9c 1487 INFO("Chrooted into tmp-/ at %s", path);
cc28d0b0
SH
1488 return 0;
1489}
1490
1491static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1492{
cc28d0b0
SH
1493 const struct lxc_rootfs *rootfs = &conf->rootfs;
1494
a0f379bf
DW
1495 if (!rootfs->path) {
1496 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1497 SYSERROR("Failed to make / rslave");
1498 return -1;
1499 }
c69bd12f 1500 return 0;
a0f379bf 1501 }
0ad19a3f 1502
12297168 1503 if (access(rootfs->mount, F_OK)) {
b1789442 1504 SYSERROR("failed to access to '%s', check it is present",
12297168 1505 rootfs->mount);
b1789442
DL
1506 return -1;
1507 }
1508
cc28d0b0
SH
1509 if (detect_shared_rootfs()) {
1510 if (chroot_into_slave(conf)) {
1511 ERROR("Failed to chroot into slave /");
1512 return -1;
1513 }
1514 }
1515
9be53773 1516 // First try mounting rootfs using a bdev
a17b1e65 1517 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, rootfs->options);
9be53773 1518 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1519 bdev_put(bdev);
9be53773
SH
1520 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1521 return 0;
1522 }
59d66af2
SH
1523 if (bdev)
1524 bdev_put(bdev);
a17b1e65 1525 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1526 ERROR("failed to mount rootfs");
c3f0a28c 1527 return -1;
1528 }
0ad19a3f 1529
12297168 1530 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1531
ac778708
DL
1532 return 0;
1533}
1534
74a3920a 1535static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1536{
ac778708
DL
1537 if (!rootfs->path)
1538 return 0;
1539
12297168 1540 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1541 ERROR("failed to setup pivot root");
25368b52 1542 return -1;
c69bd12f
DL
1543 }
1544
25368b52 1545 return 0;
0ad19a3f 1546}
1547
d852c78c 1548static int setup_pts(int pts)
3c26f34e 1549{
77890c6d
SW
1550 char target[PATH_MAX];
1551
d852c78c
DL
1552 if (!pts)
1553 return 0;
3c26f34e 1554
1555 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1556 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1557 return -1;
1558 }
1559
a6afdde9 1560 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1561 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1562 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1563 return -1;
1564 }
1565
3c26f34e 1566 if (access("/dev/ptmx", F_OK)) {
1567 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1568 goto out;
36eb9bde 1569 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1570 return -1;
1571 }
1572
77890c6d
SW
1573 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1574 goto out;
1575
3c26f34e 1576 /* fallback here, /dev/pts/ptmx exists just mount bind */
1577 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1578 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1579 return -1;
1580 }
cd54d859
DL
1581
1582 INFO("created new pts instance");
d852c78c 1583
3c26f34e 1584out:
1585 return 0;
1586}
1587
cccc74b5
DL
/*
 * Apply the requested execution domain with personality().
 *
 * persona: value handed to personality(); -1 means "leave unchanged".
 *
 * Does nothing when built without HAVE_SYS_PERSONALITY_H.
 * Returns 0 on success, -1 on failure.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1604
7c6ef2a2 1605static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1606 const struct lxc_console *console)
6e590161 1607{
63376d7d
DL
1608 char path[MAXPATHLEN];
1609 struct stat s;
7c6ef2a2 1610 int ret;
52e35957 1611
7c6ef2a2
SH
1612 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1613 if (ret >= sizeof(path)) {
959aee9c 1614 ERROR("console path too long");
7c6ef2a2
SH
1615 return -1;
1616 }
52e35957 1617
63376d7d 1618 if (access(path, F_OK)) {
466978b0 1619 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1620 return 0;
52e35957
DL
1621 }
1622
b5159817
DE
1623 if (console->master < 0) {
1624 INFO("no console");
f78a1f32
DL
1625 return 0;
1626 }
ed502555 1627
63376d7d
DL
1628 if (stat(path, &s)) {
1629 SYSERROR("failed to stat '%s'", path);
1630 return -1;
1631 }
1632
1633 if (chmod(console->name, s.st_mode)) {
1634 SYSERROR("failed to set mode '0%o' to '%s'",
1635 s.st_mode, console->name);
1636 return -1;
1637 }
13954cce 1638
63376d7d
DL
1639 if (mount(console->name, path, "none", MS_BIND, 0)) {
1640 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1641 return -1;
1642 }
1643
63376d7d 1644 INFO("console has been setup");
7c6ef2a2
SH
1645 return 0;
1646}
1647
1648static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1649 const struct lxc_console *console,
1650 char *ttydir)
1651{
1652 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1653 int ret;
1654
1655 /* create rootfs/dev/<ttydir> directory */
1656 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1657 ttydir);
1658 if (ret >= sizeof(path))
1659 return -1;
1660 ret = mkdir(path, 0755);
1661 if (ret && errno != EEXIST) {
959aee9c 1662 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1663 return -1;
1664 }
959aee9c 1665 INFO("created %s", path);
7c6ef2a2
SH
1666
1667 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1668 rootfs->mount, ttydir);
1669 if (ret >= sizeof(lxcpath)) {
959aee9c 1670 ERROR("console path too long");
7c6ef2a2
SH
1671 return -1;
1672 }
1673
1674 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1675 ret = unlink(path);
1676 if (ret && errno != ENOENT) {
959aee9c 1677 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1678 return -1;
1679 }
1680
1681 ret = creat(lxcpath, 0660);
1682 if (ret==-1 && errno != EEXIST) {
959aee9c 1683 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1684 return -1;
1685 }
4d44e274
SH
1686 if (ret >= 0)
1687 close(ret);
7c6ef2a2 1688
b5159817
DE
1689 if (console->master < 0) {
1690 INFO("no console");
7c6ef2a2
SH
1691 return 0;
1692 }
1693
1694 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1695 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1696 return -1;
1697 }
1698
1699 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1700 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1701 if (ret >= sizeof(lxcpath)) {
1702 ERROR("lxc/console path too long");
1703 return -1;
1704 }
7c6ef2a2
SH
1705 ret = symlink(lxcpath, path);
1706 if (ret) {
1707 SYSERROR("failed to create symlink for console");
1708 return -1;
1709 }
1710
1711 INFO("console has been setup on %s", lxcpath);
cd54d859 1712
6e590161 1713 return 0;
1714}
1715
7c6ef2a2
SH
1716static int setup_console(const struct lxc_rootfs *rootfs,
1717 const struct lxc_console *console,
1718 char *ttydir)
1719{
1720 /* We don't have a rootfs, /dev/console will be shared */
1721 if (!rootfs->path)
1722 return 0;
1723 if (!ttydir)
1724 return setup_dev_console(rootfs, console);
1725
1726 return setup_ttydir_console(rootfs, console, ttydir);
1727}
1728
1bd051a6
SH
1729static int setup_kmsg(const struct lxc_rootfs *rootfs,
1730 const struct lxc_console *console)
1731{
1732 char kpath[MAXPATHLEN];
1733 int ret;
1734
222fea5a
DE
1735 if (!rootfs->path)
1736 return 0;
1bd051a6
SH
1737 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1738 if (ret < 0 || ret >= sizeof(kpath))
1739 return -1;
1740
1741 ret = unlink(kpath);
1742 if (ret && errno != ENOENT) {
959aee9c 1743 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1744 return -1;
1745 }
1746
1747 ret = symlink("console", kpath);
1748 if (ret) {
1749 SYSERROR("failed to create symlink for kmsg");
1750 return -1;
1751 }
1752
1753 return 0;
1754}
1755
998ac676
RT
1756static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1757{
1758 struct mount_opt *mo;
1759
1760 /* If opt is found in mount_opt, set or clear flags.
1761 * Otherwise append it to data. */
1762
1763 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1764 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1765 if (mo->clear)
1766 *flags &= ~mo->flag;
1767 else
1768 *flags |= mo->flag;
1769 return;
1770 }
1771 }
1772
1773 if (strlen(*data))
1774 strcat(*data, ",");
1775 strcat(*data, opt);
1776}
1777
/*
 * Split a comma separated mount option string into mount flags and a data
 * string.
 *
 * mntopts:  option string, may be NULL.
 * mntflags: receives the accumulated flags; reset to 0 first.
 * mntdata:  receives a malloc'ed string holding the options that are not
 *           flags, or NULL when there are none.  The caller frees it.
 *
 * Returns 0 on success, -1 when memory cannot be allocated (*mntdata is
 * NULL in that case).
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (mntopts == NULL)
		return 0;

	/* strtok_r() writes into its input, so tokenize a private copy */
	copy = strdup(mntopts);
	if (copy == NULL) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the full option string */
	extra = malloc(strlen(copy) + 1);
	if (extra == NULL) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1816
911324ef
DL
/*
 * Mount 'fsname' on 'target'.
 *
 * The first mount() call is made with MS_REMOUNT stripped from the flags.
 * When the flags contain MS_REMOUNT or MS_BIND, a second call with
 * MS_REMOUNT added follows.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long first_pass = mountflags & ~MS_REMOUNT;
	const int second_pass = (mountflags & (MS_REMOUNT | MS_BIND)) != 0;

	if (mount(fsname, target, fstype, first_pass, data) != 0) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (second_pass) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1843
4e4ca161
SH
/*
 * Remove 'optional', 'create=dir', and 'create=file' from mntopt
 *
 * mntent->mnt_opts is rewritten in place as a comma separated list without
 * those options.  Only whole options are matched, every occurrence is
 * removed, and no leading, trailing or doubled commas are left behind.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	char *in, *out;

	if (!mntent || !mntent->mnt_opts)
		return;

	/* 'out' never overtakes 'in', so the list can be compacted in place */
	in = out = mntent->mnt_opts;
	while (*in != '\0') {
		size_t len = strcspn(in, ",");
		int drop = (len == 0);	/* squeeze out empty options */
		size_t i;

		for (i = 0; !drop && i < sizeof(culled) / sizeof(culled[0]); i++) {
			if (strlen(culled[i]) == len && strncmp(in, culled[i], len) == 0)
				drop = 1;
		}

		if (!drop) {
			/* separate from the previously kept option, if any */
			if (out != mntent->mnt_opts)
				*out++ = ',';
			memmove(out, in, len);
			out += len;
		}

		in += len;
		if (*in == ',')
			in++;
	}
	*out = '\0';
}
1868
/*
 * Mount one fstab entry for a container without its own rootfs: the entry's
 * mnt_dir is used as the mount target as-is.
 *
 * Honours the lxc-specific options 'create=dir' and 'create=file' (create
 * the target first) and 'optional' (a failed mount is not an error), and
 * removes them from mnt_opts before the options are parsed.
 *
 * Failing to create the target only produces a warning; the mount is still
 * attempted and decides the result.
 *
 * Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	/* must be read before cull_mntent_opt() strips it from mnt_opts */
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the allocation in its own
		 * variable and free that, never dirname()'s result.
		 */
		char *pathcopy = strdup(mntent->mnt_dir);

		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(pathcopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata);

	if (optional)
		ret = 0;

	free(mntdata);

	return ret;
}
1918
4e4ca161 1919static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1920 const struct lxc_rootfs *rootfs,
1921 const char *lxc_name)
911324ef 1922{
013bd428 1923 char *aux;
59760f5d 1924 char path[MAXPATHLEN];
911324ef
DL
1925 unsigned long mntflags;
1926 char *mntdata;
80a881b2 1927 int r, ret = 0, offset;
67e571de 1928 const char *lxcpath;
34cfffb3
SG
1929 FILE *pathfile = NULL;
1930 char *pathdirname = NULL;
4f1d50d1 1931 bool optional = hasmntopt(mntent, "optional") != NULL;
0ad19a3f 1932
593e8478 1933 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1934 if (!lxcpath) {
1935 ERROR("Out of memory");
1936 return -1;
1937 }
1938
80a881b2 1939 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1940 * use $lxcpath/CN/rootfs as the target prefix */
1941 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1942 if (r < 0 || r >= MAXPATHLEN)
1943 goto skipvarlib;
1944
1945 aux = strstr(mntent->mnt_dir, path);
1946 if (aux) {
1947 offset = strlen(path);
1948 goto skipabs;
1949 }
1950
1951skipvarlib:
013bd428
DL
1952 aux = strstr(mntent->mnt_dir, rootfs->path);
1953 if (!aux) {
1954 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1955 goto out;
1956 }
80a881b2
SH
1957 offset = strlen(rootfs->path);
1958
1959skipabs:
013bd428 1960
9ba8130c 1961 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1962 aux + offset);
1963 if (r < 0 || r >= MAXPATHLEN) {
1964 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1965 ret = -1;
1966 goto out;
1967 }
1968
34cfffb3 1969 if (hasmntopt(mntent, "create=dir")) {
119126b6 1970 if (mkdir_p(path, 0755) < 0) {
34cfffb3
SG
1971 WARN("Failed to create mount target '%s'", path);
1972 ret = -1;
1973 }
1974 }
1975
1976 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
1977 pathdirname = strdup(path);
1978 pathdirname = dirname(pathdirname);
119126b6
SG
1979 if (mkdir_p(pathdirname, 0755) < 0) {
1980 WARN("Failed to create target directory");
1981 }
34cfffb3
SG
1982 pathfile = fopen(path, "wb");
1983 if (!pathfile) {
1984 WARN("Failed to create mount target '%s'", path);
1985 ret = -1;
1986 }
1987 else
1988 fclose(pathfile);
1989 }
4e4ca161 1990 cull_mntent_opt(mntent);
d330fe7b 1991
a17b1e65
SG
1992 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1993 free(mntdata);
1994 return -1;
1995 }
1996
013bd428 1997 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 1998 mntflags, mntdata);
0ad19a3f 1999
a17b1e65
SG
2000 free(mntdata);
2001
4f1d50d1 2002 if (optional)
68c152ef
SH
2003 ret = 0;
2004
013bd428 2005out:
34cfffb3 2006 free(pathdirname);
911324ef
DL
2007 return ret;
2008}
d330fe7b 2009
/*
 * Mount one fstab entry whose target is a relative path; the target is
 * taken relative to 'rootfs', the rootfs mount point.
 *
 * Honours 'create=dir', 'create=file' and 'optional' and removes them from
 * mnt_opts before the options are parsed.  Failing to create the target
 * only produces a warning; the mount is still attempted and decides the
 * result.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;
	/* must be read before cull_mntent_opt() strips it from mnt_opts */
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0)
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the allocation in its own
		 * variable and free that, never dirname()'s result.
		 */
		char *pathcopy = strdup(path);

		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	if (optional)
		ret = 0;

	free(mntdata);

	return ret;
}
2067
80a881b2
SH
2068static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2069 const char *lxc_name)
911324ef 2070{
aaf901be
AM
2071 struct mntent mntent;
2072 char buf[4096];
911324ef 2073 int ret = -1;
e76b8764 2074
aaf901be 2075 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2076
911324ef 2077 if (!rootfs->path) {
aaf901be 2078 if (mount_entry_on_systemfs(&mntent))
e76b8764 2079 goto out;
911324ef 2080 continue;
e76b8764
CDC
2081 }
2082
911324ef 2083 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2084 if (mntent.mnt_dir[0] != '/') {
2085 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2086 rootfs->mount))
2087 goto out;
2088 continue;
2089 }
cd54d859 2090
aaf901be 2091 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2092 goto out;
0ad19a3f 2093 }
cd54d859 2094
0ad19a3f 2095 ret = 0;
cd54d859
DL
2096
2097 INFO("mount points have been setup");
0ad19a3f 2098out:
e7938e9e
MN
2099 return ret;
2100}
2101
80a881b2
SH
/*
 * Mount the entries listed in the fstab file 'fstab'.
 *
 * A NULL fstab means there is nothing to do.
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (fstab == NULL)
		return 0;

	entries = setmntent(fstab, "r");
	if (entries == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);
	endmntent(entries);

	return rc;
}
2122
80a881b2
SH
2123static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2124 const char *lxc_name)
e7938e9e
MN
2125{
2126 FILE *file;
2127 struct lxc_list *iterator;
2128 char *mount_entry;
2129 int ret;
2130
2131 file = tmpfile();
2132 if (!file) {
2133 ERROR("tmpfile error: %m");
2134 return -1;
2135 }
2136
2137 lxc_list_for_each(iterator, mount) {
2138 mount_entry = iterator->elem;
1d6b1976 2139 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2140 }
2141
2142 rewind(file);
2143
80a881b2 2144 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2145
2146 fclose(file);
2147 return ret;
2148}
2149
81810dd1
DL
2150static int setup_caps(struct lxc_list *caps)
2151{
2152 struct lxc_list *iterator;
2153 char *drop_entry;
d55bc1ad 2154 char *ptr;
81810dd1
DL
2155 int i, capid;
2156
2157 lxc_list_for_each(iterator, caps) {
2158
2159 drop_entry = iterator->elem;
2160
2161 capid = -1;
2162
2163 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2164
2165 if (strcmp(drop_entry, caps_opt[i].name))
2166 continue;
2167
2168 capid = caps_opt[i].value;
2169 break;
2170 }
2171
d55bc1ad
CS
2172 if (capid < 0) {
2173 /* try to see if it's numeric, so the user may specify
2174 * capabilities that the running kernel knows about but
2175 * we don't */
09bbd745 2176 errno = 0;
d55bc1ad 2177 capid = strtol(drop_entry, &ptr, 10);
09bbd745 2178 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
2179 /* not a valid number */
2180 capid = -1;
2181 else if (capid > lxc_caps_last_cap())
2182 /* we have a number but it's not a valid
2183 * capability */
2184 capid = -1;
2185 }
2186
81810dd1 2187 if (capid < 0) {
1e11be34
DL
2188 ERROR("unknown capability %s", drop_entry);
2189 return -1;
81810dd1
DL
2190 }
2191
2192 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2193
2194 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2195 SYSERROR("failed to remove %s capability", drop_entry);
2196 return -1;
2197 }
81810dd1
DL
2198
2199 }
2200
1fb86a7c
SH
2201 DEBUG("capabilities have been setup");
2202
2203 return 0;
2204}
2205
2206static int dropcaps_except(struct lxc_list *caps)
2207{
2208 struct lxc_list *iterator;
2209 char *keep_entry;
2210 char *ptr;
2211 int i, capid;
2212 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2213 INFO("found %d capabilities", numcaps);
1fb86a7c 2214
2caf9a97
SH
2215 if (numcaps <= 0 || numcaps > 200)
2216 return -1;
2217
1fb86a7c
SH
2218 // caplist[i] is 1 if we keep capability i
2219 int *caplist = alloca(numcaps * sizeof(int));
2220 memset(caplist, 0, numcaps * sizeof(int));
2221
2222 lxc_list_for_each(iterator, caps) {
2223
2224 keep_entry = iterator->elem;
2225
2226 capid = -1;
2227
2228 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2229
2230 if (strcmp(keep_entry, caps_opt[i].name))
2231 continue;
2232
2233 capid = caps_opt[i].value;
2234 break;
2235 }
2236
2237 if (capid < 0) {
2238 /* try to see if it's numeric, so the user may specify
2239 * capabilities that the running kernel knows about but
2240 * we don't */
2241 capid = strtol(keep_entry, &ptr, 10);
2242 if (!ptr || *ptr != '\0' ||
f371aca9 2243 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2244 /* not a valid number */
2245 capid = -1;
2246 else if (capid > lxc_caps_last_cap())
2247 /* we have a number but it's not a valid
2248 * capability */
2249 capid = -1;
2250 }
2251
2252 if (capid < 0) {
2253 ERROR("unknown capability %s", keep_entry);
2254 return -1;
2255 }
2256
2257 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2258
2259 caplist[capid] = 1;
2260 }
2261 for (i=0; i<numcaps; i++) {
2262 if (caplist[i])
2263 continue;
2264 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2265 SYSERROR("failed to remove capability %d", i);
2266 return -1;
2267 }
1fb86a7c
SH
2268 }
2269
2270 DEBUG("capabilities have been setup");
81810dd1
DL
2271
2272 return 0;
2273}
2274
0ad19a3f 2275static int setup_hw_addr(char *hwaddr, const char *ifname)
2276{
2277 struct sockaddr sockaddr;
2278 struct ifreq ifr;
2279 int ret, fd;
2280
3cfc0f3a
MN
2281 ret = lxc_convert_mac(hwaddr, &sockaddr);
2282 if (ret) {
2283 ERROR("mac address '%s' conversion failed : %s",
2284 hwaddr, strerror(-ret));
0ad19a3f 2285 return -1;
2286 }
2287
2288 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2289 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2290 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2291
2292 fd = socket(AF_INET, SOCK_DGRAM, 0);
2293 if (fd < 0) {
3ab87b66 2294 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2295 return -1;
2296 }
2297
2298 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2299 close(fd);
2300 if (ret)
3ab87b66 2301 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2302
5da6aa8c 2303 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2304
0ad19a3f 2305 return ret;
2306}
2307
82d5ae15 2308static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2309{
82d5ae15
DL
2310 struct lxc_list *iterator;
2311 struct lxc_inetdev *inetdev;
3cfc0f3a 2312 int err;
0ad19a3f 2313
82d5ae15
DL
2314 lxc_list_for_each(iterator, ip) {
2315
2316 inetdev = iterator->elem;
2317
0093bb8c
DL
2318 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2319 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2320 if (err) {
2321 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2322 ifindex, strerror(-err));
82d5ae15
DL
2323 return -1;
2324 }
2325 }
2326
2327 return 0;
0ad19a3f 2328}
2329
82d5ae15 2330static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2331{
82d5ae15 2332 struct lxc_list *iterator;
7fa9074f 2333 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2334 int err;
0ad19a3f 2335
82d5ae15
DL
2336 lxc_list_for_each(iterator, ip) {
2337
2338 inet6dev = iterator->elem;
2339
b3df193c 2340 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2341 &inet6dev->mcast, &inet6dev->acast,
2342 inet6dev->prefix);
3cfc0f3a
MN
2343 if (err) {
2344 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2345 ifindex, strerror(-err));
82d5ae15 2346 return -1;
3cfc0f3a 2347 }
82d5ae15
DL
2348 }
2349
2350 return 0;
0ad19a3f 2351}
2352
82d5ae15 2353static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2354{
0ad19a3f 2355 char ifname[IFNAMSIZ];
0ad19a3f 2356 char *current_ifname = ifname;
3cfc0f3a 2357 int err;
0ad19a3f 2358
82d5ae15
DL
2359 /* empty network namespace */
2360 if (!netdev->ifindex) {
b0efbac4 2361 if (netdev->flags & IFF_UP) {
d472214b 2362 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2363 if (err) {
2364 ERROR("failed to set the loopback up : %s",
2365 strerror(-err));
82d5ae15
DL
2366 return -1;
2367 }
82d5ae15 2368 }
40790553
SH
2369 if (netdev->type != LXC_NET_VETH)
2370 return 0;
2371 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2372 }
13954cce 2373
b466dc33 2374 /* get the new ifindex in case of physical netdev */
40790553 2375 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2376 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2377 ERROR("failed to get ifindex for %s",
2378 netdev->link);
2379 return -1;
2380 }
40790553 2381 }
b466dc33 2382
82d5ae15
DL
2383 /* retrieve the name of the interface */
2384 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2385 ERROR("no interface corresponding to index '%d'",
82d5ae15 2386 netdev->ifindex);
0ad19a3f 2387 return -1;
2388 }
13954cce 2389
018ef520 2390 /* default: let the system to choose one interface name */
9d083402 2391 if (!netdev->name)
fb6d9b2f
DL
2392 netdev->name = netdev->type == LXC_NET_PHYS ?
2393 netdev->link : "eth%d";
018ef520 2394
82d5ae15 2395 /* rename the interface name */
40790553
SH
2396 if (strcmp(ifname, netdev->name) != 0) {
2397 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2398 if (err) {
2399 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2400 strerror(-err));
2401 return -1;
2402 }
018ef520
DL
2403 }
2404
2405 /* Re-read the name of the interface because its name has changed
2406 * and would be automatically allocated by the system
2407 */
82d5ae15 2408 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2409 ERROR("no interface corresponding to index '%d'",
82d5ae15 2410 netdev->ifindex);
018ef520 2411 return -1;
0ad19a3f 2412 }
2413
82d5ae15
DL
2414 /* set a mac address */
2415 if (netdev->hwaddr) {
2416 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2417 ERROR("failed to setup hw address for '%s'",
82d5ae15 2418 current_ifname);
0ad19a3f 2419 return -1;
2420 }
2421 }
2422
82d5ae15
DL
2423 /* setup ipv4 addresses on the interface */
2424 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2425 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2426 ifname);
2427 return -1;
2428 }
2429
82d5ae15
DL
2430 /* setup ipv6 addresses on the interface */
2431 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2432 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2433 ifname);
2434 return -1;
2435 }
2436
82d5ae15 2437 /* set the network device up */
b0efbac4 2438 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2439 int err;
2440
d472214b 2441 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2442 if (err) {
2443 ERROR("failed to set '%s' up : %s", current_ifname,
2444 strerror(-err));
0ad19a3f 2445 return -1;
2446 }
2447
2448 /* the network is up, make the loopback up too */
d472214b 2449 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2450 if (err) {
2451 ERROR("failed to set the loopback up : %s",
2452 strerror(-err));
0ad19a3f 2453 return -1;
2454 }
2455 }
2456
f8fee0e2
MK
2457 /* We can only set up the default routes after bringing
2458 * up the interface, sine bringing up the interface adds
2459 * the link-local routes and we can't add a default
2460 * route if the gateway is not reachable. */
2461
2462 /* setup ipv4 gateway on the interface */
2463 if (netdev->ipv4_gateway) {
2464 if (!(netdev->flags & IFF_UP)) {
2465 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2466 return -1;
2467 }
2468
2469 if (lxc_list_empty(&netdev->ipv4)) {
2470 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2471 return -1;
2472 }
2473
2474 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2475 if (err) {
fc739df5
SG
2476 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2477 if (err) {
2478 ERROR("failed to add ipv4 dest for '%s': %s",
2479 ifname, strerror(-err));
2480 }
2481
2482 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2483 if (err) {
2484 ERROR("failed to setup ipv4 gateway for '%s': %s",
2485 ifname, strerror(-err));
2486 if (netdev->ipv4_gateway_auto) {
2487 char buf[INET_ADDRSTRLEN];
2488 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2489 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2490 }
2491 return -1;
19a26f82 2492 }
f8fee0e2
MK
2493 }
2494 }
2495
2496 /* setup ipv6 gateway on the interface */
2497 if (netdev->ipv6_gateway) {
2498 if (!(netdev->flags & IFF_UP)) {
2499 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2500 return -1;
2501 }
2502
2503 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2504 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2505 return -1;
2506 }
2507
2508 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2509 if (err) {
fc739df5
SG
2510 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2511 if (err) {
2512 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2513 ifname, strerror(-err));
19a26f82 2514 }
fc739df5
SG
2515
2516 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2517 if (err) {
2518 ERROR("failed to setup ipv6 gateway for '%s': %s",
2519 ifname, strerror(-err));
2520 if (netdev->ipv6_gateway_auto) {
2521 char buf[INET6_ADDRSTRLEN];
2522 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2523 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2524 }
2525 return -1;
2526 }
f8fee0e2
MK
2527 }
2528 }
2529
cd54d859
DL
2530 DEBUG("'%s' has been setup", current_ifname);
2531
0ad19a3f 2532 return 0;
2533}
2534
5f4535a3 2535static int setup_network(struct lxc_list *network)
0ad19a3f 2536{
82d5ae15 2537 struct lxc_list *iterator;
82d5ae15 2538 struct lxc_netdev *netdev;
0ad19a3f 2539
5f4535a3 2540 lxc_list_for_each(iterator, network) {
cd54d859 2541
5f4535a3 2542 netdev = iterator->elem;
82d5ae15
DL
2543
2544 if (setup_netdev(netdev)) {
2545 ERROR("failed to setup netdev");
2546 return -1;
2547 }
2548 }
cd54d859 2549
5f4535a3
DL
2550 if (!lxc_list_empty(network))
2551 INFO("network has been setup");
cd54d859
DL
2552
2553 return 0;
0ad19a3f 2554}
2555
7b35f3d6
SH
2556void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2557{
2558 int i;
2559
2560 INFO("running to reset %d nic names", conf->num_savednics);
2561 for (i=0; i<conf->num_savednics; i++) {
2562 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2563 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2564 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2565 free(s->orig_name);
2566 }
2567 conf->num_savednics = 0;
7b35f3d6
SH
2568}
2569
ae9242c8
SH
2570static char *default_rootfs_mount = LXCROOTFSMOUNT;
2571
7b379ab3 2572struct lxc_conf *lxc_conf_init(void)
089cd8b8 2573{
7b379ab3 2574 struct lxc_conf *new;
26ddeedd 2575 int i;
7b379ab3
MN
2576
2577 new = malloc(sizeof(*new));
2578 if (!new) {
2579 ERROR("lxc_conf_init : %m");
2580 return NULL;
2581 }
2582 memset(new, 0, sizeof(*new));
2583
b40a606e 2584 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2585 new->personality = -1;
bc6928ff 2586 new->autodev = -1;
596a818d
DE
2587 new->console.log_path = NULL;
2588 new->console.log_fd = -1;
28a4b0e5 2589 new->console.path = NULL;
63376d7d 2590 new->console.peer = -1;
b5159817
DE
2591 new->console.peerpty.busy = -1;
2592 new->console.peerpty.master = -1;
2593 new->console.peerpty.slave = -1;
63376d7d
DL
2594 new->console.master = -1;
2595 new->console.slave = -1;
2596 new->console.name[0] = '\0';
d2e30e99 2597 new->maincmd_fd = -1;
54c30e29 2598 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2599 if (!new->rootfs.mount) {
2600 ERROR("lxc_conf_init : %m");
2601 free(new);
2602 return NULL;
2603 }
2f3f41d0 2604 new->kmsg = 1;
7b379ab3
MN
2605 lxc_list_init(&new->cgroup);
2606 lxc_list_init(&new->network);
2607 lxc_list_init(&new->mount_list);
81810dd1 2608 lxc_list_init(&new->caps);
1fb86a7c 2609 lxc_list_init(&new->keepcaps);
f6d3e3e4 2610 lxc_list_init(&new->id_map);
26ddeedd
SH
2611 for (i=0; i<NUM_LXC_HOOKS; i++)
2612 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2613 lxc_list_init(&new->groups);
fe4de9a6
DE
2614 new->lsm_aa_profile = NULL;
2615 new->lsm_se_context = NULL;
5112cd70 2616 new->tmp_umount_proc = 0;
7b379ab3 2617
9f30a190
MM
2618 for (i = 0; i < LXC_NS_MAX; i++)
2619 new->inherit_ns_fd[i] = -1;
2620
7b379ab3 2621 return new;
089cd8b8
DL
2622}
2623
e3b4c4c4 2624static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2625{
8634bc19 2626 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2627 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2628 int err;
13954cce 2629
e892973e
DL
2630 if (netdev->priv.veth_attr.pair)
2631 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2632 else {
9ba8130c
SH
2633 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2634 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2635 ERROR("veth1 name too long");
2636 return -1;
2637 }
a0265685 2638 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2639 if (!veth1) {
2640 ERROR("failed to allocate a temporary name");
2641 return -1;
2642 }
74a2b586
JK
2643 /* store away for deconf */
2644 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2645 }
82d5ae15 2646
0e391e57 2647 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2648 veth2 = lxc_mkifname(veth2buf);
ad40563e 2649 if (!veth2) {
82d5ae15 2650 ERROR("failed to allocate a temporary name");
ad40563e 2651 goto out_delete;
0ad19a3f 2652 }
2653
3cfc0f3a
MN
2654 err = lxc_veth_create(veth1, veth2);
2655 if (err) {
2656 ERROR("failed to create %s-%s : %s", veth1, veth2,
2657 strerror(-err));
ad40563e 2658 goto out_delete;
0ad19a3f 2659 }
13954cce 2660
49684c0b
CS
2661 /* changing the high byte of the mac address to 0xfe, the bridge interface
2662 * will always keep the host's mac address and not take the mac address
2663 * of a container */
2664 err = setup_private_host_hw_addr(veth1);
2665 if (err) {
2666 ERROR("failed to change mac address of host interface '%s' : %s",
2667 veth1, strerror(-err));
2668 goto out_delete;
2669 }
2670
82d5ae15 2671 if (netdev->mtu) {
d472214b 2672 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2673 if (!err)
d472214b 2674 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2675 if (err) {
2676 ERROR("failed to set mtu '%s' for %s-%s : %s",
2677 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2678 goto out_delete;
75d09f83
DL
2679 }
2680 }
2681
3cfc0f3a
MN
2682 if (netdev->link) {
2683 err = lxc_bridge_attach(netdev->link, veth1);
2684 if (err) {
2685 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2686 veth1, netdev->link, strerror(-err));
2687 goto out_delete;
2688 }
eb14c10a
DL
2689 }
2690
82d5ae15
DL
2691 netdev->ifindex = if_nametoindex(veth2);
2692 if (!netdev->ifindex) {
36eb9bde 2693 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2694 goto out_delete;
2695 }
2696
d472214b 2697 err = lxc_netdev_up(veth1);
6e35af2e
DL
2698 if (err) {
2699 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2700 goto out_delete;
0ad19a3f 2701 }
2702
e3b4c4c4 2703 if (netdev->upscript) {
751d9dcd
DL
2704 err = run_script(handler->name, "net", netdev->upscript, "up",
2705 "veth", veth1, (char*) NULL);
2706 if (err)
e3b4c4c4 2707 goto out_delete;
e3b4c4c4
ST
2708 }
2709
82d5ae15
DL
2710 DEBUG("instanciated veth '%s/%s', index is '%d'",
2711 veth1, veth2, netdev->ifindex);
2712
6ab9ab6d 2713 return 0;
eb14c10a
DL
2714
2715out_delete:
b84f58b9 2716 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2717 if (!netdev->priv.veth_attr.pair && veth1)
2718 free(veth1);
2719 if(veth2)
2720 free(veth2);
6ab9ab6d 2721 return -1;
13954cce 2722}
d957ae2d 2723
74a2b586
JK
2724static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2725{
2726 char *veth1;
2727 int err;
2728
2729 if (netdev->priv.veth_attr.pair)
2730 veth1 = netdev->priv.veth_attr.pair;
2731 else
2732 veth1 = netdev->priv.veth_attr.veth1;
2733
2734 if (netdev->downscript) {
2735 err = run_script(handler->name, "net", netdev->downscript,
2736 "down", "veth", veth1, (char*) NULL);
2737 if (err)
2738 return -1;
2739 }
2740 return 0;
2741}
2742
e3b4c4c4 2743static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2744{
0e391e57 2745 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2746 int err;
d957ae2d
MT
2747
2748 if (!netdev->link) {
2749 ERROR("no link specified for macvlan netdev");
2750 return -1;
2751 }
13954cce 2752
9ba8130c
SH
2753 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2754 if (err >= sizeof(peerbuf))
2755 return -1;
82d5ae15 2756
a0265685 2757 peer = lxc_mkifname(peerbuf);
ad40563e 2758 if (!peer) {
82d5ae15
DL
2759 ERROR("failed to make a temporary name");
2760 return -1;
0ad19a3f 2761 }
2762
3cfc0f3a
MN
2763 err = lxc_macvlan_create(netdev->link, peer,
2764 netdev->priv.macvlan_attr.mode);
2765 if (err) {
2766 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2767 peer, netdev->link, strerror(-err));
ad40563e 2768 goto out;
0ad19a3f 2769 }
2770
82d5ae15
DL
2771 netdev->ifindex = if_nametoindex(peer);
2772 if (!netdev->ifindex) {
36eb9bde 2773 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2774 goto out;
22ebac19 2775 }
2776
e3b4c4c4 2777 if (netdev->upscript) {
751d9dcd
DL
2778 err = run_script(handler->name, "net", netdev->upscript, "up",
2779 "macvlan", netdev->link, (char*) NULL);
2780 if (err)
ad40563e 2781 goto out;
e3b4c4c4
ST
2782 }
2783
e892973e
DL
2784 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2785 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2786
d957ae2d 2787 return 0;
ad40563e
ÇO
2788out:
2789 lxc_netdev_delete_by_name(peer);
2790 free(peer);
2791 return -1;
0ad19a3f 2792}
2793
74a2b586
JK
2794static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2795{
2796 int err;
2797
2798 if (netdev->downscript) {
2799 err = run_script(handler->name, "net", netdev->downscript,
2800 "down", "macvlan", netdev->link,
2801 (char*) NULL);
2802 if (err)
2803 return -1;
2804 }
2805 return 0;
2806}
2807
26c39028 2808/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2809static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2810{
2811 char peer[IFNAMSIZ];
3cfc0f3a 2812 int err;
26c39028
JHS
2813
2814 if (!netdev->link) {
2815 ERROR("no link specified for vlan netdev");
2816 return -1;
2817 }
2818
9ba8130c
SH
2819 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2820 if (err >= sizeof(peer)) {
2821 ERROR("peer name too long");
2822 return -1;
2823 }
26c39028 2824
3cfc0f3a
MN
2825 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2826 if (err) {
2827 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2828 peer, netdev->link, strerror(-err));
26c39028
JHS
2829 return -1;
2830 }
2831
2832 netdev->ifindex = if_nametoindex(peer);
2833 if (!netdev->ifindex) {
2834 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2835 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2836 return -1;
2837 }
2838
e892973e
DL
2839 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2840 netdev->ifindex);
2841
26c39028
JHS
2842 return 0;
2843}
2844
74a2b586
JK
/*
 * Deconfiguration hook for vlan devices: nothing to do, always succeeds.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2849
e3b4c4c4 2850static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2851{
6168e99f
DL
2852 if (!netdev->link) {
2853 ERROR("no link specified for the physical interface");
2854 return -1;
2855 }
2856
9d083402 2857 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2858 if (!netdev->ifindex) {
9d083402 2859 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2860 return -1;
2861 }
2862
e3b4c4c4
ST
2863 if (netdev->upscript) {
2864 int err;
751d9dcd
DL
2865 err = run_script(handler->name, "net", netdev->upscript,
2866 "up", "phys", netdev->link, (char*) NULL);
2867 if (err)
e3b4c4c4 2868 return -1;
e3b4c4c4
ST
2869 }
2870
82d5ae15 2871 return 0;
0ad19a3f 2872}
2873
74a2b586
JK
2874static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2875{
2876 int err;
2877
2878 if (netdev->downscript) {
2879 err = run_script(handler->name, "net", netdev->downscript,
2880 "down", "phys", netdev->link, (char*) NULL);
2881 if (err)
2882 return -1;
2883 }
2884 return 0;
2885}
2886
26b797f3
SH
2887static int instanciate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
2888{
2889 netdev->ifindex = 0;
2890 return 0;
2891}
2892
e3b4c4c4 2893static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2894{
82d5ae15 2895 netdev->ifindex = 0;
e3b4c4c4
ST
2896 if (netdev->upscript) {
2897 int err;
751d9dcd
DL
2898 err = run_script(handler->name, "net", netdev->upscript,
2899 "up", "empty", (char*) NULL);
2900 if (err)
e3b4c4c4 2901 return -1;
e3b4c4c4 2902 }
82d5ae15 2903 return 0;
0ad19a3f 2904}
2905
74a2b586
JK
2906static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2907{
2908 int err;
2909
2910 if (netdev->downscript) {
2911 err = run_script(handler->name, "net", netdev->downscript,
2912 "down", "empty", (char*) NULL);
2913 if (err)
2914 return -1;
2915 }
2916 return 0;
2917}
2918
26b797f3
SH
/*
 * Deconfiguration hook for the "none" network type: nothing to do, always
 * succeeds.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2923
2924int lxc_requests_empty_network(struct lxc_handler *handler)
2925{
2926 struct lxc_list *network = &handler->conf->network;
2927 struct lxc_list *iterator;
2928 struct lxc_netdev *netdev;
2929 bool found_none = false, found_nic = false;
2930
2931 if (lxc_list_empty(network))
2932 return 0;
2933
2934 lxc_list_for_each(iterator, network) {
2935
2936 netdev = iterator->elem;
2937
2938 if (netdev->type == LXC_NET_NONE)
2939 found_none = true;
2940 else
2941 found_nic = true;
2942 }
2943 if (found_none && !found_nic)
2944 return 1;
2945 return 0;
2946}
2947
e3b4c4c4 2948int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2949{
e3b4c4c4 2950 struct lxc_list *network = &handler->conf->network;
82d5ae15 2951 struct lxc_list *iterator;
82d5ae15 2952 struct lxc_netdev *netdev;
cbef6c52
SH
2953 int am_root = (getuid() == 0);
2954
2955 if (!am_root)
2956 return 0;
0ad19a3f 2957
5f4535a3 2958 lxc_list_for_each(iterator, network) {
0ad19a3f 2959
5f4535a3 2960 netdev = iterator->elem;
13954cce 2961
24654103 2962 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2963 ERROR("invalid network configuration type '%d'",
5f4535a3 2964 netdev->type);
82d5ae15
DL
2965 return -1;
2966 }
0ad19a3f 2967
e3b4c4c4 2968 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2969 ERROR("failed to create netdev");
2970 return -1;
2971 }
e3b4c4c4 2972
0ad19a3f 2973 }
2974
2975 return 0;
2976}
2977
74a2b586 2978void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2979{
74a2b586 2980 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2981 struct lxc_list *iterator;
2982 struct lxc_netdev *netdev;
2983
2984 lxc_list_for_each(iterator, network) {
2985 netdev = iterator->elem;
d472214b 2986
74a2b586 2987 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2988 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2989 WARN("failed to rename to the initial name the " \
2990 "netdev '%s'", netdev->link);
d472214b 2991 continue;
d8f8e352 2992 }
d472214b 2993
74a2b586
JK
2994 if (netdev_deconf[netdev->type](handler, netdev)) {
2995 WARN("failed to destroy netdev");
2996 }
2997
d8f8e352
DL
2998 /* Recent kernel remove the virtual interfaces when the network
2999 * namespace is destroyed but in case we did not moved the
3000 * interface to the network namespace, we have to destroy it
3001 */
74a2b586
JK
3002 if (netdev->ifindex != 0 &&
3003 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3004 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3005 }
3006}
3007
45e854dc
SG
3008#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3009
fe1f672f
ÇO
3010/* lxc-user-nic returns "interface_name:interface_name\n" */
3011#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 3012static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3013{
3014 pid_t child;
a7242d9a
ÇO
3015 int bytes, pipefd[2];
3016 char *token, *saveptr = NULL;
fe1f672f 3017 char buffer[MAX_BUFFER_SIZE];
cbef6c52
SH
3018
3019 if (netdev->type != LXC_NET_VETH) {
3020 ERROR("nic type %d not support for unprivileged use",
3021 netdev->type);
3022 return -1;
3023 }
3024
a7242d9a
ÇO
3025 if(pipe(pipefd) < 0) {
3026 SYSERROR("pipe failed");
3027 return -1;
3028 }
3029
cbef6c52
SH
3030 if ((child = fork()) < 0) {
3031 SYSERROR("fork");
a7242d9a
ÇO
3032 close(pipefd[0]);
3033 close(pipefd[1]);
3034 return -1;
3035 }
3036
3037 if (child == 0) { // child
3038 /* close the read-end of the pipe */
3039 close(pipefd[0]);
3040 /* redirect the stdout to write-end of the pipe */
3041 dup2(pipefd[1], STDOUT_FILENO);
3042 /* close the write-end of the pipe */
fe1f672f 3043 close(pipefd[1]);
a7242d9a
ÇO
3044
3045 // Call lxc-user-nic pid type bridge
3046 char pidstr[20];
3047 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
3048 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3049 pidstr[19] = '\0';
3050 execvp(args[0], args);
3051 SYSERROR("execvp lxc-user-nic");
3052 exit(1);
3053 }
3054
3055 /* close the write-end of the pipe */
3056 close(pipefd[1]);
3057
fe1f672f 3058 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3059 if (bytes < 0) {
3060 SYSERROR("read failed");
3061 }
3062 buffer[bytes - 1] = '\0';
3063
3064 if (wait_for_pid(child) != 0) {
3065 close(pipefd[0]);
cbef6c52
SH
3066 return -1;
3067 }
3068
a7242d9a
ÇO
3069 /* close the read-end of the pipe */
3070 close(pipefd[0]);
cbef6c52 3071
a7242d9a
ÇO
3072 /* fill netdev->name field */
3073 token = strtok_r(buffer, ":", &saveptr);
3074 if (!token)
3075 return -1;
658979c5
SH
3076 netdev->name = malloc(IFNAMSIZ+1);
3077 if (!netdev->name) {
3078 ERROR("Out of memory");
3079 return -1;
3080 }
3081 memset(netdev->name, 0, IFNAMSIZ+1);
3082 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3083
3084 /* fill netdev->veth_attr.pair field */
3085 token = strtok_r(NULL, ":", &saveptr);
3086 if (!token)
3087 return -1;
3088 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3089 if (!netdev->priv.veth_attr.pair) {
3090 ERROR("Out of memory");
3091 return -1;
3092 }
45e854dc 3093
a7242d9a 3094 return 0;
cbef6c52
SH
3095}
3096
5f4535a3 3097int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3098{
82d5ae15 3099 struct lxc_list *iterator;
82d5ae15 3100 struct lxc_netdev *netdev;
cbef6c52 3101 int am_root = (getuid() == 0);
3cfc0f3a 3102 int err;
0ad19a3f 3103
5f4535a3 3104 lxc_list_for_each(iterator, network) {
82d5ae15 3105
5f4535a3 3106 netdev = iterator->elem;
82d5ae15 3107
fbb16259 3108 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3109 if (unpriv_assign_nic(netdev, pid))
3110 return -1;
658979c5
SH
3111 // lxc-user-nic has moved the nic to the new ns.
3112 // unpriv_assign_nic() fills in netdev->name.
3113 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3114 continue;
3115 }
236087a6 3116
fbb16259
SH
3117 /* empty network namespace, nothing to move */
3118 if (!netdev->ifindex)
3119 continue;
3120
d472214b 3121 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
3122 if (err) {
3123 ERROR("failed to move '%s' to the container : %s",
3124 netdev->link, strerror(-err));
82d5ae15
DL
3125 return -1;
3126 }
3127
c1c75c04 3128 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3129 }
3130
3131 return 0;
3132}
3133
251d0d2a
DE
3134static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3135 size_t buf_size)
f6d3e3e4
SH
3136{
3137 char path[PATH_MAX];
e4ccd113 3138 int ret, closeret;
f6d3e3e4
SH
3139 FILE *f;
3140
3141 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3142 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3143 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3144 return -E2BIG;
3145 }
3146 f = fopen(path, "w");
3147 if (!f) {
3148 perror("open");
3149 return -EINVAL;
3150 }
251d0d2a 3151 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3152 if (ret < 0)
e4ccd113
SH
3153 SYSERROR("writing id mapping");
3154 closeret = fclose(f);
3155 if (closeret)
3156 SYSERROR("writing id mapping");
3157 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3158}
3159
3160int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3161{
3162 struct lxc_list *iterator;
3163 struct id_map *map;
3164 int ret = 0;
251d0d2a 3165 enum idtype type;
4f7521b4 3166 char *buf = NULL, *pos;
cf3ef16d 3167 int am_root = (getuid() == 0);
251d0d2a
DE
3168
3169 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3170 int left, fill;
cf3ef16d
SH
3171 int had_entry = 0;
3172 if (!buf) {
3173 buf = pos = malloc(4096);
4f7521b4
SH
3174 if (!buf)
3175 return -ENOMEM;
cf3ef16d
SH
3176 }
3177 pos = buf;
3178 if (!am_root)
d1838f34 3179 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3180 type == ID_TYPE_UID ? 'u' : 'g',
3181 pid);
4f7521b4 3182
cf3ef16d
SH
3183 lxc_list_for_each(iterator, idmap) {
3184 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3185 map = iterator->elem;
cf3ef16d
SH
3186 if (map->idtype != type)
3187 continue;
3188
3189 had_entry = 1;
3190 left = 4096 - (pos - buf);
d1838f34
MS
3191 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
3192 am_root ? "" : " ",
3193 map->nsid, map->hostid, map->range,
3194 am_root ? "\n" : "");
cf3ef16d
SH
3195 if (fill <= 0 || fill >= left)
3196 SYSERROR("snprintf failed, too many mappings");
3197 pos += fill;
251d0d2a 3198 }
cf3ef16d 3199 if (!had_entry)
4f7521b4 3200 continue;
cf3ef16d 3201
d1838f34 3202 if (am_root) {
cf3ef16d 3203 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3204 } else {
3205 left = 4096 - (pos - buf);
3206 fill = snprintf(pos, left, "\n");
3207 if (fill <= 0 || fill >= left)
3208 SYSERROR("snprintf failed, too many mappings");
3209 pos += fill;
cf3ef16d 3210 ret = system(buf);
d1838f34 3211 }
cf3ef16d 3212
f6d3e3e4
SH
3213 if (ret)
3214 break;
3215 }
251d0d2a 3216
4f7521b4
SH
3217 if (buf)
3218 free(buf);
f6d3e3e4
SH
3219 return ret;
3220}
3221
cf3ef16d 3222/*
0b3a6504
SH
3223 * return the host uid to which the container root is mapped in *val.
3224 * Return true if id was found, false otherwise.
cf3ef16d 3225 */
2a9a80cb 3226bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3227 unsigned long *val)
cf3ef16d
SH
3228{
3229 struct lxc_list *it;
3230 struct id_map *map;
3231
3232 lxc_list_for_each(it, &conf->id_map) {
3233 map = it->elem;
3234 if (map->idtype != ID_TYPE_UID)
3235 continue;
3236 if (map->nsid != 0)
3237 continue;
2a9a80cb
SH
3238 *val = map->hostid;
3239 return true;
cf3ef16d 3240 }
2a9a80cb 3241 return false;
cf3ef16d
SH
3242}
3243
2133f58c 3244int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3245{
3246 struct lxc_list *it;
3247 struct id_map *map;
3248 lxc_list_for_each(it, &conf->id_map) {
3249 map = it->elem;
2133f58c 3250 if (map->idtype != idtype)
cf3ef16d
SH
3251 continue;
3252 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3253 return (id - map->hostid) + map->nsid;
cf3ef16d 3254 }
57d116ab 3255 return -1;
cf3ef16d
SH
3256}
3257
2133f58c 3258int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3259{
3260 struct lxc_list *it;
3261 struct id_map *map;
2133f58c 3262 unsigned int freeid = 0;
cf3ef16d
SH
3263again:
3264 lxc_list_for_each(it, &conf->id_map) {
3265 map = it->elem;
2133f58c 3266 if (map->idtype != idtype)
cf3ef16d
SH
3267 continue;
3268 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3269 freeid = map->nsid + map->range;
3270 goto again;
3271 }
3272 }
3273 return freeid;
3274}
3275
19a26f82
MK
3276int lxc_find_gateway_addresses(struct lxc_handler *handler)
3277{
3278 struct lxc_list *network = &handler->conf->network;
3279 struct lxc_list *iterator;
3280 struct lxc_netdev *netdev;
3281 int link_index;
3282
3283 lxc_list_for_each(iterator, network) {
3284 netdev = iterator->elem;
3285
3286 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3287 continue;
3288
3289 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3290 ERROR("gateway = auto only supported for "
3291 "veth and macvlan");
3292 return -1;
3293 }
3294
3295 if (!netdev->link) {
3296 ERROR("gateway = auto needs a link interface");
3297 return -1;
3298 }
3299
3300 link_index = if_nametoindex(netdev->link);
3301 if (!link_index)
3302 return -EINVAL;
3303
3304 if (netdev->ipv4_gateway_auto) {
3305 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3306 ERROR("failed to automatically find ipv4 gateway "
3307 "address from link interface '%s'", netdev->link);
3308 return -1;
3309 }
3310 }
3311
3312 if (netdev->ipv6_gateway_auto) {
3313 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3314 ERROR("failed to automatically find ipv6 gateway "
3315 "address from link interface '%s'", netdev->link);
3316 return -1;
3317 }
3318 }
3319 }
3320
3321 return 0;
3322}
3323
5e4a62bf 3324int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3325{
5e4a62bf 3326 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3327 int i, ret;
b0a33c1e 3328
5e4a62bf
DL
3329 /* no tty in the configuration */
3330 if (!conf->tty)
b0a33c1e 3331 return 0;
3332
13954cce 3333 tty_info->pty_info =
e4e7d59d 3334 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3335 if (!tty_info->pty_info) {
36eb9bde 3336 SYSERROR("failed to allocate pty_info");
985d15b1 3337 return -1;
b0a33c1e 3338 }
3339
985d15b1 3340 for (i = 0; i < conf->tty; i++) {
13954cce 3341
b0a33c1e 3342 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3343
025ed0f3
SH
3344 process_lock();
3345 ret = openpty(&pty_info->master, &pty_info->slave,
3346 pty_info->name, NULL, NULL);
3347 process_unlock();
3348 if (ret) {
36eb9bde 3349 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3350 tty_info->nbtty = i;
3351 lxc_delete_tty(tty_info);
3352 return -1;
b0a33c1e 3353 }
3354
5332bb84
DL
3355 DEBUG("allocated pty '%s' (%d/%d)",
3356 pty_info->name, pty_info->master, pty_info->slave);
3357
3ec1648d 3358 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3359 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3360 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3361
b0a33c1e 3362 pty_info->busy = 0;
3363 }
3364
985d15b1 3365 tty_info->nbtty = conf->tty;
1ac470c0
DL
3366
3367 INFO("tty's configured");
3368
985d15b1 3369 return 0;
b0a33c1e 3370}
3371
3372void lxc_delete_tty(struct lxc_tty_info *tty_info)
3373{
3374 int i;
3375
3376 for (i = 0; i < tty_info->nbtty; i++) {
3377 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3378
3379 close(pty_info->master);
3380 close(pty_info->slave);
3381 }
3382
3383 free(tty_info->pty_info);
3384 tty_info->nbtty = 0;
3385}
3386
f6d3e3e4 3387/*
c4d10a05
SH
3388 * chown_mapped_root: for an unprivileged user with uid X to chown a dir
3389 * to subuid Y, he needs to run chown as root in a userns where
3390 * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
3391 * X. That way, the container root is privileged with respect to
3392 * hostuid X, allowing him to do the chown.
f6d3e3e4 3393 */
c4d10a05 3394int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3395{
c4d10a05
SH
3396 uid_t rootid;
3397 pid_t pid;
2a9a80cb 3398 unsigned long val;
a7ef8753 3399 char *chownpath = path;
f6d3e3e4 3400
2a9a80cb 3401 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3402 ERROR("No mapping for container root");
3403 return -1;
f6d3e3e4 3404 }
2a9a80cb
SH
3405 rootid = (uid_t) val;
3406
a7ef8753
SH
3407 /*
3408 * In case of overlay, we want only the writeable layer
3409 * to be chowned
3410 */
1f92162d 3411 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3412 chownpath = strchr(path, ':');
3413 if (!chownpath) {
3414 ERROR("Bad overlay path: %s", path);
3415 return -1;
3416 }
3417 chownpath = strchr(chownpath+1, ':');
3418 if (!chownpath) {
3419 ERROR("Bad overlay path: %s", path);
3420 return -1;
3421 }
3422 chownpath++;
3423 }
3424 path = chownpath;
c4d10a05
SH
3425 if (geteuid() == 0) {
3426 if (chown(path, rootid, -1) < 0) {
3427 ERROR("Error chowning %s", path);
3428 return -1;
3429 }
3430 return 0;
3431 }
3432 pid = fork();
3433 if (pid < 0) {
3434 SYSERROR("Failed forking");
f6d3e3e4
SH
3435 return -1;
3436 }
c4d10a05
SH
3437 if (!pid) {
3438 int hostuid = geteuid(), ret;
98e5ba51
SH
3439 char map1[100], map2[100], map3[100];
3440 char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "-m",
3441 map3, "--", "chown", "0", path, NULL};
f6d3e3e4 3442
98e5ba51
SH
3443 // "u:0:rootid:1"
3444 ret = snprintf(map1, 100, "u:0:%d:1", rootid);
c4d10a05
SH
3445 if (ret < 0 || ret >= 100) {
3446 ERROR("Error uid printing map string");
f6d3e3e4
SH
3447 return -1;
3448 }
c4d10a05 3449
98e5ba51
SH
3450 // "u:hostuid:hostuid:1"
3451 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3452 if (ret < 0 || ret >= 100) {
3453 ERROR("Error uid printing map string");
3454 return -1;
3455 }
3456
3457 // "g:0:hostgid:1"
3458 ret = snprintf(map3, 100, "g:0:%d:1", getgid());
c4d10a05
SH
3459 if (ret < 0 || ret >= 100) {
3460 ERROR("Error uid printing map string");
3461 return -1;
3462 }
3463
3464 ret = execvp("lxc-usernsexec", args);
3465 SYSERROR("Failed executing usernsexec");
3466 exit(1);
f6d3e3e4 3467 }
c4d10a05 3468 return wait_for_pid(pid);
f6d3e3e4
SH
3469}
3470
c4d10a05 3471int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3472{
c4d10a05 3473 int i;
f6d3e3e4 3474
c4d10a05 3475 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3476 return 0;
c4d10a05
SH
3477
3478 for (i = 0; i < c->tty_info.nbtty; i++) {
3479 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3480
3481 if (chown_mapped_root(pty_info->name, c) < 0) {
3482 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3483 return -1;
3484 }
3485 }
3486
29b10e4f 3487 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3488 ERROR("Failed to chown %s", c->console.name);
3489 return -1;
3490 }
3491
f6d3e3e4
SH
3492 return 0;
3493}
3494
bc6928ff
MW
/*
 * check_autodev: this routine is called when the configuration does not
 * already specify a value for autodev (mounting a file system on /dev and
 * populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container has an /etc/systemd/system directory then it
 * is probably running systemd as the init process and it needs the autodev
 * mount to prevent it from mounting devtmpfs on /dev on its own, causing
 * conflicts in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev, as this then enables us to use subdirectories under /dev for the
 * container /dev directories and we can fake udev devices.
 */
/*
 * Layout through which check_autodev() interprets its opaque data pointer;
 * only argv[0] is read there, as the command to inspect.
 */
struct start_args {
	char *const *argv;	/* command vector; argv[0] is the program */
};

/* Maximum number of symlink dereferences check_autodev() will follow. */
#define MAX_SYMLINK_DEPTH 32
3515
74a3920a 3516static int check_autodev( const char *rootfs, void *data )
bc6928ff
MW
3517{
3518 struct start_args *arg = data;
3519 int ret;
3520 int loop_count = 0;
3521 struct stat s;
3522 char absrootfs[MAXPATHLEN];
3523 char path[MAXPATHLEN];
3524 char abs_path[MAXPATHLEN];
3525 char *command = "/sbin/init";
3526
3527 if (rootfs == NULL || strlen(rootfs) == 0)
3528 return -2;
3529
3530 if (!realpath(rootfs, absrootfs))
3531 return -2;
3532
3533 if( arg && arg->argv[0] ) {
3534 command = arg->argv[0];
959aee9c 3535 DEBUG("Set exec command to %s", command );
bc6928ff
MW
3536 }
3537
3538 strncpy( path, command, MAXPATHLEN-1 );
3539
3540 if ( 0 != access(path, F_OK) || 0 != stat(path, &s) )
3541 return -2;
3542
3543 /* Dereference down the symlink merry path testing as we go. */
3544 /* If anything references systemd in the path - set autodev! */
3545 /* Renormalize to the rootfs before each dereference */
3546 /* Relative symlinks should fall out in the wash even with .. */
3547 while( 1 ) {
3548 if ( strstr( path, "systemd" ) ) {
3549 INFO("Container with systemd init detected - enabling autodev!");
3550 return 1;
3551 }
3552
3553 ret = snprintf(abs_path, MAXPATHLEN-1, "%s/%s", absrootfs, path);
3554 if (ret < 0 || ret > MAXPATHLEN)
3555 return -2;
3556
3557 ret = readlink( abs_path, path, MAXPATHLEN-1 );
3558
3559 if ( ( ret <= 0 ) || ( ++loop_count > MAX_SYMLINK_DEPTH ) ) {
3560 break; /* Break out for other tests */
3561 }
3562 path[ret] = '\0';
3563 }
3564
3565 /*
3566 * Add future checks here.
3567 * Return positive if we should go autodev
3568 * Return 0 if we should NOT go autodev
3569 * Return negative if we encounter an error or can not determine...
3570 */
3571
3572 /* All else fails, we don't need autodev */
3573 INFO("Autodev not required.");
3574 return 0;
3575}
3576
5112cd70
SH
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * <rootfs>/proc/self is read as a symlink.  If it cannot be read, proc is
 * mounted on <rootfs>/proc.  If it can be read but does not point to "1",
 * whatever is mounted there is lazily unmounted and proc is mounted anew.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink does not NUL-terminate.  The buffer is zeroed and one
	 * byte is held back so that link is always a valid string for the
	 * log message below.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	/* can't be longer than rootfs/proc/1 */
	if (strncmp(link, "1", linklen) != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3618
3619int tmp_proc_mount(struct lxc_conf *lxc_conf)
3620{
3621 int mounted;
3622
3623 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3624 if (mount("proc", "/proc", "proc", 0, NULL)) {
3625 SYSERROR("Failed mounting /proc, proceeding");
3626 mounted = 0;
3627 } else
3628 mounted = 1;
3629 } else
3630 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3631 if (mounted == -1) {
3632 SYSERROR("failed to mount /proc in the container.");
3633 return -1;
3634 } else if (mounted == 1) {
3635 lxc_conf->tmp_umount_proc = 1;
3636 }
3637 return 0;
3638}
3639
3640void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3641{
3642 if (lxc_conf->tmp_umount_proc == 1) {
3643 umount("/proc");
3644 lxc_conf->tmp_umount_proc = 0;
3645 }
3646}
3647
d4ef7c50 3648int lxc_setup(struct lxc_handler *handler)
0ad19a3f 3649{
d4ef7c50
SH
3650 const char *name = handler->name;
3651 struct lxc_conf *lxc_conf = handler->conf;
3652 const char *lxcpath = handler->lxcpath;
3653 void *data = handler->data;
d4ef7c50 3654
6c544cb3
MM
3655 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3656 if (setup_utsname(lxc_conf->utsname)) {
3657 ERROR("failed to setup the utsname for '%s'", name);
3658 return -1;
3659 }
0ad19a3f 3660 }
3661
5f4535a3 3662 if (setup_network(&lxc_conf->network)) {
36eb9bde 3663 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3664 return -1;
0ad19a3f 3665 }
3666
283678ed 3667 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3668 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3669 return -1;
3670 }
5ea6163a 3671
cc28d0b0 3672 if (setup_rootfs(lxc_conf)) {
ac778708 3673 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3674 return -1;
0ad19a3f 3675 }
3676
bc6928ff
MW
3677 if (lxc_conf->autodev < 0) {
3678 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
3679 }
3680
3681 if (lxc_conf->autodev > 0) {
3682 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3683 ERROR("failed to mount /dev in the container");
c6883f38
SH
3684 return -1;
3685 }
3686 }
3687
368bbc02
CS
3688 /* do automatic mounts (mainly /proc and /sys), but exclude
3689 * those that need to wait until other stuff has finished
3690 */
4fb3cba5 3691 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3692 ERROR("failed to setup the automatic mounts for '%s'", name);
3693 return -1;
3694 }
3695
80a881b2 3696 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3697 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3698 return -1;
576f946d 3699 }
3700
c1dc38c2 3701 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3702 ERROR("failed to setup the mount entries for '%s'", name);
3703 return -1;
3704 }
3705
368bbc02
CS
3706 /* now mount only cgroup, if wanted;
3707 * before, /sys could not have been mounted
3708 * (is either mounted automatically or via fstab entries)
3709 */
4fb3cba5 3710 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3711 ERROR("failed to setup the automatic mounts for '%s'", name);
3712 return -1;
3713 }
3714
283678ed 3715 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3716 ERROR("failed to run mount hooks for container '%s'.", name);
3717 return -1;
3718 }
3719
bc6928ff 3720 if (lxc_conf->autodev > 0) {
283678ed 3721 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3722 ERROR("failed to run autodev hooks for container '%s'.", name);
3723 return -1;
3724 }
91c3830e
SH
3725 if (setup_autodev(lxc_conf->rootfs.mount)) {
3726 ERROR("failed to populate /dev in the container");
3727 return -1;
3728 }
3729 }
368bbc02 3730
37903589 3731 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3732 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3733 return -1;
6e590161 3734 }
3735
7e0e1d94
AV
3736 if (lxc_conf->kmsg) {
3737 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3738 ERROR("failed to setup kmsg for '%s'", name);
3739 }
1bd051a6 3740
37903589 3741 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3742 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3743 return -1;
b0a33c1e 3744 }
3745
69aa6655
DE
3746 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3747 ERROR("failed to setup /dev symlinks for '%s'", name);
3748 return -1;
3749 }
3750
5112cd70
SH
3751 /* mount /proc if it's not already there */
3752 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3753 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3754 return -1;
e075f5d9 3755 }
e075f5d9 3756
ac778708 3757 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3758 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3759 return -1;
ed502555 3760 }
3761
571e6ec8 3762 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3763 ERROR("failed to setup the new pts instance");
95b5ffaf 3764 return -1;
3c26f34e 3765 }
3766
cccc74b5
DL
3767 if (setup_personality(lxc_conf->personality)) {
3768 ERROR("failed to setup personality");
3769 return -1;
3770 }
3771
f6d3e3e4 3772 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3773 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3774 if (!lxc_list_empty(&lxc_conf->caps)) {
3775 ERROR("Simultaneously requested dropping and keeping caps");
3776 return -1;
3777 }
3778 if (dropcaps_except(&lxc_conf->keepcaps)) {
959aee9c 3779 ERROR("failed to keep requested caps");
1fb86a7c
SH
3780 return -1;
3781 }
3782 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3783 ERROR("failed to drop capabilities");
3784 return -1;
3785 }
81810dd1
DL
3786 }
3787
cd54d859
DL
3788 NOTICE("'%s' is setup.", name);
3789
0ad19a3f 3790 return 0;
3791}
26ddeedd 3792
283678ed
SH
3793int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3794 const char *lxcpath, char *argv[])
26ddeedd
SH
3795{
3796 int which = -1;
3797 struct lxc_list *it;
3798
3799 if (strcmp(hook, "pre-start") == 0)
3800 which = LXCHOOK_PRESTART;
5ea6163a
SH
3801 else if (strcmp(hook, "pre-mount") == 0)
3802 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3803 else if (strcmp(hook, "mount") == 0)
3804 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3805 else if (strcmp(hook, "autodev") == 0)
3806 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3807 else if (strcmp(hook, "start") == 0)
3808 which = LXCHOOK_START;
3809 else if (strcmp(hook, "post-stop") == 0)
3810 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3811 else if (strcmp(hook, "clone") == 0)
3812 which = LXCHOOK_CLONE;
26ddeedd
SH
3813 else
3814 return -1;
3815 lxc_list_for_each(it, &conf->hooks[which]) {
3816 int ret;
3817 char *hookname = it->elem;
283678ed 3818 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3819 if (ret)
3820 return ret;
3821 }
3822 return 0;
3823}
72d0e1cb 3824
427b3a21 3825static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3826{
3827 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3828 struct lxc_list *it2,*next;
72d0e1cb
SG
3829
3830 lxc_list_del(it);
3831
3832 if (netdev->link)
3833 free(netdev->link);
3834 if (netdev->name)
3835 free(netdev->name);
c9bb9a85
DE
3836 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3837 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3838 if (netdev->upscript)
3839 free(netdev->upscript);
3840 if (netdev->hwaddr)
3841 free(netdev->hwaddr);
3842 if (netdev->mtu)
3843 free(netdev->mtu);
3844 if (netdev->ipv4_gateway)
3845 free(netdev->ipv4_gateway);
3846 if (netdev->ipv6_gateway)
3847 free(netdev->ipv6_gateway);
9ebb03ad 3848 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3849 lxc_list_del(it2);
3850 free(it2->elem);
3851 free(it2);
3852 }
9ebb03ad 3853 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3854 lxc_list_del(it2);
3855 free(it2->elem);
3856 free(it2);
3857 }
d95db067 3858 free(netdev);
72d0e1cb
SG
3859 free(it);
3860}
3861
3862/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3863int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3864{
3865 char *p1;
3866 int ret, idx, i;
3867 struct lxc_list *it;
3868 struct lxc_netdev *netdev;
3869
3870 p1 = index(key, '.');
3871 if (!p1 || *(p1+1) == '\0')
3872 p1 = NULL;
3873
3874 ret = sscanf(key, "%d", &idx);
3875 if (ret != 1) return -1;
3876 if (idx < 0)
3877 return -1;
3878
3879 i = 0;
3880 lxc_list_for_each(it, &c->network) {
3881 if (i == idx)
3882 break;
3883 i++;
3884 }
3885 if (i < idx) // we don't have that many nics defined
3886 return -1;
3887
3888 if (!it || !it->elem)
3889 return -1;
3890
3891 netdev = it->elem;
3892
3893 if (!p1) {
3894 lxc_remove_nic(it);
52d21d40 3895 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3896 struct lxc_list *it2,*next;
3897 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3898 lxc_list_del(it2);
3899 free(it2->elem);
3900 free(it2);
3901 }
52d21d40 3902 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
3903 struct lxc_list *it2,*next;
3904 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3905 lxc_list_del(it2);
3906 free(it2->elem);
3907 free(it2);
3908 }
52d21d40 3909 } else if (strcmp(p1, ".link") == 0) {
72d0e1cb
SG
3910 if (netdev->link) {
3911 free(netdev->link);
3912 netdev->link = NULL;
3913 }
52d21d40 3914 } else if (strcmp(p1, ".name") == 0) {
72d0e1cb
SG
3915 if (netdev->name) {
3916 free(netdev->name);
3917 netdev->name = NULL;
3918 }
52d21d40 3919 } else if (strcmp(p1, ".script.up") == 0) {
72d0e1cb
SG
3920 if (netdev->upscript) {
3921 free(netdev->upscript);
3922 netdev->upscript = NULL;
3923 }
52d21d40 3924 } else if (strcmp(p1, ".hwaddr") == 0) {
72d0e1cb
SG
3925 if (netdev->hwaddr) {
3926 free(netdev->hwaddr);
3927 netdev->hwaddr = NULL;
3928 }
52d21d40 3929 } else if (strcmp(p1, ".mtu") == 0) {
72d0e1cb
SG
3930 if (netdev->mtu) {
3931 free(netdev->mtu);
3932 netdev->mtu = NULL;
3933 }
52d21d40 3934 } else if (strcmp(p1, ".ipv4_gateway") == 0) {
72d0e1cb
SG
3935 if (netdev->ipv4_gateway) {
3936 free(netdev->ipv4_gateway);
3937 netdev->ipv4_gateway = NULL;
3938 }
52d21d40 3939 } else if (strcmp(p1, ".ipv6_gateway") == 0) {
72d0e1cb
SG
3940 if (netdev->ipv6_gateway) {
3941 free(netdev->ipv6_gateway);
3942 netdev->ipv6_gateway = NULL;
3943 }
3944 }
3945 else return -1;
3946
3947 return 0;
3948}
3949
3950int lxc_clear_config_network(struct lxc_conf *c)
3951{
9ebb03ad
DE
3952 struct lxc_list *it,*next;
3953 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3954 lxc_remove_nic(it);
3955 }
3956 return 0;
3957}
3958
3959int lxc_clear_config_caps(struct lxc_conf *c)
3960{
9ebb03ad 3961 struct lxc_list *it,*next;
72d0e1cb 3962
9ebb03ad 3963 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3964 lxc_list_del(it);
3965 free(it->elem);
3966 free(it);
3967 }
3968 return 0;
3969}
3970
74a3920a 3971static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
3972 struct lxc_list *it, *next;
3973
4355ab5f 3974 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
3975 lxc_list_del(it);
3976 free(it->elem);
3977 free(it);
3978 }
3979 return 0;
3980}
3981
4355ab5f
SH
3982int lxc_clear_idmaps(struct lxc_conf *c)
3983{
3984 return lxc_free_idmap(&c->id_map);
3985}
3986
1fb86a7c
SH
3987int lxc_clear_config_keepcaps(struct lxc_conf *c)
3988{
3989 struct lxc_list *it,*next;
3990
3991 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3992 lxc_list_del(it);
3993 free(it->elem);
3994 free(it);
3995 }
3996 return 0;
3997}
3998
12a50cc6 3999int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4000{
9ebb03ad 4001 struct lxc_list *it,*next;
72d0e1cb 4002 bool all = false;
12a50cc6 4003 const char *k = key + 11;
72d0e1cb
SG
4004
4005 if (strcmp(key, "lxc.cgroup") == 0)
4006 all = true;
4007
9ebb03ad 4008 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4009 struct lxc_cgroup *cg = it->elem;
4010 if (!all && strcmp(cg->subsystem, k) != 0)
4011 continue;
4012 lxc_list_del(it);
4013 free(cg->subsystem);
4014 free(cg->value);
4015 free(cg);
4016 free(it);
4017 }
4018 return 0;
4019}
4020
ee1e7aa0
SG
4021int lxc_clear_groups(struct lxc_conf *c)
4022{
4023 struct lxc_list *it,*next;
4024
4025 lxc_list_for_each_safe(it, &c->groups, next) {
4026 lxc_list_del(it);
4027 free(it->elem);
4028 free(it);
4029 }
4030 return 0;
4031}
4032
72d0e1cb
SG
4033int lxc_clear_mount_entries(struct lxc_conf *c)
4034{
9ebb03ad 4035 struct lxc_list *it,*next;
72d0e1cb 4036
9ebb03ad 4037 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4038 lxc_list_del(it);
4039 free(it->elem);
4040 free(it);
4041 }
4042 return 0;
4043}
4044
12a50cc6 4045int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4046{
9ebb03ad 4047 struct lxc_list *it,*next;
17ed13a3 4048 bool all = false, done = false;
12a50cc6 4049 const char *k = key + 9;
72d0e1cb
SG
4050 int i;
4051
17ed13a3
SH
4052 if (strcmp(key, "lxc.hook") == 0)
4053 all = true;
4054
72d0e1cb 4055 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4056 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4057 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4058 lxc_list_del(it);
4059 free(it->elem);
4060 free(it);
4061 }
4062 done = true;
72d0e1cb
SG
4063 }
4064 }
17ed13a3
SH
4065
4066 if (!done) {
4067 ERROR("Invalid hook key: %s", key);
4068 return -1;
4069 }
72d0e1cb
SG
4070 return 0;
4071}
8eb5694b 4072
74a3920a 4073static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4074{
4075 int i;
4076
0cf45501 4077 if (!conf->saved_nics)
7b35f3d6
SH
4078 return;
4079 for (i=0; i < conf->num_savednics; i++)
4080 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4081 free(conf->saved_nics);
4082}
4083
8eb5694b
SH
4084void lxc_conf_free(struct lxc_conf *conf)
4085{
4086 if (!conf)
4087 return;
4088 if (conf->console.path)
4089 free(conf->console.path);
54c30e29 4090 if (conf->rootfs.mount)
8eb5694b 4091 free(conf->rootfs.mount);
a17b1e65
SG
4092 if (conf->rootfs.options)
4093 free(conf->rootfs.options);
d95db067
DE
4094 if (conf->rootfs.path)
4095 free(conf->rootfs.path);
a58878d6
SH
4096 if (conf->rootfs.pivot)
4097 free(conf->rootfs.pivot);
4098 if (conf->logfile)
4099 free(conf->logfile);
d95db067
DE
4100 if (conf->utsname)
4101 free(conf->utsname);
4102 if (conf->ttydir)
4103 free(conf->ttydir);
4104 if (conf->fstab)
4105 free(conf->fstab);
fc7e8864
WM
4106 if (conf->rcfile)
4107 free(conf->rcfile);
8eb5694b 4108 lxc_clear_config_network(conf);
fe4de9a6
DE
4109 if (conf->lsm_aa_profile)
4110 free(conf->lsm_aa_profile);
4111 if (conf->lsm_se_context)
4112 free(conf->lsm_se_context);
769872f9 4113 lxc_seccomp_free(conf);
8eb5694b 4114 lxc_clear_config_caps(conf);
1fb86a7c 4115 lxc_clear_config_keepcaps(conf);
8eb5694b 4116 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4117 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4118 lxc_clear_mount_entries(conf);
7b35f3d6 4119 lxc_clear_saved_nics(conf);
27c27d73 4120 lxc_clear_idmaps(conf);
ee1e7aa0 4121 lxc_clear_groups(conf);
8eb5694b
SH
4122 free(conf);
4123}
4355ab5f
SH
4124
/*
 * Work description handed to run_userns_fn(): the callback to run, its
 * argument, and the pipe used to hold the callback back until one byte
 * has been received.
 */
struct userns_fn_data {
	int (*fn)(void *);	/* callback, invoked as fn(arg) */
	void *arg;		/* opaque argument for fn */
	int p[2];		/* pipe; p[0] is read from, p[1] is closed unused */
};
4130
4131static int run_userns_fn(void *data)
4132{
4133 struct userns_fn_data *d = data;
4134 char c;
4135 // we're not sharing with the parent any more, if it was a thread
4136
4137 close(d->p[1]);
4138 if (read(d->p[0], &c, 1) != 1)
4139 return -1;
4140 close(d->p[0]);
4141 return d->fn(d->arg);
4142}
4143
4144/*
4145 * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
4146 * alread there.
4147 * We may want to generalize this to do gids as well as uids, but right now
4148 * it's not necessary.
4149 */
4150static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
4151{
2133f58c 4152 int hostid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4355ab5f
SH
4153 struct lxc_list *new = NULL, *tmp, *it, *next;
4154 struct id_map *entry;
4155
3ec1648d
SH
4156 new = malloc(sizeof(*new));
4157 if (!new) {
4158 ERROR("Out of memory building id map");
4159 return NULL;
4160 }
4161 lxc_list_init(new);
4162
4355ab5f 4163 if (hostid_mapped < 0) {
2133f58c 4164 hostid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
3ec1648d
SH
4165 if (hostid_mapped < 0)
4166 goto err;
4167 tmp = malloc(sizeof(*tmp));
4168 if (!tmp)
4169 goto err;
4355ab5f
SH
4170 entry = malloc(sizeof(*entry));
4171 if (!entry) {
3ec1648d
SH
4172 free(tmp);
4173 goto err;
4355ab5f 4174 }
3ec1648d 4175 tmp->elem = entry;
4355ab5f
SH
4176 entry->idtype = ID_TYPE_UID;
4177 entry->nsid = hostid_mapped;
4178 entry->hostid = (unsigned long)uid;
4179 entry->range = 1;
3ec1648d 4180 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4181 }
4182 lxc_list_for_each_safe(it, &conf->id_map, next) {
4183 tmp = malloc(sizeof(*tmp));
4184 if (!tmp)
4185 goto err;
4186 entry = malloc(sizeof(*entry));
4187 if (!entry) {
4188 free(tmp);
4189 goto err;
4190 }
4191 memset(entry, 0, sizeof(*entry));
4192 memcpy(entry, it->elem, sizeof(*entry));
4193 tmp->elem = entry;
3ec1648d 4194 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4195 }
4196
4197 return new;
4198
4199err:
4200 ERROR("Out of memory building a new uid map");
908fde6a
SH
4201 if (new)
4202 lxc_free_idmap(new);
c30ac545 4203 free(new);
4355ab5f
SH
4204 return NULL;
4205}
4206
4207/*
4208 * Run a function in a new user namespace.
4209 * The caller's euid will be mapped in if it is not already.
4210 */
4211int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4212{
4213 int ret, pid;
4214 struct userns_fn_data d;
4215 char c = '1';
4216 int p[2];
4217 struct lxc_list *idmap;
4218
4355ab5f 4219 ret = pipe(p);
4355ab5f
SH
4220 if (ret < 0) {
4221 SYSERROR("opening pipe");
4222 return -1;
4223 }
4224 d.fn = fn;
4225 d.arg = data;
4226 d.p[0] = p[0];
4227 d.p[1] = p[1];
4228 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4229 if (pid < 0)
4230 goto err;
4355ab5f 4231 close(p[0]);
4355ab5f
SH
4232 p[0] = -1;
4233
4234 if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
4235 ERROR("Error adding self to container uid map");
4236 goto err;
4237 }
4238
4239 ret = lxc_map_ids(idmap, pid);
4240 lxc_free_idmap(idmap);
88dd66fc 4241 free(idmap);
565e571c 4242 if (ret) {
4355ab5f
SH
4243 ERROR("Error setting up child mappings");
4244 goto err;
4245 }
4246
4247 // kick the child
4248 if (write(p[1], &c, 1) != 1) {
4249 SYSERROR("writing to pipe to child");
4250 goto err;
4251 }
4252
3139aead
SG
4253 ret = wait_for_pid(pid);
4254
4255 close(p[1]);
4256 return ret;
4257
4355ab5f 4258err:
4355ab5f
SH
4259 if (p[0] != -1)
4260 close(p[0]);
4261 close(p[1]);
4355ab5f
SH
4262 return -1;
4263}