]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
lxc.spec: adjust for move of libexecdir/lxc-init to sbin/lxc.init
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
4a0ba80d 35#include <time.h>
e827ff7e
SG
36
37#if HAVE_PTY_H
b0a33c1e 38#include <pty.h>
e827ff7e
SG
39#else
40#include <../include/openpty.h>
41#endif
0ad19a3f 42
b3ecde1e
DL
43#include <linux/loop.h>
44
0ad19a3f 45#include <sys/types.h>
46#include <sys/utsname.h>
47#include <sys/param.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
50#include <sys/mount.h>
51#include <sys/mman.h>
81810dd1 52#include <sys/prctl.h>
0ad19a3f 53
54#include <arpa/inet.h>
55#include <fcntl.h>
56#include <netinet/in.h>
57#include <net/if.h>
6f4a3756 58#include <libgen.h>
0ad19a3f 59
e5bda9ee 60#include "network.h"
61#include "error.h"
b2718c72 62#include "parse.h"
1b09f2c0
DL
63#include "utils.h"
64#include "conf.h"
65#include "log.h"
d55bc1ad 66#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 67#include "bdev.h"
368bbc02 68#include "cgroup.h"
025ed0f3 69#include "lxclock.h"
4355ab5f 70#include "namespace.h"
fe4de9a6 71#include "lsm/lsm.h"
d0a36f2c 72
495d2046
SG
73#if HAVE_SYS_CAPABILITY_H
74#include <sys/capability.h>
75#endif
76
6ff05e18
SG
77#if HAVE_SYS_PERSONALITY_H
78#include <sys/personality.h>
79#endif
80
edaf8b1b
SG
81#if IS_BIONIC
82#include <../include/lxcmntent.h>
83#else
84#include <mntent.h>
85#endif
86
769872f9
SH
87#include "lxcseccomp.h"
88
36eb9bde 89lxc_log_define(lxc_conf, lxc);
e5bda9ee 90
0ad19a3f 91#define MAXHWLEN 18
92#define MAXINDEXLEN 20
442cbbe6 93#define MAXMTULEN 16
0ad19a3f 94#define MAXLINELEN 128
95
495d2046 96#if HAVE_SYS_CAPABILITY_H
b09094da
MN
97#ifndef CAP_SETFCAP
98#define CAP_SETFCAP 31
99#endif
100
101#ifndef CAP_MAC_OVERRIDE
102#define CAP_MAC_OVERRIDE 32
103#endif
104
105#ifndef CAP_MAC_ADMIN
106#define CAP_MAC_ADMIN 33
107#endif
495d2046 108#endif
b09094da
MN
109
110#ifndef PR_CAPBSET_DROP
111#define PR_CAPBSET_DROP 24
112#endif
113
9818cae4
SG
114#ifndef LO_FLAGS_AUTOCLEAR
115#define LO_FLAGS_AUTOCLEAR 4
116#endif
117
2d76d1d7
SG
/*
 * Define pivot_root() if missing from the C library.
 *
 * When HAVE_PIVOT_ROOT is not defined, a local wrapper issues the raw
 * syscall if __NR_pivot_root is known; otherwise it fails with ENOSYS.
 * Returns the syscall result, or -1 with errno set.
 */
#ifndef HAVE_PIVOT_ROOT
static int pivot_root(const char *new_root, const char *put_old)
{
#ifdef __NR_pivot_root
	return syscall(__NR_pivot_root, new_root, put_old);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#else
extern int pivot_root(const char *new_root, const char *put_old);
#endif
132
/*
 * Define sethostname() if missing from the C library.
 *
 * When HAVE_SETHOSTNAME is not defined, a local wrapper issues the raw
 * syscall if __NR_sethostname is known; otherwise it fails with ENOSYS.
 * Returns the syscall result, or -1 with errno set.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char *name, size_t len)
{
#ifdef __NR_sethostname
	return syscall(__NR_sethostname, name, len);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
145
72f919c4
SG
146/* Define __S_ISTYPE if missing from the C library */
147#ifndef __S_ISTYPE
148#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
149#endif
150
72d0e1cb 151char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 152 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 153
/* Signature shared by the per-type network device setup and shutdown handlers. */
typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 155
998ac676
RT
/*
 * One textual mount option and the MS_* flag it maps to.
 * NOTE(review): 'clear' presumably means the flag is cleared rather than
 * set when the option is seen - confirm against the option parser.
 */
struct mount_opt {
	char *name;
	int clear;
	int flag;
};
161
81810dd1
DL
/* Maps a capability name (as written in the configuration) to its CAP_* value. */
struct caps_opt {
	char *name;
	int value;
};
166
e3b4c4c4
ST
167static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
168static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
169static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
170static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
171static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 172static int instanciate_none(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 173
24654103
DL
174static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
175 [LXC_NET_VETH] = instanciate_veth,
176 [LXC_NET_MACVLAN] = instanciate_macvlan,
177 [LXC_NET_VLAN] = instanciate_vlan,
178 [LXC_NET_PHYS] = instanciate_phys,
179 [LXC_NET_EMPTY] = instanciate_empty,
26b797f3 180 [LXC_NET_NONE] = instanciate_none,
0ad19a3f 181};
182
74a2b586
JK
183static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
184static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
185static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
186static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
187static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 188static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586
JK
189
190static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
191 [LXC_NET_VETH] = shutdown_veth,
192 [LXC_NET_MACVLAN] = shutdown_macvlan,
193 [LXC_NET_VLAN] = shutdown_vlan,
194 [LXC_NET_PHYS] = shutdown_phys,
195 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 196 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
197};
198
998ac676 199static struct mount_opt mount_opt[] = {
88d413d5
SW
200 { "defaults", 0, 0 },
201 { "ro", 0, MS_RDONLY },
202 { "rw", 1, MS_RDONLY },
203 { "suid", 1, MS_NOSUID },
204 { "nosuid", 0, MS_NOSUID },
205 { "dev", 1, MS_NODEV },
206 { "nodev", 0, MS_NODEV },
207 { "exec", 1, MS_NOEXEC },
208 { "noexec", 0, MS_NOEXEC },
209 { "sync", 0, MS_SYNCHRONOUS },
210 { "async", 1, MS_SYNCHRONOUS },
211 { "dirsync", 0, MS_DIRSYNC },
212 { "remount", 0, MS_REMOUNT },
213 { "mand", 0, MS_MANDLOCK },
214 { "nomand", 1, MS_MANDLOCK },
215 { "atime", 1, MS_NOATIME },
216 { "noatime", 0, MS_NOATIME },
217 { "diratime", 1, MS_NODIRATIME },
218 { "nodiratime", 0, MS_NODIRATIME },
219 { "bind", 0, MS_BIND },
220 { "rbind", 0, MS_BIND|MS_REC },
221 { "relatime", 0, MS_RELATIME },
222 { "norelatime", 1, MS_RELATIME },
223 { "strictatime", 0, MS_STRICTATIME },
224 { "nostrictatime", 1, MS_STRICTATIME },
225 { NULL, 0, 0 },
998ac676
RT
226};
227
495d2046 228#if HAVE_SYS_CAPABILITY_H
81810dd1 229static struct caps_opt caps_opt[] = {
a6afdde9 230 { "chown", CAP_CHOWN },
1e11be34
DL
231 { "dac_override", CAP_DAC_OVERRIDE },
232 { "dac_read_search", CAP_DAC_READ_SEARCH },
233 { "fowner", CAP_FOWNER },
234 { "fsetid", CAP_FSETID },
81810dd1
DL
235 { "kill", CAP_KILL },
236 { "setgid", CAP_SETGID },
237 { "setuid", CAP_SETUID },
238 { "setpcap", CAP_SETPCAP },
239 { "linux_immutable", CAP_LINUX_IMMUTABLE },
240 { "net_bind_service", CAP_NET_BIND_SERVICE },
241 { "net_broadcast", CAP_NET_BROADCAST },
242 { "net_admin", CAP_NET_ADMIN },
243 { "net_raw", CAP_NET_RAW },
244 { "ipc_lock", CAP_IPC_LOCK },
245 { "ipc_owner", CAP_IPC_OWNER },
246 { "sys_module", CAP_SYS_MODULE },
247 { "sys_rawio", CAP_SYS_RAWIO },
248 { "sys_chroot", CAP_SYS_CHROOT },
249 { "sys_ptrace", CAP_SYS_PTRACE },
250 { "sys_pacct", CAP_SYS_PACCT },
251 { "sys_admin", CAP_SYS_ADMIN },
252 { "sys_boot", CAP_SYS_BOOT },
253 { "sys_nice", CAP_SYS_NICE },
254 { "sys_resource", CAP_SYS_RESOURCE },
255 { "sys_time", CAP_SYS_TIME },
256 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
257 { "mknod", CAP_MKNOD },
258 { "lease", CAP_LEASE },
9527e566 259#ifdef CAP_AUDIT_WRITE
81810dd1 260 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
261#endif
262#ifdef CAP_AUDIT_CONTROL
81810dd1 263 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 264#endif
81810dd1
DL
265 { "setfcap", CAP_SETFCAP },
266 { "mac_override", CAP_MAC_OVERRIDE },
267 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
268#ifdef CAP_SYSLOG
269 { "syslog", CAP_SYSLOG },
270#endif
271#ifdef CAP_WAKE_ALARM
272 { "wake_alarm", CAP_WAKE_ALARM },
273#endif
81810dd1 274};
495d2046
SG
275#else
276static struct caps_opt caps_opt[] = {};
277#endif
81810dd1 278
91c3830e
SH
279static int run_buffer(char *buffer)
280{
ebec9176 281 struct lxc_popen_FILE *f;
91c3830e 282 char *output;
8e7da691 283 int ret;
91c3830e 284
ebec9176 285 f = lxc_popen(buffer);
91c3830e
SH
286 if (!f) {
287 SYSERROR("popen failed");
288 return -1;
289 }
290
291 output = malloc(LXC_LOG_BUFFER_SIZE);
292 if (!output) {
293 ERROR("failed to allocate memory for script output");
ebec9176 294 lxc_pclose(f);
91c3830e
SH
295 return -1;
296 }
297
ebec9176 298 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
299 DEBUG("script output: %s", output);
300
301 free(output);
302
ebec9176 303 ret = lxc_pclose(f);
8e7da691 304 if (ret == -1) {
91c3830e
SH
305 SYSERROR("Script exited on error");
306 return -1;
8e7da691
DE
307 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
308 ERROR("Script exited with status %d", WEXITSTATUS(ret));
309 return -1;
310 } else if (WIFSIGNALED(ret)) {
311 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
312 strsignal(WTERMSIG(ret)));
313 return -1;
91c3830e
SH
314 }
315
316 return 0;
317}
318
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it via run_buffer().
 *
 * name, section, script and hook must be non-NULL strings; argsin may be
 * NULL or a NULL-terminated vector of extra arguments. lxcpath is accepted
 * for interface compatibility but is not used here.
 *
 * The command buffer lives on the heap: its size depends on caller-supplied
 * strings, and alloca() neither reports failure nor bounds stack usage.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook,
			   const char *lxcpath, char **argsin)
{
	int ret, i;
	int rv = -1;
	char *buffer;
	size_t size = 0;

	(void)lxcpath;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3; /* two separating spaces and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		ret += rc;
	}

	rv = run_buffer(buffer);
out:
	free(buffer);
	return rv;
}
368
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" from a
 * NULL-terminated list of 'char *' variadic arguments and execute it via
 * run_buffer().
 *
 * The command buffer lives on the heap (alloca() cannot report failure and
 * the size depends on caller-supplied strings), and va_end() is reached on
 * every path that called va_start().
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	int rv = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: each extra argument is preceded by one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3; /* two separating spaces and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out_free;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out_free;
		}
		ret += rc;
	}
	va_end(ap);

	rv = run_buffer(buffer);
out_free:
	free(buffer);
	return rv;
}
420
/*
 * Per-line callback used by mount_unknown_fs(): treat 'buffer' as one line
 * of a filesystem list and try to mount cbarg->rootfs on cbarg->target
 * with that filesystem type.
 *
 * 'data' points to a struct with the same layout as the cbarg struct in
 * mount_unknown_fs().
 *
 * Returns 1 when the mount succeeded, 0 when this line was skipped or the
 * mount failed (so another type can be tried), and -1 when the mount
 * options could not be parsed.
 */
static int find_fstype_cb(char *buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/* initialised so the free() calls below are safe on every path */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* trim the line on both sides using the project helpers */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
461
a17b1e65
SG
462static int mount_unknown_fs(const char *rootfs, const char *target,
463 const char *options)
78ae2fcc 464{
a6afdde9 465 int i;
78ae2fcc 466
467 struct cbarg {
468 const char *rootfs;
a6afdde9 469 const char *target;
a17b1e65 470 const char *options;
78ae2fcc 471 } cbarg = {
472 .rootfs = rootfs,
a6afdde9 473 .target = target,
a17b1e65 474 .options = options,
78ae2fcc 475 };
476
a6afdde9
DL
477 /*
478 * find the filesystem type with brute force:
479 * first we check with /etc/filesystems, in case the modules
78ae2fcc 480 * are auto-loaded and fall back to the supported kernel fs
481 */
482 char *fsfile[] = {
483 "/etc/filesystems",
484 "/proc/filesystems",
485 };
486
a6afdde9
DL
487 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
488
489 int ret;
490
491 if (access(fsfile[i], F_OK))
492 continue;
493
494 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
495 if (ret < 0) {
496 ERROR("failed to parse '%s'", fsfile[i]);
497 return -1;
498 }
499
500 if (ret)
501 return 0;
78ae2fcc 502 }
503
a6afdde9
DL
504 ERROR("failed to determine fs type for '%s'", rootfs);
505 return -1;
506}
507
a17b1e65
SG
/*
 * Recursively bind-mount the directory 'rootfs' on 'target', adding any
 * flags and data parsed from 'options'.
 *
 * Returns the result of mount(2), or -1 if the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/* initialised so free() is safe even if parsing fails early */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
525
/*
 * Attach the image file 'rootfs' to the loop device open on 'fd' and mark
 * the device auto-clear.
 *
 * 'loinfo' is zeroed and filled in by this function. If setting the status
 * fails after the backing file was attached, the device is detached again
 * so it is not left bound to the image.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD; autoclear was never armed */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
557
a17b1e65
SG
/*
 * Mount the image file 'rootfs' on 'target' through a loop device.
 *
 * Scans /dev for entries whose name starts with "loop", picks the first
 * one for which LOOP_GET_STATUS64 fails with ENXIO (treated as free),
 * attaches the image to it and mounts it with mount_unknown_fs().
 *
 * Uses readdir() rather than the deprecated readdir_r(); the DIR stream is
 * private to this call.
 *
 * Returns 0 on success, -1 on failure or when no free loop device exists.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((direntp = readdir(dir))) {

		if (!strcmp(direntp->d_name, "."))
			continue;

		if (!strcmp(direntp->d_name, ".."))
			continue;

		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		/* only the first free device is tried */
		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
622
a17b1e65
SG
/*
 * Mount the block device 'rootfs' on 'target'. The filesystem type is not
 * known, so this delegates to mount_unknown_fs() and returns its result.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	return mount_unknown_fs(rootfs, target, options);
}
628
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned (rootfs is NULL or empty, cannot
 *           be resolved, or is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both output errors and truncation */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the file alive after the name is gone */
	(void)unlink(absrootfspin);
	return fd;
}
671
4fb3cba5 672static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 673{
368bbc02 674 int r;
b06b8511
CS
675 size_t i;
676 static struct {
677 int match_mask;
678 int match_flag;
679 const char *source;
680 const char *destination;
681 const char *fstype;
682 unsigned long flags;
683 const char *options;
684 } default_mounts[] = {
685 /* Read-only bind-mounting... In older kernels, doing that required
686 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
687 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
688 * kernel 2.6.26 onwards. However, this apparently does not work on
689 * kernel 3.8. Unfortunately, on that very same kernel, doing the
690 * same trick as above doesn't seem to work either, there one needs
691 * to ALSO specify MS_BIND for the remount, otherwise the entire
692 * fs is remounted read-only or the mount fails because it's busy...
693 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
694 * 2.6.32...
368bbc02 695 */
b06b8511
CS
696 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
697 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
698 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
699 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
700 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
701 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
702 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
703 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
704 { 0, 0, NULL, NULL, NULL, 0, NULL }
705 };
368bbc02 706
b06b8511
CS
707 for (i = 0; default_mounts[i].match_mask; i++) {
708 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
709 char *source = NULL;
710 char *destination = NULL;
711 int saved_errno;
712
713 if (default_mounts[i].source) {
714 /* will act like strdup if %r is not present */
715 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
716 if (!source) {
717 SYSERROR("memory allocation error");
718 return -1;
719 }
720 }
721 if (default_mounts[i].destination) {
722 /* will act like strdup if %r is not present */
723 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
724 if (!destination) {
725 saved_errno = errno;
726 SYSERROR("memory allocation error");
727 free(source);
728 errno = saved_errno;
729 return -1;
730 }
731 }
732 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
733 saved_errno = errno;
c414be25
DE
734 if (r < 0)
735 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
736 free(source);
737 free(destination);
738 if (r < 0) {
b06b8511
CS
739 errno = saved_errno;
740 return -1;
741 }
368bbc02 742 }
368bbc02
CS
743 }
744
b06b8511 745 if (flags & LXC_AUTO_CGROUP_MASK) {
4fb3cba5
DE
746 if (!cgroup_mount(conf->rootfs.mount, handler,
747 flags & LXC_AUTO_CGROUP_MASK)) {
368bbc02 748 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 749 return -1;
368bbc02
CS
750 }
751 }
752
368bbc02 753 return 0;
368bbc02
CS
754}
755
a17b1e65 756static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 757{
b09ef133 758 char absrootfs[MAXPATHLEN];
78ae2fcc 759 struct stat s;
a6afdde9 760 int i;
78ae2fcc 761
a17b1e65 762 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 763
764 struct rootfs_type {
765 int type;
766 rootfs_cb cb;
767 } rtfs_type[] = {
2656d231
DL
768 { S_IFDIR, mount_rootfs_dir },
769 { S_IFBLK, mount_rootfs_block },
770 { S_IFREG, mount_rootfs_file },
78ae2fcc 771 };
0ad19a3f 772
4c8ab83b 773 if (!realpath(rootfs, absrootfs)) {
36eb9bde 774 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 775 return -1;
776 }
b09ef133 777
b09ef133 778 if (access(absrootfs, F_OK)) {
36eb9bde 779 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 780 return -1;
781 }
782
78ae2fcc 783 if (stat(absrootfs, &s)) {
36eb9bde 784 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 785 return -1;
786 }
787
78ae2fcc 788 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 789
78ae2fcc 790 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
791 continue;
9b0f0477 792
a17b1e65 793 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 794 }
9b0f0477 795
36eb9bde 796 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 797 return -1;
0ad19a3f 798}
799
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL 'utsname' means nothing is configured and counts as success.
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	if (!utsname)
		return 0;

	if (sethostname(utsname->nodename, strlen(utsname->nodename))) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
814
69aa6655
DE
/* One symlink to create under the container's /dev: <name> -> <oldpath>. */
struct dev_symlinks {
	const char *oldpath;
	const char *name;
};

/* Standard /dev entries that point at the process's own file descriptors. */
static const struct dev_symlinks dev_symlinks[] = {
	{"/proc/self/fd",	"fd"},
	{"/proc/self/fd/0",	"stdin"},
	{"/proc/self/fd/1",	"stdout"},
	{"/proc/self/fd/2",	"stderr"},
};
826
827static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
828{
829 char path[MAXPATHLEN];
830 int ret,i;
831
832
833 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
834 const struct dev_symlinks *d = &dev_symlinks[i];
835 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
836 if (ret < 0 || ret >= MAXPATHLEN)
837 return -1;
838 ret = symlink(d->oldpath, path);
839 if (ret && errno != EEXIST) {
840 SYSERROR("Error creating %s", path);
841 return -1;
842 }
843 }
844 return 0;
845}
846
33fcb7a0 847static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 848 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 849{
7c6ef2a2
SH
850 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
851 int i, ret;
b0a33c1e 852
bc9bd0e3
DL
853 if (!rootfs->path)
854 return 0;
855
b0a33c1e 856 for (i = 0; i < tty_info->nbtty; i++) {
857
858 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
859
7c6ef2a2 860 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 861 rootfs->mount, i + 1);
7c6ef2a2
SH
862 if (ret >= sizeof(path)) {
863 ERROR("pathname too long for ttys");
864 return -1;
865 }
866 if (ttydir) {
867 /* create dev/lxc/tty%d" */
9ba8130c 868 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
869 rootfs->mount, ttydir, i + 1);
870 if (ret >= sizeof(lxcpath)) {
871 ERROR("pathname too long for ttys");
872 return -1;
873 }
874 ret = creat(lxcpath, 0660);
875 if (ret==-1 && errno != EEXIST) {
959aee9c 876 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
877 return -1;
878 }
4d44e274
SH
879 if (ret >= 0)
880 close(ret);
7c6ef2a2
SH
881 ret = unlink(path);
882 if (ret && errno != ENOENT) {
959aee9c 883 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
884 return -1;
885 }
b0a33c1e 886
7c6ef2a2
SH
887 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
888 WARN("failed to mount '%s'->'%s'",
889 pty_info->name, path);
890 continue;
891 }
13954cce 892
9ba8130c
SH
893 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
894 if (ret >= sizeof(lxcpath)) {
895 ERROR("tty pathname too long");
896 return -1;
897 }
7c6ef2a2
SH
898 ret = symlink(lxcpath, path);
899 if (ret) {
959aee9c 900 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
901 return -1;
902 }
903 } else {
c6883f38
SH
904 /* If we populated /dev, then we need to create /dev/ttyN */
905 if (access(path, F_OK)) {
906 ret = creat(path, 0660);
907 if (ret==-1) {
959aee9c 908 SYSERROR("error creating %s", path);
c6883f38 909 /* this isn't fatal, continue */
025ed0f3 910 } else {
c6883f38 911 close(ret);
025ed0f3 912 }
c6883f38 913 }
7c6ef2a2
SH
914 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
915 WARN("failed to mount '%s'->'%s'",
916 pty_info->name, path);
917 continue;
918 }
b0a33c1e 919 }
920 }
921
cd54d859
DL
922 INFO("%d tty(s) has been setup", tty_info->nbtty);
923
b0a33c1e 924 return 0;
925}
926
7a7ff0c6 927static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
928{
929 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 930 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
931 int found;
932 void **cbparm;
933
934 mountentry = buffer;
935 cbparm = (void **)data;
936
937 mountlist = cbparm[0];
938 pivotdir = cbparm[1];
939
940 /* parse entry, first field is mountname, ignore */
2796cf79 941 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
942 if (!mountpoint)
943 return -1;
944
945 /* second field is mountpoint */
2796cf79 946 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
947 if (!mountpoint)
948 return -1;
949
950 /* only consider mountpoints below old root fs */
951 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
952 return 0;
953
954 /* filter duplicate mountpoints */
955 found = 0;
956 lxc_list_for_each(iterator, mountlist) {
957 if (!strcmp(iterator->elem, mountpoint)) {
958 found = 1;
959 break;
960 }
961 }
962 if (found)
963 return 0;
964
965 /* add entry to list */
966 listentry = malloc(sizeof(*listentry));
967 if (!listentry) {
968 SYSERROR("malloc for mountpoint listentry failed");
969 return -1;
970 }
971
972 listentry->elem = strdup(mountpoint);
973 if (!listentry->elem) {
974 SYSERROR("strdup failed");
00b6be44 975 free(listentry);
bf601689
MH
976 return -1;
977 }
978 lxc_list_add_tail(mountlist, listentry);
979
980 return 0;
981}
982
cc6f6dd7 983static int umount_oldrootfs(const char *oldrootfs)
bf601689 984{
2382ecff 985 char path[MAXPATHLEN];
bf601689 986 void *cbparm[2];
9ebb03ad 987 struct lxc_list mountlist, *iterator, *next;
bf601689 988 int ok, still_mounted, last_still_mounted;
9ba8130c 989 int rc;
bf601689
MH
990
991 /* read and parse /proc/mounts in old root fs */
992 lxc_list_init(&mountlist);
993
cc6f6dd7 994 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
995 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
996 if (rc >= sizeof(path)) {
997 ERROR("rootfs name too long");
998 return -1;
999 }
bf601689 1000 cbparm[0] = &mountlist;
bf601689 1001
cc6f6dd7 1002 cbparm[1] = strdup(path);
bf601689
MH
1003 if (!cbparm[1]) {
1004 SYSERROR("strdup failed");
1005 return -1;
1006 }
1007
9ba8130c
SH
1008 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
1009 if (rc >= sizeof(path)) {
1010 ERROR("container proc/mounts name too long");
1011 return -1;
1012 }
cc6f6dd7
DL
1013
1014 ok = lxc_file_for_each_line(path,
1015 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
1016 if (ok < 0) {
1017 SYSERROR("failed to read or parse mount list '%s'", path);
1018 return -1;
1019 }
1020
1021 /* umount filesystems until none left or list no longer shrinks */
1022 still_mounted = 0;
1023 do {
1024 last_still_mounted = still_mounted;
1025 still_mounted = 0;
1026
9ebb03ad 1027 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 1028
c08556c6 1029 /* umount normally */
bf601689
MH
1030 if (!umount(iterator->elem)) {
1031 DEBUG("umounted '%s'", (char *)iterator->elem);
1032 lxc_list_del(iterator);
1033 continue;
1034 }
1035
bf601689
MH
1036 still_mounted++;
1037 }
7df119ee 1038
bf601689
MH
1039 } while (still_mounted > 0 && still_mounted != last_still_mounted);
1040
7df119ee 1041
c08556c6
DL
1042 lxc_list_for_each(iterator, &mountlist) {
1043
1044 /* let's try a lazy umount */
1045 if (!umount2(iterator->elem, MNT_DETACH)) {
1046 INFO("lazy unmount of '%s'", (char *)iterator->elem);
1047 continue;
1048 }
1049
1050 /* be more brutal (nfs) */
1051 if (!umount2(iterator->elem, MNT_FORCE)) {
1052 INFO("forced unmount of '%s'", (char *)iterator->elem);
1053 continue;
1054 }
1055
7df119ee 1056 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1057 }
bf601689 1058
cc6f6dd7
DL
1059 return 0;
1060}
1061
/*
 * Make 'rootfs' the new root with pivot_root(2), parking the old root on
 * <rootfs>/<pivotdir> ("lxc_putold" when pivotdir is NULL), then unmount
 * the old root's filesystems.
 *
 * The pivot directory is created if missing, and in that case removed
 * again afterwards; failing to remove it is only a warning.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char path[MAXPATHLEN];
	int remove_pivotdir = 0;
	int rc;

	/* change into new root fs */
	if (chdir(rootfs)) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (!pivotdir)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	rc = snprintf(path, sizeof(path), "%s/%s", rootfs, pivotdir);
	if (rc < 0 || (size_t)rc >= sizeof(path)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(path, F_OK)) {

		if (mkdir_p(path, 0755) < 0) {
			SYSERROR("failed to create pivotdir '%s'", path);
			return -1;
		}

		remove_pivotdir = 1;
		DEBUG("created '%s' directory", path);
	}

	DEBUG("mountpoint for old rootfs is '%s'", path);

	/* pivot_root into our new root fs */
	if (pivot_root(".", path)) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/")) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* we switch from absolute path to relative path */
	if (umount_oldrootfs(pivotdir))
		return -1;

	/* remove temporary mount point, we don't consider the removing
	 * as fatal */
	if (remove_pivotdir && rmdir(pivotdir))
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
1121
bc6928ff
MW
/*
 * Check to see if a directory has something mounted on it and,
 * if it does, return the fstype.
 *
 * Code largely based on detect_shared_rootfs below
 *
 * dir:    directory to look up; it is compared verbatim against the
 *         second field of each line of /proc/self/mounts
 * fstype: optional output buffer of at least MAX_FSTYPE_LEN bytes,
 *         may be NULL when the caller only needs the count
 *
 * Returns: # of matching entries in /proc/self/mounts
 *          if != 0 fstype is filled with the last filesystem value.
 *          if == 0 no matches found, fstype unchanged. 0 is also
 *          returned when dir is not an accessible directory or
 *          /proc/self/mounts cannot be opened.
 *
 * ToDo: Maybe return the mount options in another parameter...
 */

#define LINELEN 4096
#define MAX_FSTYPE_LEN 128
static int mount_check_fs(const char *dir, char *fstype)
{
	char buf[LINELEN];
	struct stat s;
	FILE *f;
	int found_fs = 0;

	DEBUG("entering mount_check_fs for %s", dir);

	if (access(dir, F_OK) != 0 || stat(dir, &s) != 0 || !S_ISDIR(s.st_mode))
		return 0;

	f = fopen("/proc/self/mounts", "r");
	if (!f)
		return 0;

	while (fgets(buf, LINELEN, f)) {
		char *mntpoint, *type, *sep;

		/* first field (mount source) is skipped */
		sep = strchr(buf, ' ');
		if (!sep)
			continue;
		mntpoint = sep + 1;

		/* second field: mount point */
		sep = strchr(mntpoint, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		/* Compare the directory in the entry to desired */
		if (strcmp(mntpoint, dir) != 0)
			continue;

		/* third field: filesystem type */
		type = sep + 1;
		sep = strchr(type, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		++found_fs;

		/* always NUL-terminated, truncated to the buffer size */
		if (fstype)
			snprintf(fstype, MAX_FSTYPE_LEN, "%s", type);
	}

	fclose(f);

	/*
	 * fstype may be NULL, and it is left untouched when nothing
	 * matched, so it must not be handed to %s unconditionally.
	 */
	DEBUG("mount_check_fs returning %d last %s", found_fs,
	      (found_fs && fstype) ? fstype : "(none)");

	return found_fs;
}
1191
1192/*
1193 * Locate a devtmpfs mount (should be on /dev) and create a container
1194 * subdirectory on it which we can then bind mount to the container
1195 * /dev instead of mounting a tmpfs there.
1196 * If we fail, return NULL.
1197 * Else return the pointer to the name buffer with the string to
1198 * the devtmpfs subdirectory.
1199 */
1200
74a3920a 1201static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1202{
1203 int ret;
1204 struct stat s;
1205 char tmp_path[MAXPATHLEN];
1206 char fstype[MAX_FSTYPE_LEN];
1207 char *base_path = "/dev/.lxc";
1208 char *user_path = "/dev/.lxc/user";
1209 uint64_t hash;
1210
1211 if ( 0 != access(base_path, F_OK) || 0 != stat(base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1212 /* This is just making /dev/.lxc it better work or we're done */
1213 ret = mkdir(base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1214 if ( ret ) {
1215 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1216 return NULL;
1217 }
1218 }
1219
1220 /*
1221 * Programmers notes:
1222 * We can not do mounts in this area of code that we want
1223 * to be visible in the host. Consequently, /dev/.lxc must
1224 * be set up earlier if we need a tmpfs mounted there.
1225 * That only affects the rare cases where autodev is enabled
1226 * for a container and devtmpfs is not mounted on /dev in the
1227 * host. In that case, we'll fall back to the old method
1228 * of mounting a tmpfs in the container and have no visibility
1229 * into the container /dev.
1230 */
1231 if( ! mount_check_fs( "/dev", fstype )
1232 || strcmp( "devtmpfs", fstype ) ) {
1233 /* Either /dev was not mounted or was not devtmpfs */
1234
1235 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1236 /*
1237 * /dev/.lxc is not already mounted
1238 * Doing a mount here does no good, since
1239 * it's not visible in the host.
1240 */
1241
1242 ERROR("/dev/.lxc is not setup - taking fallback" );
1243 return NULL;
1244 }
1245 }
1246
1247 if ( 0 != access(user_path, F_OK) || 0 != stat(user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1248 /*
1249 * This is making /dev/.lxc/user path for non-priv users.
1250 * If this doesn't work, we'll have to fall back in the
1251 * case of non-priv users. It's mode 1777 like /tmp.
1252 */
1253 ret = mkdir(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1254 if ( ret ) {
1255 /* Issue an error but don't fail yet! */
1256 ERROR("Unable to create /dev/.lxc/user");
1257 }
1258 /* Umask tends to screw us up here */
1259 chmod(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1260 }
1261
1262 /*
1263 * Since the container name must be unique within a given
1264 * lxcpath, we're going to use a hash of the path
1265 * /lxcpath/name as our hash name in /dev/.lxc/
1266 */
1267
1268 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1269 if (ret < 0 || ret >= MAXPATHLEN)
1270 return NULL;
1271
1272 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1273
1274 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, base_path, name, hash);
1275 if (ret < 0 || ret >= MAXPATHLEN)
1276 return NULL;
1277
1278 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1279 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1280 if ( ret ) {
1281 /* Something must have failed with the base_path...
1282 * Maybe unpriv user. Try user_path now... */
1283 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1284
1285 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, user_path, name, hash);
1286 if (ret < 0 || ret >= MAXPATHLEN)
1287 return NULL;
1288
1289 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1290 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1291 if ( ret ) {
1292 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1293 return NULL;
1294 }
1295 }
1296 }
1297 }
1298
1299 strcpy( path, tmp_path );
1300 return path;
1301}
1302
1303
91c3830e
SH
/*
 * Mount a /dev for the container at "<root>/dev" and make sure
 * "<root>/dev/pts" exists.
 *
 * name:    container name
 * root:    path of the container root
 * lxcpath: lxcpath the container lives in
 *
 * If mk_devtmpfs() provides a directory it is bind mounted and
 * "<lxcpath>/<name>/rootfs.dev" is made a symlink to it. Otherwise, if
 * something is already mounted on "<lxcpath>/<name>/rootfs.dev" that is
 * bind mounted, else a tmpfs is mounted.
 *
 * Returns 0 on success, -1 on failure.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s", root);

	/* snprintf returning exactly MAXPATHLEN already means truncation */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs(name, devtmpfs_path, lxcpath)) {
		/*
		 * Get rid of old links and directories
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink(host_path);
		rmdir(host_path);
		ret = symlink(devtmpfs_path, host_path);

		if (ret < 0) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path, path);
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0);
	} else {
		/* Only mount a tmpfs on here if we don't already have a mount */
		if (!mount_check_fs(host_path, NULL)) {
			/* the tmpfs goes onto the container /dev, i.e. 'path' */
			DEBUG("Mounting tmpfs to %s", path);
			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path);
			ret = mount(host_path, path, NULL, MS_BIND, 0);
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if (access(path, F_OK) != 0 || stat(path, &s) != 0 || !S_ISDIR(s.st_mode)) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1377
/* Description of one device node created by setup_autodev(). */
struct lxc_devs {
	const char *name;	/* node name below <root>/dev */
	mode_t mode;		/* file type and permission bits passed to mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes populated into a container's /dev by setup_autodev(). */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1394
74a3920a 1395static int setup_autodev(const char *root)
c6883f38
SH
1396{
1397 int ret;
c6883f38
SH
1398 char path[MAXPATHLEN];
1399 int i;
3a32201c 1400 mode_t cmask;
c6883f38 1401
959aee9c 1402 INFO("Creating initial consoles under %s/dev", root);
91c3830e 1403
c6883f38 1404 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1405 if (ret < 0 || ret >= MAXPATHLEN) {
1406 ERROR("Error calculating container /dev location");
c6883f38 1407 return -1;
f7bee6c6 1408 }
91c3830e 1409
959aee9c 1410 INFO("Populating /dev under %s", root);
3a32201c 1411 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1412 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1413 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1414 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1415 if (ret < 0 || ret >= MAXPATHLEN)
1416 return -1;
1417 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1418 if (ret && errno != EEXIST) {
959aee9c 1419 SYSERROR("Error creating %s", d->name);
c6883f38
SH
1420 return -1;
1421 }
1422 }
3a32201c 1423 umask(cmask);
c6883f38 1424
959aee9c 1425 INFO("Populated /dev under %s", root);
c6883f38
SH
1426 return 0;
1427}
1428
cc28d0b0
SH
1429/*
1430 * I'll forgive you for asking whether all of this is needed :) The
1431 * answer is yes.
1432 * pivot_root will fail if the new root, the put_old dir, or the parent
1433 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1434 * or may not be current->fs_root - if we assumed it always was, we could
1435 * just mount --make-rslave /). So,
1436 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1437 * 2. make that MS_SLAVE
1438 * 3. make a 'root' directory under that
1439 * 4. mount --rbind / under the $tinyroot/root.
1440 * 5. make that rslave
1441 * 6. chdir and chroot into $tinyroot/root
1442 * 7. $tinyroot will be unmounted by our parent in start.c
1443 */
1444static int chroot_into_slave(struct lxc_conf *conf)
1445{
1446 char path[MAXPATHLEN];
1447 const char *destpath = conf->rootfs.mount;
1448 int ret;
1449
1450 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1451 SYSERROR("failed to mount %s bind", destpath);
1452 return -1;
1453 }
1454 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1455 SYSERROR("failed to make %s slave", destpath);
1456 return -1;
1457 }
58ab99ae 1458 if (mount("none", destpath, "tmpfs", 0, "size=10000,mode=755")) {
cc28d0b0
SH
1459 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1460 return -1;
1461 }
1462 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1463 if (ret < 0 || ret >= MAXPATHLEN) {
1464 ERROR("out of memory making root path");
1465 return -1;
1466 }
1467 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1468 SYSERROR("Failed to create /dev/pts in container");
1469 return -1;
1470 }
1471 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1472 SYSERROR("Failed to rbind mount / to %s", path);
1473 return -1;
1474 }
1475 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1476 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1477 return -1;
1478 }
cc28d0b0
SH
1479 if (chroot(path)) {
1480 SYSERROR("Failed to chroot into tmp-/");
1481 return -1;
1482 }
6b9324bd
SG
1483 if (chdir("/")) {
1484 SYSERROR("Failed to chdir into tmp-/");
1485 return -1;
1486 }
959aee9c 1487 INFO("Chrooted into tmp-/ at %s", path);
cc28d0b0
SH
1488 return 0;
1489}
1490
1491static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1492{
cc28d0b0
SH
1493 const struct lxc_rootfs *rootfs = &conf->rootfs;
1494
a0f379bf
DW
1495 if (!rootfs->path) {
1496 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1497 SYSERROR("Failed to make / rslave");
1498 return -1;
1499 }
c69bd12f 1500 return 0;
a0f379bf 1501 }
0ad19a3f 1502
12297168 1503 if (access(rootfs->mount, F_OK)) {
b1789442 1504 SYSERROR("failed to access to '%s', check it is present",
12297168 1505 rootfs->mount);
b1789442
DL
1506 return -1;
1507 }
1508
9be53773 1509 // First try mounting rootfs using a bdev
a17b1e65 1510 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, rootfs->options);
9be53773 1511 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1512 bdev_put(bdev);
9be53773
SH
1513 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1514 return 0;
1515 }
59d66af2
SH
1516 if (bdev)
1517 bdev_put(bdev);
a17b1e65 1518 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1519 ERROR("failed to mount rootfs");
c3f0a28c 1520 return -1;
1521 }
0ad19a3f 1522
12297168 1523 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1524
ac778708
DL
1525 return 0;
1526}
1527
74a3920a 1528static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1529{
ac778708
DL
1530 if (!rootfs->path)
1531 return 0;
1532
12297168 1533 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1534 ERROR("failed to setup pivot root");
25368b52 1535 return -1;
c69bd12f
DL
1536 }
1537
25368b52 1538 return 0;
0ad19a3f 1539}
1540
d852c78c 1541static int setup_pts(int pts)
3c26f34e 1542{
77890c6d
SW
1543 char target[PATH_MAX];
1544
d852c78c
DL
1545 if (!pts)
1546 return 0;
3c26f34e 1547
1548 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1549 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1550 return -1;
1551 }
1552
a6afdde9 1553 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1554 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1555 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1556 return -1;
1557 }
1558
3c26f34e 1559 if (access("/dev/ptmx", F_OK)) {
1560 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1561 goto out;
36eb9bde 1562 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1563 return -1;
1564 }
1565
77890c6d
SW
1566 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1567 goto out;
1568
3c26f34e 1569 /* fallback here, /dev/pts/ptmx exists just mount bind */
1570 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1571 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1572 return -1;
1573 }
cd54d859
DL
1574
1575 INFO("created new pts instance");
d852c78c 1576
3c26f34e 1577out:
1578 return 0;
1579}
1580
cccc74b5
DL
/*
 * Apply the requested execution domain with personality().
 *
 * persona: value handed to personality(); -1 means "leave unchanged"
 *
 * When HAVE_SYS_PERSONALITY_H is not set this is a no-op.
 *
 * Returns 0 on success, -1 if personality() failed.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1597
7c6ef2a2 1598static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1599 const struct lxc_console *console)
6e590161 1600{
63376d7d
DL
1601 char path[MAXPATHLEN];
1602 struct stat s;
7c6ef2a2 1603 int ret;
52e35957 1604
7c6ef2a2
SH
1605 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1606 if (ret >= sizeof(path)) {
959aee9c 1607 ERROR("console path too long");
7c6ef2a2
SH
1608 return -1;
1609 }
52e35957 1610
63376d7d 1611 if (access(path, F_OK)) {
466978b0 1612 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1613 return 0;
52e35957
DL
1614 }
1615
b5159817
DE
1616 if (console->master < 0) {
1617 INFO("no console");
f78a1f32
DL
1618 return 0;
1619 }
ed502555 1620
63376d7d
DL
1621 if (stat(path, &s)) {
1622 SYSERROR("failed to stat '%s'", path);
1623 return -1;
1624 }
1625
1626 if (chmod(console->name, s.st_mode)) {
1627 SYSERROR("failed to set mode '0%o' to '%s'",
1628 s.st_mode, console->name);
1629 return -1;
1630 }
13954cce 1631
63376d7d
DL
1632 if (mount(console->name, path, "none", MS_BIND, 0)) {
1633 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1634 return -1;
1635 }
1636
63376d7d 1637 INFO("console has been setup");
7c6ef2a2
SH
1638 return 0;
1639}
1640
1641static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1642 const struct lxc_console *console,
1643 char *ttydir)
1644{
1645 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1646 int ret;
1647
1648 /* create rootfs/dev/<ttydir> directory */
1649 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1650 ttydir);
1651 if (ret >= sizeof(path))
1652 return -1;
1653 ret = mkdir(path, 0755);
1654 if (ret && errno != EEXIST) {
959aee9c 1655 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1656 return -1;
1657 }
959aee9c 1658 INFO("created %s", path);
7c6ef2a2
SH
1659
1660 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1661 rootfs->mount, ttydir);
1662 if (ret >= sizeof(lxcpath)) {
959aee9c 1663 ERROR("console path too long");
7c6ef2a2
SH
1664 return -1;
1665 }
1666
1667 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1668 ret = unlink(path);
1669 if (ret && errno != ENOENT) {
959aee9c 1670 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1671 return -1;
1672 }
1673
1674 ret = creat(lxcpath, 0660);
1675 if (ret==-1 && errno != EEXIST) {
959aee9c 1676 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1677 return -1;
1678 }
4d44e274
SH
1679 if (ret >= 0)
1680 close(ret);
7c6ef2a2 1681
b5159817
DE
1682 if (console->master < 0) {
1683 INFO("no console");
7c6ef2a2
SH
1684 return 0;
1685 }
1686
1687 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1688 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1689 return -1;
1690 }
1691
1692 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1693 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1694 if (ret >= sizeof(lxcpath)) {
1695 ERROR("lxc/console path too long");
1696 return -1;
1697 }
7c6ef2a2
SH
1698 ret = symlink(lxcpath, path);
1699 if (ret) {
1700 SYSERROR("failed to create symlink for console");
1701 return -1;
1702 }
1703
1704 INFO("console has been setup on %s", lxcpath);
cd54d859 1705
6e590161 1706 return 0;
1707}
1708
7c6ef2a2
SH
1709static int setup_console(const struct lxc_rootfs *rootfs,
1710 const struct lxc_console *console,
1711 char *ttydir)
1712{
1713 /* We don't have a rootfs, /dev/console will be shared */
1714 if (!rootfs->path)
1715 return 0;
1716 if (!ttydir)
1717 return setup_dev_console(rootfs, console);
1718
1719 return setup_ttydir_console(rootfs, console, ttydir);
1720}
1721
1bd051a6
SH
1722static int setup_kmsg(const struct lxc_rootfs *rootfs,
1723 const struct lxc_console *console)
1724{
1725 char kpath[MAXPATHLEN];
1726 int ret;
1727
222fea5a
DE
1728 if (!rootfs->path)
1729 return 0;
1bd051a6
SH
1730 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1731 if (ret < 0 || ret >= sizeof(kpath))
1732 return -1;
1733
1734 ret = unlink(kpath);
1735 if (ret && errno != ENOENT) {
959aee9c 1736 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1737 return -1;
1738 }
1739
1740 ret = symlink("console", kpath);
1741 if (ret) {
1742 SYSERROR("failed to create symlink for kmsg");
1743 return -1;
1744 }
1745
1746 return 0;
1747}
1748
998ac676
RT
1749static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1750{
1751 struct mount_opt *mo;
1752
1753 /* If opt is found in mount_opt, set or clear flags.
1754 * Otherwise append it to data. */
1755
1756 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1757 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1758 if (mo->clear)
1759 *flags &= ~mo->flag;
1760 else
1761 *flags |= mo->flag;
1762 return;
1763 }
1764 }
1765
1766 if (strlen(*data))
1767 strcat(*data, ",");
1768 strcat(*data, opt);
1769}
1770
/*
 * Split a comma separated mount option string into mount flags and a
 * data string.
 *
 * mntopts:  option string, may be NULL
 * mntflags: receives the flags; always initialised to 0
 * mntdata:  receives a malloc()ed string with the options handled as
 *           data by parse_mntopt(), or NULL if there are none; the
 *           caller frees it
 *
 * Returns 0 on success, -1 if memory could not be allocated.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1809
911324ef
DL
/*
 * Perform one mount.
 *
 * The first mount() call is made with MS_REMOUNT masked out. If the
 * flags contain MS_REMOUNT or MS_BIND, a second call with MS_REMOUNT
 * added follows.
 *
 * optional: when non-zero a failing mount is only logged and 0 is
 *           returned
 *
 * Returns 0 on success (or tolerated failure), -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional)
{
	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data) != 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}

			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1850
4e4ca161
SH
/*
 * Remove 'optional', 'create=dir', and 'create=file' from mntopt
 *
 * mntent->mnt_opts is edited in place. For each of the three keywords
 * the first occurrence is cut out together with the comma that follows
 * it; if no comma follows, the string is truncated where the keyword
 * starts.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const lxc_only[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(lxc_only) / sizeof(lxc_only[0]); idx++) {
		char *hit = strstr(mntent->mnt_opts, lxc_only[idx]);
		char *comma;

		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma) {
			/* shift the remainder (including the NUL) down */
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		} else {
			/* no more mntopts, so just chop it here */
			*hit = '\0';
		}
	}
}
1875
/*
 * Mount one fstab-style entry directly at mntent->mnt_dir (used when the
 * container has no rootfs of its own).
 *
 * The "create=dir" / "create=file" options create the mount target
 * first; failures to do so are only logged, the result of the mount
 * decides the return value. These options and "optional" are removed
 * from mntent->mnt_opts before the remaining options are parsed.
 *
 * Returns the result of mount_entry(), or -1 if the options could not
 * be parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a
		 * pointer to static storage, so the strdup()ed copy is
		 * kept in its own variable and that one is freed.
		 */
		char *dircopy = strdup(mntent->mnt_dir);

		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
1922
4e4ca161 1923static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1924 const struct lxc_rootfs *rootfs,
1925 const char *lxc_name)
911324ef 1926{
013bd428 1927 char *aux;
59760f5d 1928 char path[MAXPATHLEN];
911324ef
DL
1929 unsigned long mntflags;
1930 char *mntdata;
80a881b2 1931 int r, ret = 0, offset;
67e571de 1932 const char *lxcpath;
34cfffb3
SG
1933 FILE *pathfile = NULL;
1934 char *pathdirname = NULL;
4f1d50d1 1935 bool optional = hasmntopt(mntent, "optional") != NULL;
0ad19a3f 1936
593e8478 1937 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1938 if (!lxcpath) {
1939 ERROR("Out of memory");
1940 return -1;
1941 }
1942
80a881b2 1943 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1944 * use $lxcpath/CN/rootfs as the target prefix */
1945 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1946 if (r < 0 || r >= MAXPATHLEN)
1947 goto skipvarlib;
1948
1949 aux = strstr(mntent->mnt_dir, path);
1950 if (aux) {
1951 offset = strlen(path);
1952 goto skipabs;
1953 }
1954
1955skipvarlib:
013bd428
DL
1956 aux = strstr(mntent->mnt_dir, rootfs->path);
1957 if (!aux) {
1958 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1959 goto out;
1960 }
80a881b2
SH
1961 offset = strlen(rootfs->path);
1962
1963skipabs:
013bd428 1964
9ba8130c 1965 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1966 aux + offset);
1967 if (r < 0 || r >= MAXPATHLEN) {
1968 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1969 ret = -1;
1970 goto out;
1971 }
1972
34cfffb3 1973 if (hasmntopt(mntent, "create=dir")) {
119126b6 1974 if (mkdir_p(path, 0755) < 0) {
34cfffb3
SG
1975 WARN("Failed to create mount target '%s'", path);
1976 ret = -1;
1977 }
1978 }
1979
1980 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
1981 pathdirname = strdup(path);
1982 pathdirname = dirname(pathdirname);
119126b6
SG
1983 if (mkdir_p(pathdirname, 0755) < 0) {
1984 WARN("Failed to create target directory");
1985 }
34cfffb3
SG
1986 pathfile = fopen(path, "wb");
1987 if (!pathfile) {
1988 WARN("Failed to create mount target '%s'", path);
1989 ret = -1;
1990 }
1991 else
1992 fclose(pathfile);
1993 }
4e4ca161 1994 cull_mntent_opt(mntent);
d330fe7b 1995
a17b1e65
SG
1996 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1997 free(mntdata);
1998 return -1;
1999 }
2000
013bd428 2001 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1fc64d22 2002 mntflags, mntdata, optional);
0ad19a3f 2003
a17b1e65
SG
2004 free(mntdata);
2005
013bd428 2006out:
34cfffb3 2007 free(pathdirname);
911324ef
DL
2008 return ret;
2009}
d330fe7b 2010
/*
 * Mount one entry whose target is given relative to the container root.
 *
 * rootfs: directory the relative mntent->mnt_dir is appended to
 *
 * The "create=dir" / "create=file" options create the mount point
 * first; failures to do so are only logged, the result of the mount
 * decides the return value.
 *
 * Returns the result of mount_entry(), or -1 if the path is too long or
 * the options could not be parsed.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0)
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a
		 * pointer to static storage, so the strdup()ed copy is
		 * kept in its own variable and that one is freed.
		 */
		char *dircopy = strdup(path);

		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
2065
80a881b2
SH
2066static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2067 const char *lxc_name)
911324ef 2068{
aaf901be
AM
2069 struct mntent mntent;
2070 char buf[4096];
911324ef 2071 int ret = -1;
e76b8764 2072
aaf901be 2073 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2074
911324ef 2075 if (!rootfs->path) {
aaf901be 2076 if (mount_entry_on_systemfs(&mntent))
e76b8764 2077 goto out;
911324ef 2078 continue;
e76b8764
CDC
2079 }
2080
911324ef 2081 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2082 if (mntent.mnt_dir[0] != '/') {
2083 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2084 rootfs->mount))
2085 goto out;
2086 continue;
2087 }
cd54d859 2088
aaf901be 2089 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2090 goto out;
0ad19a3f 2091 }
cd54d859 2092
0ad19a3f 2093 ret = 0;
cd54d859
DL
2094
2095 INFO("mount points have been setup");
0ad19a3f 2096out:
e7938e9e
MN
2097 return ret;
2098}
2099
80a881b2
SH
/*
 * Mount all entries of the given fstab file.
 *
 * fstab: path of the file, nothing is done when NULL
 *
 * Returns 0 on success or when no fstab is given, -1 on failure.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *stream;
	int rc;

	if (fstab == NULL)
		return 0;

	stream = setmntent(fstab, "r");
	if (stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, stream, lxc_name);
	endmntent(stream);

	return rc;
}
2120
80a881b2
SH
2121static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2122 const char *lxc_name)
e7938e9e
MN
2123{
2124 FILE *file;
2125 struct lxc_list *iterator;
2126 char *mount_entry;
2127 int ret;
2128
2129 file = tmpfile();
2130 if (!file) {
2131 ERROR("tmpfile error: %m");
2132 return -1;
2133 }
2134
2135 lxc_list_for_each(iterator, mount) {
2136 mount_entry = iterator->elem;
1d6b1976 2137 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2138 }
2139
2140 rewind(file);
2141
80a881b2 2142 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2143
2144 fclose(file);
2145 return ret;
2146}
2147
81810dd1
DL
2148static int setup_caps(struct lxc_list *caps)
2149{
2150 struct lxc_list *iterator;
2151 char *drop_entry;
d55bc1ad 2152 char *ptr;
81810dd1
DL
2153 int i, capid;
2154
2155 lxc_list_for_each(iterator, caps) {
2156
2157 drop_entry = iterator->elem;
2158
2159 capid = -1;
2160
2161 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2162
2163 if (strcmp(drop_entry, caps_opt[i].name))
2164 continue;
2165
2166 capid = caps_opt[i].value;
2167 break;
2168 }
2169
d55bc1ad
CS
2170 if (capid < 0) {
2171 /* try to see if it's numeric, so the user may specify
2172 * capabilities that the running kernel knows about but
2173 * we don't */
09bbd745 2174 errno = 0;
d55bc1ad 2175 capid = strtol(drop_entry, &ptr, 10);
09bbd745 2176 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
2177 /* not a valid number */
2178 capid = -1;
2179 else if (capid > lxc_caps_last_cap())
2180 /* we have a number but it's not a valid
2181 * capability */
2182 capid = -1;
2183 }
2184
81810dd1 2185 if (capid < 0) {
1e11be34
DL
2186 ERROR("unknown capability %s", drop_entry);
2187 return -1;
81810dd1
DL
2188 }
2189
2190 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2191
2192 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2193 SYSERROR("failed to remove %s capability", drop_entry);
2194 return -1;
2195 }
81810dd1
DL
2196
2197 }
2198
1fb86a7c
SH
2199 DEBUG("capabilities have been setup");
2200
2201 return 0;
2202}
2203
2204static int dropcaps_except(struct lxc_list *caps)
2205{
2206 struct lxc_list *iterator;
2207 char *keep_entry;
2208 char *ptr;
2209 int i, capid;
2210 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2211 INFO("found %d capabilities", numcaps);
1fb86a7c 2212
2caf9a97
SH
2213 if (numcaps <= 0 || numcaps > 200)
2214 return -1;
2215
1fb86a7c
SH
2216 // caplist[i] is 1 if we keep capability i
2217 int *caplist = alloca(numcaps * sizeof(int));
2218 memset(caplist, 0, numcaps * sizeof(int));
2219
2220 lxc_list_for_each(iterator, caps) {
2221
2222 keep_entry = iterator->elem;
2223
2224 capid = -1;
2225
2226 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2227
2228 if (strcmp(keep_entry, caps_opt[i].name))
2229 continue;
2230
2231 capid = caps_opt[i].value;
2232 break;
2233 }
2234
2235 if (capid < 0) {
2236 /* try to see if it's numeric, so the user may specify
2237 * capabilities that the running kernel knows about but
2238 * we don't */
2239 capid = strtol(keep_entry, &ptr, 10);
2240 if (!ptr || *ptr != '\0' ||
f371aca9 2241 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2242 /* not a valid number */
2243 capid = -1;
2244 else if (capid > lxc_caps_last_cap())
2245 /* we have a number but it's not a valid
2246 * capability */
2247 capid = -1;
2248 }
2249
2250 if (capid < 0) {
2251 ERROR("unknown capability %s", keep_entry);
2252 return -1;
2253 }
2254
2255 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2256
2257 caplist[capid] = 1;
2258 }
2259 for (i=0; i<numcaps; i++) {
2260 if (caplist[i])
2261 continue;
2262 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2263 SYSERROR("failed to remove capability %d", i);
2264 return -1;
2265 }
1fb86a7c
SH
2266 }
2267
2268 DEBUG("capabilities have been setup");
81810dd1
DL
2269
2270 return 0;
2271}
2272
0ad19a3f 2273static int setup_hw_addr(char *hwaddr, const char *ifname)
2274{
2275 struct sockaddr sockaddr;
2276 struct ifreq ifr;
2277 int ret, fd;
2278
3cfc0f3a
MN
2279 ret = lxc_convert_mac(hwaddr, &sockaddr);
2280 if (ret) {
2281 ERROR("mac address '%s' conversion failed : %s",
2282 hwaddr, strerror(-ret));
0ad19a3f 2283 return -1;
2284 }
2285
2286 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2287 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2288 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2289
2290 fd = socket(AF_INET, SOCK_DGRAM, 0);
2291 if (fd < 0) {
3ab87b66 2292 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2293 return -1;
2294 }
2295
2296 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2297 close(fd);
2298 if (ret)
3ab87b66 2299 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2300
5da6aa8c 2301 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2302
0ad19a3f 2303 return ret;
2304}
2305
82d5ae15 2306static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2307{
82d5ae15
DL
2308 struct lxc_list *iterator;
2309 struct lxc_inetdev *inetdev;
3cfc0f3a 2310 int err;
0ad19a3f 2311
82d5ae15
DL
2312 lxc_list_for_each(iterator, ip) {
2313
2314 inetdev = iterator->elem;
2315
0093bb8c
DL
2316 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2317 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2318 if (err) {
2319 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2320 ifindex, strerror(-err));
82d5ae15
DL
2321 return -1;
2322 }
2323 }
2324
2325 return 0;
0ad19a3f 2326}
2327
82d5ae15 2328static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2329{
82d5ae15 2330 struct lxc_list *iterator;
7fa9074f 2331 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2332 int err;
0ad19a3f 2333
82d5ae15
DL
2334 lxc_list_for_each(iterator, ip) {
2335
2336 inet6dev = iterator->elem;
2337
b3df193c 2338 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2339 &inet6dev->mcast, &inet6dev->acast,
2340 inet6dev->prefix);
3cfc0f3a
MN
2341 if (err) {
2342 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2343 ifindex, strerror(-err));
82d5ae15 2344 return -1;
3cfc0f3a 2345 }
82d5ae15
DL
2346 }
2347
2348 return 0;
0ad19a3f 2349}
2350
82d5ae15 2351static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2352{
0ad19a3f 2353 char ifname[IFNAMSIZ];
0ad19a3f 2354 char *current_ifname = ifname;
3cfc0f3a 2355 int err;
0ad19a3f 2356
82d5ae15
DL
2357 /* empty network namespace */
2358 if (!netdev->ifindex) {
b0efbac4 2359 if (netdev->flags & IFF_UP) {
d472214b 2360 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2361 if (err) {
2362 ERROR("failed to set the loopback up : %s",
2363 strerror(-err));
82d5ae15
DL
2364 return -1;
2365 }
82d5ae15 2366 }
40790553
SH
2367 if (netdev->type != LXC_NET_VETH)
2368 return 0;
2369 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2370 }
13954cce 2371
b466dc33 2372 /* get the new ifindex in case of physical netdev */
40790553 2373 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2374 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2375 ERROR("failed to get ifindex for %s",
2376 netdev->link);
2377 return -1;
2378 }
40790553 2379 }
b466dc33 2380
82d5ae15
DL
2381 /* retrieve the name of the interface */
2382 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2383 ERROR("no interface corresponding to index '%d'",
82d5ae15 2384 netdev->ifindex);
0ad19a3f 2385 return -1;
2386 }
13954cce 2387
018ef520 2388 /* default: let the system to choose one interface name */
9d083402 2389 if (!netdev->name)
fb6d9b2f
DL
2390 netdev->name = netdev->type == LXC_NET_PHYS ?
2391 netdev->link : "eth%d";
018ef520 2392
82d5ae15 2393 /* rename the interface name */
40790553
SH
2394 if (strcmp(ifname, netdev->name) != 0) {
2395 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2396 if (err) {
2397 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2398 strerror(-err));
2399 return -1;
2400 }
018ef520
DL
2401 }
2402
2403 /* Re-read the name of the interface because its name has changed
2404 * and would be automatically allocated by the system
2405 */
82d5ae15 2406 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2407 ERROR("no interface corresponding to index '%d'",
82d5ae15 2408 netdev->ifindex);
018ef520 2409 return -1;
0ad19a3f 2410 }
2411
82d5ae15
DL
2412 /* set a mac address */
2413 if (netdev->hwaddr) {
2414 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2415 ERROR("failed to setup hw address for '%s'",
82d5ae15 2416 current_ifname);
0ad19a3f 2417 return -1;
2418 }
2419 }
2420
82d5ae15
DL
2421 /* setup ipv4 addresses on the interface */
2422 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2423 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2424 ifname);
2425 return -1;
2426 }
2427
82d5ae15
DL
2428 /* setup ipv6 addresses on the interface */
2429 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2430 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2431 ifname);
2432 return -1;
2433 }
2434
82d5ae15 2435 /* set the network device up */
b0efbac4 2436 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2437 int err;
2438
d472214b 2439 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2440 if (err) {
2441 ERROR("failed to set '%s' up : %s", current_ifname,
2442 strerror(-err));
0ad19a3f 2443 return -1;
2444 }
2445
2446 /* the network is up, make the loopback up too */
d472214b 2447 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2448 if (err) {
2449 ERROR("failed to set the loopback up : %s",
2450 strerror(-err));
0ad19a3f 2451 return -1;
2452 }
2453 }
2454
f8fee0e2
MK
2455 /* We can only set up the default routes after bringing
2456 * up the interface, sine bringing up the interface adds
2457 * the link-local routes and we can't add a default
2458 * route if the gateway is not reachable. */
2459
2460 /* setup ipv4 gateway on the interface */
2461 if (netdev->ipv4_gateway) {
2462 if (!(netdev->flags & IFF_UP)) {
2463 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2464 return -1;
2465 }
2466
2467 if (lxc_list_empty(&netdev->ipv4)) {
2468 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2469 return -1;
2470 }
2471
2472 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2473 if (err) {
fc739df5
SG
2474 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2475 if (err) {
2476 ERROR("failed to add ipv4 dest for '%s': %s",
2477 ifname, strerror(-err));
2478 }
2479
2480 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2481 if (err) {
2482 ERROR("failed to setup ipv4 gateway for '%s': %s",
2483 ifname, strerror(-err));
2484 if (netdev->ipv4_gateway_auto) {
2485 char buf[INET_ADDRSTRLEN];
2486 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2487 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2488 }
2489 return -1;
19a26f82 2490 }
f8fee0e2
MK
2491 }
2492 }
2493
2494 /* setup ipv6 gateway on the interface */
2495 if (netdev->ipv6_gateway) {
2496 if (!(netdev->flags & IFF_UP)) {
2497 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2498 return -1;
2499 }
2500
2501 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2502 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2503 return -1;
2504 }
2505
2506 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2507 if (err) {
fc739df5
SG
2508 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2509 if (err) {
2510 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2511 ifname, strerror(-err));
19a26f82 2512 }
fc739df5
SG
2513
2514 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2515 if (err) {
2516 ERROR("failed to setup ipv6 gateway for '%s': %s",
2517 ifname, strerror(-err));
2518 if (netdev->ipv6_gateway_auto) {
2519 char buf[INET6_ADDRSTRLEN];
2520 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2521 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2522 }
2523 return -1;
2524 }
f8fee0e2
MK
2525 }
2526 }
2527
cd54d859
DL
2528 DEBUG("'%s' has been setup", current_ifname);
2529
0ad19a3f 2530 return 0;
2531}
2532
5f4535a3 2533static int setup_network(struct lxc_list *network)
0ad19a3f 2534{
82d5ae15 2535 struct lxc_list *iterator;
82d5ae15 2536 struct lxc_netdev *netdev;
0ad19a3f 2537
5f4535a3 2538 lxc_list_for_each(iterator, network) {
cd54d859 2539
5f4535a3 2540 netdev = iterator->elem;
82d5ae15
DL
2541
2542 if (setup_netdev(netdev)) {
2543 ERROR("failed to setup netdev");
2544 return -1;
2545 }
2546 }
cd54d859 2547
5f4535a3
DL
2548 if (!lxc_list_empty(network))
2549 INFO("network has been setup");
cd54d859
DL
2550
2551 return 0;
0ad19a3f 2552}
2553
2af6bd1b
SH
2554/* try to move physical nics to the init netns */
2555void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2556{
2557 int i, ret, oldfd;
2558 char path[MAXPATHLEN];
2559
2560 if (netnsfd < 0)
2561 return;
2562
2563 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2564 if (ret < 0 || ret >= MAXPATHLEN) {
2565 WARN("Failed to open monitor netns fd");
2566 return;
2567 }
2568 if ((oldfd = open(path, O_RDONLY)) < 0) {
2569 SYSERROR("Failed to open monitor netns fd");
2570 return;
2571 }
2572 if (setns(netnsfd, 0) != 0) {
2573 SYSERROR("Failed to enter container netns to reset nics");
2574 close(oldfd);
2575 return;
2576 }
2577 for (i=0; i<conf->num_savednics; i++) {
2578 struct saved_nic *s = &conf->saved_nics[i];
2579 if (lxc_netdev_move_by_index(s->ifindex, 1))
2580 WARN("Error moving nic index:%d back to host netns",
2581 s->ifindex);
2582 }
2583 if (setns(oldfd, 0) != 0)
2584 SYSERROR("Failed to re-enter monitor's netns");
2585 close(oldfd);
2586}
2587
2588void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2589{
2590 int i;
2591
2af6bd1b
SH
2592 if (conf->num_savednics == 0)
2593 return;
2594
7b35f3d6 2595 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2596 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2597 for (i=0; i<conf->num_savednics; i++) {
2598 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2599 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2600 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2601 free(s->orig_name);
2602 }
2603 conf->num_savednics = 0;
7b35f3d6
SH
2604}
2605
ae9242c8
SH
2606static char *default_rootfs_mount = LXCROOTFSMOUNT;
2607
7b379ab3 2608struct lxc_conf *lxc_conf_init(void)
089cd8b8 2609{
7b379ab3 2610 struct lxc_conf *new;
26ddeedd 2611 int i;
7b379ab3
MN
2612
2613 new = malloc(sizeof(*new));
2614 if (!new) {
2615 ERROR("lxc_conf_init : %m");
2616 return NULL;
2617 }
2618 memset(new, 0, sizeof(*new));
2619
b40a606e 2620 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2621 new->personality = -1;
bc6928ff 2622 new->autodev = -1;
596a818d
DE
2623 new->console.log_path = NULL;
2624 new->console.log_fd = -1;
28a4b0e5 2625 new->console.path = NULL;
63376d7d 2626 new->console.peer = -1;
b5159817
DE
2627 new->console.peerpty.busy = -1;
2628 new->console.peerpty.master = -1;
2629 new->console.peerpty.slave = -1;
63376d7d
DL
2630 new->console.master = -1;
2631 new->console.slave = -1;
2632 new->console.name[0] = '\0';
d2e30e99 2633 new->maincmd_fd = -1;
54c30e29 2634 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2635 if (!new->rootfs.mount) {
2636 ERROR("lxc_conf_init : %m");
2637 free(new);
2638 return NULL;
2639 }
2f3f41d0 2640 new->kmsg = 1;
7b379ab3
MN
2641 lxc_list_init(&new->cgroup);
2642 lxc_list_init(&new->network);
2643 lxc_list_init(&new->mount_list);
81810dd1 2644 lxc_list_init(&new->caps);
1fb86a7c 2645 lxc_list_init(&new->keepcaps);
f6d3e3e4 2646 lxc_list_init(&new->id_map);
26ddeedd
SH
2647 for (i=0; i<NUM_LXC_HOOKS; i++)
2648 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2649 lxc_list_init(&new->groups);
fe4de9a6
DE
2650 new->lsm_aa_profile = NULL;
2651 new->lsm_se_context = NULL;
5112cd70 2652 new->tmp_umount_proc = 0;
7b379ab3 2653
9f30a190
MM
2654 for (i = 0; i < LXC_NS_MAX; i++)
2655 new->inherit_ns_fd[i] = -1;
2656
7b379ab3 2657 return new;
089cd8b8
DL
2658}
2659
e3b4c4c4 2660static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2661{
8634bc19 2662 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2663 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2664 int err;
13954cce 2665
e892973e
DL
2666 if (netdev->priv.veth_attr.pair)
2667 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2668 else {
9ba8130c
SH
2669 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2670 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2671 ERROR("veth1 name too long");
2672 return -1;
2673 }
a0265685 2674 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2675 if (!veth1) {
2676 ERROR("failed to allocate a temporary name");
2677 return -1;
2678 }
74a2b586
JK
2679 /* store away for deconf */
2680 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2681 }
82d5ae15 2682
0e391e57 2683 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2684 veth2 = lxc_mkifname(veth2buf);
ad40563e 2685 if (!veth2) {
82d5ae15 2686 ERROR("failed to allocate a temporary name");
ad40563e 2687 goto out_delete;
0ad19a3f 2688 }
2689
3cfc0f3a
MN
2690 err = lxc_veth_create(veth1, veth2);
2691 if (err) {
2692 ERROR("failed to create %s-%s : %s", veth1, veth2,
2693 strerror(-err));
ad40563e 2694 goto out_delete;
0ad19a3f 2695 }
13954cce 2696
49684c0b
CS
2697 /* changing the high byte of the mac address to 0xfe, the bridge interface
2698 * will always keep the host's mac address and not take the mac address
2699 * of a container */
2700 err = setup_private_host_hw_addr(veth1);
2701 if (err) {
2702 ERROR("failed to change mac address of host interface '%s' : %s",
2703 veth1, strerror(-err));
2704 goto out_delete;
2705 }
2706
82d5ae15 2707 if (netdev->mtu) {
d472214b 2708 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2709 if (!err)
d472214b 2710 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2711 if (err) {
2712 ERROR("failed to set mtu '%s' for %s-%s : %s",
2713 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2714 goto out_delete;
75d09f83
DL
2715 }
2716 }
2717
3cfc0f3a
MN
2718 if (netdev->link) {
2719 err = lxc_bridge_attach(netdev->link, veth1);
2720 if (err) {
2721 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2722 veth1, netdev->link, strerror(-err));
2723 goto out_delete;
2724 }
eb14c10a
DL
2725 }
2726
82d5ae15
DL
2727 netdev->ifindex = if_nametoindex(veth2);
2728 if (!netdev->ifindex) {
36eb9bde 2729 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2730 goto out_delete;
2731 }
2732
d472214b 2733 err = lxc_netdev_up(veth1);
6e35af2e
DL
2734 if (err) {
2735 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2736 goto out_delete;
0ad19a3f 2737 }
2738
e3b4c4c4 2739 if (netdev->upscript) {
751d9dcd
DL
2740 err = run_script(handler->name, "net", netdev->upscript, "up",
2741 "veth", veth1, (char*) NULL);
2742 if (err)
e3b4c4c4 2743 goto out_delete;
e3b4c4c4
ST
2744 }
2745
82d5ae15
DL
2746 DEBUG("instanciated veth '%s/%s', index is '%d'",
2747 veth1, veth2, netdev->ifindex);
2748
6ab9ab6d 2749 return 0;
eb14c10a
DL
2750
2751out_delete:
b84f58b9 2752 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2753 if (!netdev->priv.veth_attr.pair && veth1)
2754 free(veth1);
2755 if(veth2)
2756 free(veth2);
6ab9ab6d 2757 return -1;
13954cce 2758}
d957ae2d 2759
74a2b586
JK
2760static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2761{
2762 char *veth1;
2763 int err;
2764
2765 if (netdev->priv.veth_attr.pair)
2766 veth1 = netdev->priv.veth_attr.pair;
2767 else
2768 veth1 = netdev->priv.veth_attr.veth1;
2769
2770 if (netdev->downscript) {
2771 err = run_script(handler->name, "net", netdev->downscript,
2772 "down", "veth", veth1, (char*) NULL);
2773 if (err)
2774 return -1;
2775 }
2776 return 0;
2777}
2778
e3b4c4c4 2779static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2780{
0e391e57 2781 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2782 int err;
d957ae2d
MT
2783
2784 if (!netdev->link) {
2785 ERROR("no link specified for macvlan netdev");
2786 return -1;
2787 }
13954cce 2788
9ba8130c
SH
2789 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2790 if (err >= sizeof(peerbuf))
2791 return -1;
82d5ae15 2792
a0265685 2793 peer = lxc_mkifname(peerbuf);
ad40563e 2794 if (!peer) {
82d5ae15
DL
2795 ERROR("failed to make a temporary name");
2796 return -1;
0ad19a3f 2797 }
2798
3cfc0f3a
MN
2799 err = lxc_macvlan_create(netdev->link, peer,
2800 netdev->priv.macvlan_attr.mode);
2801 if (err) {
2802 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2803 peer, netdev->link, strerror(-err));
ad40563e 2804 goto out;
0ad19a3f 2805 }
2806
82d5ae15
DL
2807 netdev->ifindex = if_nametoindex(peer);
2808 if (!netdev->ifindex) {
36eb9bde 2809 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2810 goto out;
22ebac19 2811 }
2812
e3b4c4c4 2813 if (netdev->upscript) {
751d9dcd
DL
2814 err = run_script(handler->name, "net", netdev->upscript, "up",
2815 "macvlan", netdev->link, (char*) NULL);
2816 if (err)
ad40563e 2817 goto out;
e3b4c4c4
ST
2818 }
2819
e892973e
DL
2820 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2821 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2822
d957ae2d 2823 return 0;
ad40563e
ÇO
2824out:
2825 lxc_netdev_delete_by_name(peer);
2826 free(peer);
2827 return -1;
0ad19a3f 2828}
2829
74a2b586
JK
2830static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2831{
2832 int err;
2833
2834 if (netdev->downscript) {
2835 err = run_script(handler->name, "net", netdev->downscript,
2836 "down", "macvlan", netdev->link,
2837 (char*) NULL);
2838 if (err)
2839 return -1;
2840 }
2841 return 0;
2842}
2843
26c39028 2844/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2845static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2846{
2847 char peer[IFNAMSIZ];
3cfc0f3a 2848 int err;
26c39028
JHS
2849
2850 if (!netdev->link) {
2851 ERROR("no link specified for vlan netdev");
2852 return -1;
2853 }
2854
9ba8130c
SH
2855 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2856 if (err >= sizeof(peer)) {
2857 ERROR("peer name too long");
2858 return -1;
2859 }
26c39028 2860
3cfc0f3a
MN
2861 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2862 if (err) {
2863 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2864 peer, netdev->link, strerror(-err));
26c39028
JHS
2865 return -1;
2866 }
2867
2868 netdev->ifindex = if_nametoindex(peer);
2869 if (!netdev->ifindex) {
2870 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2871 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2872 return -1;
2873 }
2874
e892973e
DL
2875 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2876 netdev->ifindex);
2877
26c39028
JHS
2878 return 0;
2879}
2880
74a2b586
JK
/*
 * Shutdown step for vlan devices: there is nothing to do, so this always
 * reports success.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2885
e3b4c4c4 2886static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2887{
6168e99f
DL
2888 if (!netdev->link) {
2889 ERROR("no link specified for the physical interface");
2890 return -1;
2891 }
2892
9d083402 2893 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2894 if (!netdev->ifindex) {
9d083402 2895 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2896 return -1;
2897 }
2898
e3b4c4c4
ST
2899 if (netdev->upscript) {
2900 int err;
751d9dcd
DL
2901 err = run_script(handler->name, "net", netdev->upscript,
2902 "up", "phys", netdev->link, (char*) NULL);
2903 if (err)
e3b4c4c4 2904 return -1;
e3b4c4c4
ST
2905 }
2906
82d5ae15 2907 return 0;
0ad19a3f 2908}
2909
74a2b586
JK
2910static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2911{
2912 int err;
2913
2914 if (netdev->downscript) {
2915 err = run_script(handler->name, "net", netdev->downscript,
2916 "down", "phys", netdev->link, (char*) NULL);
2917 if (err)
2918 return -1;
2919 }
2920 return 0;
2921}
2922
26b797f3
SH
2923static int instanciate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
2924{
2925 netdev->ifindex = 0;
2926 return 0;
2927}
2928
e3b4c4c4 2929static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2930{
82d5ae15 2931 netdev->ifindex = 0;
e3b4c4c4
ST
2932 if (netdev->upscript) {
2933 int err;
751d9dcd
DL
2934 err = run_script(handler->name, "net", netdev->upscript,
2935 "up", "empty", (char*) NULL);
2936 if (err)
e3b4c4c4 2937 return -1;
e3b4c4c4 2938 }
82d5ae15 2939 return 0;
0ad19a3f 2940}
2941
74a2b586
JK
2942static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2943{
2944 int err;
2945
2946 if (netdev->downscript) {
2947 err = run_script(handler->name, "net", netdev->downscript,
2948 "down", "empty", (char*) NULL);
2949 if (err)
2950 return -1;
2951 }
2952 return 0;
2953}
2954
26b797f3
SH
/*
 * Shutdown step for the "none" network type: there is nothing to do, so this
 * always reports success.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2959
2960int lxc_requests_empty_network(struct lxc_handler *handler)
2961{
2962 struct lxc_list *network = &handler->conf->network;
2963 struct lxc_list *iterator;
2964 struct lxc_netdev *netdev;
2965 bool found_none = false, found_nic = false;
2966
2967 if (lxc_list_empty(network))
2968 return 0;
2969
2970 lxc_list_for_each(iterator, network) {
2971
2972 netdev = iterator->elem;
2973
2974 if (netdev->type == LXC_NET_NONE)
2975 found_none = true;
2976 else
2977 found_nic = true;
2978 }
2979 if (found_none && !found_nic)
2980 return 1;
2981 return 0;
2982}
2983
e3b4c4c4 2984int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2985{
e3b4c4c4 2986 struct lxc_list *network = &handler->conf->network;
82d5ae15 2987 struct lxc_list *iterator;
82d5ae15 2988 struct lxc_netdev *netdev;
cbef6c52
SH
2989 int am_root = (getuid() == 0);
2990
2991 if (!am_root)
2992 return 0;
0ad19a3f 2993
5f4535a3 2994 lxc_list_for_each(iterator, network) {
0ad19a3f 2995
5f4535a3 2996 netdev = iterator->elem;
13954cce 2997
24654103 2998 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2999 ERROR("invalid network configuration type '%d'",
5f4535a3 3000 netdev->type);
82d5ae15
DL
3001 return -1;
3002 }
0ad19a3f 3003
e3b4c4c4 3004 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
3005 ERROR("failed to create netdev");
3006 return -1;
3007 }
e3b4c4c4 3008
0ad19a3f 3009 }
3010
3011 return 0;
3012}
3013
74a2b586 3014void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 3015{
74a2b586 3016 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
3017 struct lxc_list *iterator;
3018 struct lxc_netdev *netdev;
3019
3020 lxc_list_for_each(iterator, network) {
3021 netdev = iterator->elem;
d472214b 3022
74a2b586 3023 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
3024 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
3025 WARN("failed to rename to the initial name the " \
3026 "netdev '%s'", netdev->link);
d472214b 3027 continue;
d8f8e352 3028 }
d472214b 3029
74a2b586
JK
3030 if (netdev_deconf[netdev->type](handler, netdev)) {
3031 WARN("failed to destroy netdev");
3032 }
3033
d8f8e352
DL
3034 /* Recent kernel remove the virtual interfaces when the network
3035 * namespace is destroyed but in case we did not moved the
3036 * interface to the network namespace, we have to destroy it
3037 */
74a2b586
JK
3038 if (netdev->ifindex != 0 &&
3039 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3040 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3041 }
3042}
3043
45e854dc
SG
3044#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3045
fe1f672f
ÇO
3046/* lxc-user-nic returns "interface_name:interface_name\n" */
3047#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 3048static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3049{
3050 pid_t child;
a7242d9a
ÇO
3051 int bytes, pipefd[2];
3052 char *token, *saveptr = NULL;
fe1f672f 3053 char buffer[MAX_BUFFER_SIZE];
cbef6c52
SH
3054
3055 if (netdev->type != LXC_NET_VETH) {
3056 ERROR("nic type %d not support for unprivileged use",
3057 netdev->type);
3058 return -1;
3059 }
3060
a7242d9a
ÇO
3061 if(pipe(pipefd) < 0) {
3062 SYSERROR("pipe failed");
3063 return -1;
3064 }
3065
cbef6c52
SH
3066 if ((child = fork()) < 0) {
3067 SYSERROR("fork");
a7242d9a
ÇO
3068 close(pipefd[0]);
3069 close(pipefd[1]);
3070 return -1;
3071 }
3072
3073 if (child == 0) { // child
3074 /* close the read-end of the pipe */
3075 close(pipefd[0]);
3076 /* redirect the stdout to write-end of the pipe */
3077 dup2(pipefd[1], STDOUT_FILENO);
3078 /* close the write-end of the pipe */
fe1f672f 3079 close(pipefd[1]);
a7242d9a
ÇO
3080
3081 // Call lxc-user-nic pid type bridge
3082 char pidstr[20];
3083 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
3084 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3085 pidstr[19] = '\0';
3086 execvp(args[0], args);
3087 SYSERROR("execvp lxc-user-nic");
3088 exit(1);
3089 }
3090
3091 /* close the write-end of the pipe */
3092 close(pipefd[1]);
3093
fe1f672f 3094 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3095 if (bytes < 0) {
3096 SYSERROR("read failed");
3097 }
3098 buffer[bytes - 1] = '\0';
3099
3100 if (wait_for_pid(child) != 0) {
3101 close(pipefd[0]);
cbef6c52
SH
3102 return -1;
3103 }
3104
a7242d9a
ÇO
3105 /* close the read-end of the pipe */
3106 close(pipefd[0]);
cbef6c52 3107
a7242d9a
ÇO
3108 /* fill netdev->name field */
3109 token = strtok_r(buffer, ":", &saveptr);
3110 if (!token)
3111 return -1;
658979c5
SH
3112 netdev->name = malloc(IFNAMSIZ+1);
3113 if (!netdev->name) {
3114 ERROR("Out of memory");
3115 return -1;
3116 }
3117 memset(netdev->name, 0, IFNAMSIZ+1);
3118 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3119
3120 /* fill netdev->veth_attr.pair field */
3121 token = strtok_r(NULL, ":", &saveptr);
3122 if (!token)
3123 return -1;
3124 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3125 if (!netdev->priv.veth_attr.pair) {
3126 ERROR("Out of memory");
3127 return -1;
3128 }
45e854dc 3129
a7242d9a 3130 return 0;
cbef6c52
SH
3131}
3132
5f4535a3 3133int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3134{
82d5ae15 3135 struct lxc_list *iterator;
82d5ae15 3136 struct lxc_netdev *netdev;
cbef6c52 3137 int am_root = (getuid() == 0);
3cfc0f3a 3138 int err;
0ad19a3f 3139
5f4535a3 3140 lxc_list_for_each(iterator, network) {
82d5ae15 3141
5f4535a3 3142 netdev = iterator->elem;
82d5ae15 3143
fbb16259 3144 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3145 if (unpriv_assign_nic(netdev, pid))
3146 return -1;
658979c5
SH
3147 // lxc-user-nic has moved the nic to the new ns.
3148 // unpriv_assign_nic() fills in netdev->name.
3149 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3150 continue;
3151 }
236087a6 3152
fbb16259
SH
3153 /* empty network namespace, nothing to move */
3154 if (!netdev->ifindex)
3155 continue;
3156
d472214b 3157 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
3158 if (err) {
3159 ERROR("failed to move '%s' to the container : %s",
3160 netdev->link, strerror(-err));
82d5ae15
DL
3161 return -1;
3162 }
3163
c1c75c04 3164 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3165 }
3166
3167 return 0;
3168}
3169
251d0d2a
DE
3170static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3171 size_t buf_size)
f6d3e3e4
SH
3172{
3173 char path[PATH_MAX];
e4ccd113 3174 int ret, closeret;
f6d3e3e4
SH
3175 FILE *f;
3176
3177 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3178 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3179 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3180 return -E2BIG;
3181 }
3182 f = fopen(path, "w");
3183 if (!f) {
3184 perror("open");
3185 return -EINVAL;
3186 }
251d0d2a 3187 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3188 if (ret < 0)
e4ccd113
SH
3189 SYSERROR("writing id mapping");
3190 closeret = fclose(f);
3191 if (closeret)
3192 SYSERROR("writing id mapping");
3193 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3194}
3195
3196int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3197{
3198 struct lxc_list *iterator;
3199 struct id_map *map;
3200 int ret = 0;
251d0d2a 3201 enum idtype type;
4f7521b4 3202 char *buf = NULL, *pos;
e9c5dc7b 3203 int use_shadow = (on_path("newuidmap") && on_path("newgidmap"));
0e6e3a41
SG
3204
3205 if (!use_shadow && geteuid()) {
3206 ERROR("Missing newuidmap/newgidmap");
3207 return -1;
3208 }
251d0d2a
DE
3209
3210 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3211 int left, fill;
cf3ef16d
SH
3212 int had_entry = 0;
3213 if (!buf) {
3214 buf = pos = malloc(4096);
4f7521b4
SH
3215 if (!buf)
3216 return -ENOMEM;
cf3ef16d
SH
3217 }
3218 pos = buf;
0e6e3a41 3219 if (use_shadow)
d1838f34 3220 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3221 type == ID_TYPE_UID ? 'u' : 'g',
3222 pid);
4f7521b4 3223
cf3ef16d
SH
3224 lxc_list_for_each(iterator, idmap) {
3225 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3226 map = iterator->elem;
cf3ef16d
SH
3227 if (map->idtype != type)
3228 continue;
3229
3230 had_entry = 1;
3231 left = 4096 - (pos - buf);
d1838f34 3232 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3233 use_shadow ? " " : "",
d1838f34 3234 map->nsid, map->hostid, map->range,
0e6e3a41 3235 use_shadow ? "" : "\n");
cf3ef16d
SH
3236 if (fill <= 0 || fill >= left)
3237 SYSERROR("snprintf failed, too many mappings");
3238 pos += fill;
251d0d2a 3239 }
cf3ef16d 3240 if (!had_entry)
4f7521b4 3241 continue;
cf3ef16d 3242
0e6e3a41 3243 if (!use_shadow) {
cf3ef16d 3244 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3245 } else {
3246 left = 4096 - (pos - buf);
3247 fill = snprintf(pos, left, "\n");
3248 if (fill <= 0 || fill >= left)
3249 SYSERROR("snprintf failed, too many mappings");
3250 pos += fill;
cf3ef16d 3251 ret = system(buf);
d1838f34 3252 }
cf3ef16d 3253
f6d3e3e4
SH
3254 if (ret)
3255 break;
3256 }
251d0d2a 3257
4f7521b4
SH
3258 if (buf)
3259 free(buf);
f6d3e3e4
SH
3260 return ret;
3261}
3262
cf3ef16d 3263/*
0b3a6504
SH
3264 * return the host uid to which the container root is mapped in *val.
3265 * Return true if id was found, false otherwise.
cf3ef16d 3266 */
2a9a80cb 3267bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3268 unsigned long *val)
cf3ef16d
SH
3269{
3270 struct lxc_list *it;
3271 struct id_map *map;
3272
3273 lxc_list_for_each(it, &conf->id_map) {
3274 map = it->elem;
3275 if (map->idtype != ID_TYPE_UID)
3276 continue;
3277 if (map->nsid != 0)
3278 continue;
2a9a80cb
SH
3279 *val = map->hostid;
3280 return true;
cf3ef16d 3281 }
2a9a80cb 3282 return false;
cf3ef16d
SH
3283}
3284
2133f58c 3285int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3286{
3287 struct lxc_list *it;
3288 struct id_map *map;
3289 lxc_list_for_each(it, &conf->id_map) {
3290 map = it->elem;
2133f58c 3291 if (map->idtype != idtype)
cf3ef16d
SH
3292 continue;
3293 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3294 return (id - map->hostid) + map->nsid;
cf3ef16d 3295 }
57d116ab 3296 return -1;
cf3ef16d
SH
3297}
3298
2133f58c 3299int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3300{
3301 struct lxc_list *it;
3302 struct id_map *map;
2133f58c 3303 unsigned int freeid = 0;
cf3ef16d
SH
3304again:
3305 lxc_list_for_each(it, &conf->id_map) {
3306 map = it->elem;
2133f58c 3307 if (map->idtype != idtype)
cf3ef16d
SH
3308 continue;
3309 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3310 freeid = map->nsid + map->range;
3311 goto again;
3312 }
3313 }
3314 return freeid;
3315}
3316
19a26f82
MK
3317int lxc_find_gateway_addresses(struct lxc_handler *handler)
3318{
3319 struct lxc_list *network = &handler->conf->network;
3320 struct lxc_list *iterator;
3321 struct lxc_netdev *netdev;
3322 int link_index;
3323
3324 lxc_list_for_each(iterator, network) {
3325 netdev = iterator->elem;
3326
3327 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3328 continue;
3329
3330 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3331 ERROR("gateway = auto only supported for "
3332 "veth and macvlan");
3333 return -1;
3334 }
3335
3336 if (!netdev->link) {
3337 ERROR("gateway = auto needs a link interface");
3338 return -1;
3339 }
3340
3341 link_index = if_nametoindex(netdev->link);
3342 if (!link_index)
3343 return -EINVAL;
3344
3345 if (netdev->ipv4_gateway_auto) {
3346 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3347 ERROR("failed to automatically find ipv4 gateway "
3348 "address from link interface '%s'", netdev->link);
3349 return -1;
3350 }
3351 }
3352
3353 if (netdev->ipv6_gateway_auto) {
3354 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3355 ERROR("failed to automatically find ipv6 gateway "
3356 "address from link interface '%s'", netdev->link);
3357 return -1;
3358 }
3359 }
3360 }
3361
3362 return 0;
3363}
3364
5e4a62bf 3365int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3366{
5e4a62bf 3367 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3368 int i, ret;
b0a33c1e 3369
5e4a62bf
DL
3370 /* no tty in the configuration */
3371 if (!conf->tty)
b0a33c1e 3372 return 0;
3373
13954cce 3374 tty_info->pty_info =
e4e7d59d 3375 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3376 if (!tty_info->pty_info) {
36eb9bde 3377 SYSERROR("failed to allocate pty_info");
985d15b1 3378 return -1;
b0a33c1e 3379 }
3380
985d15b1 3381 for (i = 0; i < conf->tty; i++) {
13954cce 3382
b0a33c1e 3383 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3384
025ed0f3
SH
3385 process_lock();
3386 ret = openpty(&pty_info->master, &pty_info->slave,
3387 pty_info->name, NULL, NULL);
3388 process_unlock();
3389 if (ret) {
36eb9bde 3390 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3391 tty_info->nbtty = i;
3392 lxc_delete_tty(tty_info);
3393 return -1;
b0a33c1e 3394 }
3395
5332bb84
DL
3396 DEBUG("allocated pty '%s' (%d/%d)",
3397 pty_info->name, pty_info->master, pty_info->slave);
3398
3ec1648d 3399 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3400 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3401 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3402
b0a33c1e 3403 pty_info->busy = 0;
3404 }
3405
985d15b1 3406 tty_info->nbtty = conf->tty;
1ac470c0
DL
3407
3408 INFO("tty's configured");
3409
985d15b1 3410 return 0;
b0a33c1e 3411}
3412
3413void lxc_delete_tty(struct lxc_tty_info *tty_info)
3414{
3415 int i;
3416
3417 for (i = 0; i < tty_info->nbtty; i++) {
3418 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3419
3420 close(pty_info->master);
3421 close(pty_info->slave);
3422 }
3423
3424 free(tty_info->pty_info);
3425 tty_info->nbtty = 0;
3426}
3427
f6d3e3e4 3428/*
c4d10a05
SH
3429 * chown_mapped_root: for an unprivileged user with uid X to chown a dir
3430 * to subuid Y, he needs to run chown as root in a userns where
3431 * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
3432 * X. That way, the container root is privileged with respect to
3433 * hostuid X, allowing him to do the chown.
f6d3e3e4 3434 */
c4d10a05 3435int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3436{
c4d10a05
SH
3437 uid_t rootid;
3438 pid_t pid;
2a9a80cb 3439 unsigned long val;
a7ef8753 3440 char *chownpath = path;
f6d3e3e4 3441
2a9a80cb 3442 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3443 ERROR("No mapping for container root");
3444 return -1;
f6d3e3e4 3445 }
2a9a80cb
SH
3446 rootid = (uid_t) val;
3447
a7ef8753
SH
3448 /*
3449 * In case of overlay, we want only the writeable layer
3450 * to be chowned
3451 */
1f92162d 3452 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3453 chownpath = strchr(path, ':');
3454 if (!chownpath) {
3455 ERROR("Bad overlay path: %s", path);
3456 return -1;
3457 }
3458 chownpath = strchr(chownpath+1, ':');
3459 if (!chownpath) {
3460 ERROR("Bad overlay path: %s", path);
3461 return -1;
3462 }
3463 chownpath++;
3464 }
3465 path = chownpath;
c4d10a05
SH
3466 if (geteuid() == 0) {
3467 if (chown(path, rootid, -1) < 0) {
3468 ERROR("Error chowning %s", path);
3469 return -1;
3470 }
3471 return 0;
3472 }
f3d7e4ca
SH
3473
3474 if (rootid == geteuid()) {
3475 // nothing to do
3476 INFO("%s: container root is our uid; no need to chown" ,__func__);
3477 return 0;
3478 }
3479
c4d10a05
SH
3480 pid = fork();
3481 if (pid < 0) {
3482 SYSERROR("Failed forking");
f6d3e3e4
SH
3483 return -1;
3484 }
c4d10a05
SH
3485 if (!pid) {
3486 int hostuid = geteuid(), ret;
98e5ba51
SH
3487 char map1[100], map2[100], map3[100];
3488 char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "-m",
3489 map3, "--", "chown", "0", path, NULL};
f6d3e3e4 3490
98e5ba51
SH
3491 // "u:0:rootid:1"
3492 ret = snprintf(map1, 100, "u:0:%d:1", rootid);
c4d10a05
SH
3493 if (ret < 0 || ret >= 100) {
3494 ERROR("Error uid printing map string");
f6d3e3e4
SH
3495 return -1;
3496 }
c4d10a05 3497
98e5ba51
SH
3498 // "u:hostuid:hostuid:1"
3499 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3500 if (ret < 0 || ret >= 100) {
3501 ERROR("Error uid printing map string");
3502 return -1;
3503 }
3504
3505 // "g:0:hostgid:1"
3506 ret = snprintf(map3, 100, "g:0:%d:1", getgid());
c4d10a05
SH
3507 if (ret < 0 || ret >= 100) {
3508 ERROR("Error uid printing map string");
3509 return -1;
3510 }
3511
3512 ret = execvp("lxc-usernsexec", args);
3513 SYSERROR("Failed executing usernsexec");
3514 exit(1);
f6d3e3e4 3515 }
c4d10a05 3516 return wait_for_pid(pid);
f6d3e3e4
SH
3517}
3518
c4d10a05 3519int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3520{
c4d10a05 3521 int i;
f6d3e3e4 3522
c4d10a05 3523 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3524 return 0;
c4d10a05
SH
3525
3526 for (i = 0; i < c->tty_info.nbtty; i++) {
3527 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3528
3529 if (chown_mapped_root(pty_info->name, c) < 0) {
3530 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3531 return -1;
3532 }
3533 }
3534
29b10e4f 3535 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3536 ERROR("Failed to chown %s", c->console.name);
3537 return -1;
3538 }
3539
f6d3e3e4
SH
3540 return 0;
3541}
3542
bc6928ff
MW
/* Start arguments as seen by check_autodev(): the command line to execute. */
struct start_args {
	char *const *argv;
};

#define MAX_SYMLINK_DEPTH 32

/*
 * This routine is called when the configuration does not already specify a value
 * for autodev (mounting a file system on /dev and populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container has an /etc/systemd/system directory then it
 * is probably running systemd as the init process and it needs the autodev
 * mount to prevent it from mounting devtmpfs on /dev on its own causing conflicts
 * in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev as this then enables us to use subdirectories under /dev for the container
 * /dev directories and we can fake udev devices.
 *
 * @rootfs : root file system of the container
 * @data   : struct start_args, or NULL to assume "/sbin/init"
 *
 * Returns 1 if autodev should be enabled, 0 if it is not required and -2
 * when this cannot be determined.
 */
static int check_autodev( const char *rootfs, void *data )
{
	struct start_args *arg = data;
	int ret;
	int loop_count = 0;
	struct stat s;
	char absrootfs[MAXPATHLEN];
	char path[MAXPATHLEN];
	char abs_path[MAXPATHLEN];
	const char *command = "/sbin/init";

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if( arg && arg->argv && arg->argv[0] ) {
		command = arg->argv[0];
		DEBUG("Set exec command to %s", command );
	}

	/* bounded copy that always NUL-terminates; refuse overlong commands */
	ret = snprintf(path, sizeof(path), "%s", command);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return -2;

	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) )
		return -2;

	/* Dereference down the symlink merry path testing as we go. */
	/* If anything references systemd in the path - set autodev! */
	/* Renormalize to the rootfs before each dereference */
	/* Relative symlinks should fall out in the wash even with .. */
	while( 1 ) {
		if ( strstr( path, "systemd" ) ) {
			INFO("Container with systemd init detected - enabling autodev!");
			return 1;
		}

		ret = snprintf(abs_path, sizeof(abs_path), "%s/%s", absrootfs, path);
		/* ret >= size means the result was truncated */
		if (ret < 0 || (size_t)ret >= sizeof(abs_path))
			return -2;

		/* leave room for the terminator readlink() does not add */
		ret = readlink( abs_path, path, sizeof(path) - 1 );

		if ( ( ret <= 0 ) || ( ++loop_count > MAX_SYMLINK_DEPTH ) ) {
			break; /* Break out for other tests */
		}
		path[ret] = '\0';
	}

	/*
	 * Add future checks here.
	 *   Return positive if we should go autodev
	 *   Return 0 if we should NOT go autodev
	 *   Return negative if we encounter an error or can not determine...
	 */

	/* All else fails, we don't need autodev */
	INFO("Autodev not required.");
	return 0;
}
3624
5112cd70
SH
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * <rootfs>/proc/self is read as a symlink: if it cannot be read, proc is
 * taken to be missing; if it does not point to "1", the proc found there
 * is the wrong one and is lazily unmounted before mounting a new one.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink() does not NUL-terminate: zero the buffer and keep the
	 * last byte out of its reach so link is always a valid string.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	/* we expect to be pid 1 in the proc we want */
	if (strcmp(link, "1") != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3666
3667int tmp_proc_mount(struct lxc_conf *lxc_conf)
3668{
3669 int mounted;
3670
3671 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3672 if (mount("proc", "/proc", "proc", 0, NULL)) {
3673 SYSERROR("Failed mounting /proc, proceeding");
3674 mounted = 0;
3675 } else
3676 mounted = 1;
3677 } else
3678 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3679 if (mounted == -1) {
3680 SYSERROR("failed to mount /proc in the container.");
3681 return -1;
3682 } else if (mounted == 1) {
3683 lxc_conf->tmp_umount_proc = 1;
3684 }
3685 return 0;
3686}
3687
3688void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3689{
3690 if (lxc_conf->tmp_umount_proc == 1) {
3691 umount("/proc");
3692 lxc_conf->tmp_umount_proc = 0;
3693 }
3694}
3695
d4ef7c50 3696int lxc_setup(struct lxc_handler *handler)
0ad19a3f 3697{
d4ef7c50
SH
3698 const char *name = handler->name;
3699 struct lxc_conf *lxc_conf = handler->conf;
3700 const char *lxcpath = handler->lxcpath;
3701 void *data = handler->data;
d4ef7c50 3702
cd698bdd
FK
3703 if (detect_shared_rootfs()) {
3704 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
3705 SYSERROR("Failed to make / rslave");
3706 ERROR("Continuing...");
3707 }
3708 }
3709 if (detect_ramfs_rootfs()) {
3710 if (chroot_into_slave(lxc_conf)) {
3711 ERROR("Failed to chroot into slave /");
3712 return -1;
3713 }
3714 }
3715
6c544cb3
MM
3716 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3717 if (setup_utsname(lxc_conf->utsname)) {
3718 ERROR("failed to setup the utsname for '%s'", name);
3719 return -1;
3720 }
0ad19a3f 3721 }
3722
5f4535a3 3723 if (setup_network(&lxc_conf->network)) {
36eb9bde 3724 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3725 return -1;
0ad19a3f 3726 }
3727
283678ed 3728 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3729 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3730 return -1;
3731 }
5ea6163a 3732
cc28d0b0 3733 if (setup_rootfs(lxc_conf)) {
ac778708 3734 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3735 return -1;
0ad19a3f 3736 }
3737
bc6928ff
MW
3738 if (lxc_conf->autodev < 0) {
3739 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
3740 }
3741
3742 if (lxc_conf->autodev > 0) {
3743 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3744 ERROR("failed to mount /dev in the container");
c6883f38
SH
3745 return -1;
3746 }
3747 }
3748
368bbc02
CS
3749 /* do automatic mounts (mainly /proc and /sys), but exclude
3750 * those that need to wait until other stuff has finished
3751 */
4fb3cba5 3752 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3753 ERROR("failed to setup the automatic mounts for '%s'", name);
3754 return -1;
3755 }
3756
80a881b2 3757 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3758 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3759 return -1;
576f946d 3760 }
3761
c1dc38c2 3762 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3763 ERROR("failed to setup the mount entries for '%s'", name);
3764 return -1;
3765 }
3766
368bbc02
CS
3767 /* now mount only cgroup, if wanted;
3768 * before, /sys could not have been mounted
3769 * (is either mounted automatically or via fstab entries)
3770 */
4fb3cba5 3771 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3772 ERROR("failed to setup the automatic mounts for '%s'", name);
3773 return -1;
3774 }
3775
283678ed 3776 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3777 ERROR("failed to run mount hooks for container '%s'.", name);
3778 return -1;
3779 }
3780
bc6928ff 3781 if (lxc_conf->autodev > 0) {
283678ed 3782 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3783 ERROR("failed to run autodev hooks for container '%s'.", name);
3784 return -1;
3785 }
91c3830e
SH
3786 if (setup_autodev(lxc_conf->rootfs.mount)) {
3787 ERROR("failed to populate /dev in the container");
3788 return -1;
3789 }
3790 }
368bbc02 3791
37903589 3792 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3793 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3794 return -1;
6e590161 3795 }
3796
7e0e1d94
AV
3797 if (lxc_conf->kmsg) {
3798 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3799 ERROR("failed to setup kmsg for '%s'", name);
3800 }
1bd051a6 3801
37903589 3802 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3803 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3804 return -1;
b0a33c1e 3805 }
3806
69aa6655
DE
3807 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3808 ERROR("failed to setup /dev symlinks for '%s'", name);
3809 return -1;
3810 }
3811
5112cd70
SH
3812 /* mount /proc if it's not already there */
3813 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3814 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3815 return -1;
e075f5d9 3816 }
e075f5d9 3817
ac778708 3818 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3819 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3820 return -1;
ed502555 3821 }
3822
571e6ec8 3823 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3824 ERROR("failed to setup the new pts instance");
95b5ffaf 3825 return -1;
3c26f34e 3826 }
3827
cccc74b5
DL
3828 if (setup_personality(lxc_conf->personality)) {
3829 ERROR("failed to setup personality");
3830 return -1;
3831 }
3832
f6d3e3e4 3833 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3834 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3835 if (!lxc_list_empty(&lxc_conf->caps)) {
3836 ERROR("Simultaneously requested dropping and keeping caps");
3837 return -1;
3838 }
3839 if (dropcaps_except(&lxc_conf->keepcaps)) {
959aee9c 3840 ERROR("failed to keep requested caps");
1fb86a7c
SH
3841 return -1;
3842 }
3843 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3844 ERROR("failed to drop capabilities");
3845 return -1;
3846 }
81810dd1
DL
3847 }
3848
cd54d859
DL
3849 NOTICE("'%s' is setup.", name);
3850
0ad19a3f 3851 return 0;
3852}
26ddeedd 3853
283678ed
SH
3854int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3855 const char *lxcpath, char *argv[])
26ddeedd
SH
3856{
3857 int which = -1;
3858 struct lxc_list *it;
3859
3860 if (strcmp(hook, "pre-start") == 0)
3861 which = LXCHOOK_PRESTART;
5ea6163a
SH
3862 else if (strcmp(hook, "pre-mount") == 0)
3863 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3864 else if (strcmp(hook, "mount") == 0)
3865 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3866 else if (strcmp(hook, "autodev") == 0)
3867 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3868 else if (strcmp(hook, "start") == 0)
3869 which = LXCHOOK_START;
3870 else if (strcmp(hook, "post-stop") == 0)
3871 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3872 else if (strcmp(hook, "clone") == 0)
3873 which = LXCHOOK_CLONE;
26ddeedd
SH
3874 else
3875 return -1;
3876 lxc_list_for_each(it, &conf->hooks[which]) {
3877 int ret;
3878 char *hookname = it->elem;
283678ed 3879 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3880 if (ret)
3881 return ret;
3882 }
3883 return 0;
3884}
72d0e1cb 3885
427b3a21 3886static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3887{
3888 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3889 struct lxc_list *it2,*next;
72d0e1cb
SG
3890
3891 lxc_list_del(it);
3892
3893 if (netdev->link)
3894 free(netdev->link);
3895 if (netdev->name)
3896 free(netdev->name);
c9bb9a85
DE
3897 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3898 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3899 if (netdev->upscript)
3900 free(netdev->upscript);
3901 if (netdev->hwaddr)
3902 free(netdev->hwaddr);
3903 if (netdev->mtu)
3904 free(netdev->mtu);
3905 if (netdev->ipv4_gateway)
3906 free(netdev->ipv4_gateway);
3907 if (netdev->ipv6_gateway)
3908 free(netdev->ipv6_gateway);
9ebb03ad 3909 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3910 lxc_list_del(it2);
3911 free(it2->elem);
3912 free(it2);
3913 }
9ebb03ad 3914 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3915 lxc_list_del(it2);
3916 free(it2->elem);
3917 free(it2);
3918 }
d95db067 3919 free(netdev);
72d0e1cb
SG
3920 free(it);
3921}
3922
3923/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3924int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3925{
3926 char *p1;
3927 int ret, idx, i;
3928 struct lxc_list *it;
3929 struct lxc_netdev *netdev;
3930
3931 p1 = index(key, '.');
3932 if (!p1 || *(p1+1) == '\0')
3933 p1 = NULL;
3934
3935 ret = sscanf(key, "%d", &idx);
3936 if (ret != 1) return -1;
3937 if (idx < 0)
3938 return -1;
3939
3940 i = 0;
3941 lxc_list_for_each(it, &c->network) {
3942 if (i == idx)
3943 break;
3944 i++;
3945 }
3946 if (i < idx) // we don't have that many nics defined
3947 return -1;
3948
3949 if (!it || !it->elem)
3950 return -1;
3951
3952 netdev = it->elem;
3953
3954 if (!p1) {
3955 lxc_remove_nic(it);
52d21d40 3956 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3957 struct lxc_list *it2,*next;
3958 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3959 lxc_list_del(it2);
3960 free(it2->elem);
3961 free(it2);
3962 }
52d21d40 3963 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
3964 struct lxc_list *it2,*next;
3965 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3966 lxc_list_del(it2);
3967 free(it2->elem);
3968 free(it2);
3969 }
52d21d40 3970 } else if (strcmp(p1, ".link") == 0) {
72d0e1cb
SG
3971 if (netdev->link) {
3972 free(netdev->link);
3973 netdev->link = NULL;
3974 }
52d21d40 3975 } else if (strcmp(p1, ".name") == 0) {
72d0e1cb
SG
3976 if (netdev->name) {
3977 free(netdev->name);
3978 netdev->name = NULL;
3979 }
52d21d40 3980 } else if (strcmp(p1, ".script.up") == 0) {
72d0e1cb
SG
3981 if (netdev->upscript) {
3982 free(netdev->upscript);
3983 netdev->upscript = NULL;
3984 }
52d21d40 3985 } else if (strcmp(p1, ".hwaddr") == 0) {
72d0e1cb
SG
3986 if (netdev->hwaddr) {
3987 free(netdev->hwaddr);
3988 netdev->hwaddr = NULL;
3989 }
52d21d40 3990 } else if (strcmp(p1, ".mtu") == 0) {
72d0e1cb
SG
3991 if (netdev->mtu) {
3992 free(netdev->mtu);
3993 netdev->mtu = NULL;
3994 }
52d21d40 3995 } else if (strcmp(p1, ".ipv4_gateway") == 0) {
72d0e1cb
SG
3996 if (netdev->ipv4_gateway) {
3997 free(netdev->ipv4_gateway);
3998 netdev->ipv4_gateway = NULL;
3999 }
52d21d40 4000 } else if (strcmp(p1, ".ipv6_gateway") == 0) {
72d0e1cb
SG
4001 if (netdev->ipv6_gateway) {
4002 free(netdev->ipv6_gateway);
4003 netdev->ipv6_gateway = NULL;
4004 }
4005 }
4006 else return -1;
4007
4008 return 0;
4009}
4010
4011int lxc_clear_config_network(struct lxc_conf *c)
4012{
9ebb03ad
DE
4013 struct lxc_list *it,*next;
4014 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4015 lxc_remove_nic(it);
4016 }
4017 return 0;
4018}
4019
4020int lxc_clear_config_caps(struct lxc_conf *c)
4021{
9ebb03ad 4022 struct lxc_list *it,*next;
72d0e1cb 4023
9ebb03ad 4024 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4025 lxc_list_del(it);
4026 free(it->elem);
4027 free(it);
4028 }
4029 return 0;
4030}
4031
74a3920a 4032static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4033 struct lxc_list *it, *next;
4034
4355ab5f 4035 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4036 lxc_list_del(it);
4037 free(it->elem);
4038 free(it);
4039 }
4040 return 0;
4041}
4042
4355ab5f
SH
4043int lxc_clear_idmaps(struct lxc_conf *c)
4044{
4045 return lxc_free_idmap(&c->id_map);
4046}
4047
1fb86a7c
SH
4048int lxc_clear_config_keepcaps(struct lxc_conf *c)
4049{
4050 struct lxc_list *it,*next;
4051
4052 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4053 lxc_list_del(it);
4054 free(it->elem);
4055 free(it);
4056 }
4057 return 0;
4058}
4059
12a50cc6 4060int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4061{
9ebb03ad 4062 struct lxc_list *it,*next;
72d0e1cb 4063 bool all = false;
12a50cc6 4064 const char *k = key + 11;
72d0e1cb
SG
4065
4066 if (strcmp(key, "lxc.cgroup") == 0)
4067 all = true;
4068
9ebb03ad 4069 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4070 struct lxc_cgroup *cg = it->elem;
4071 if (!all && strcmp(cg->subsystem, k) != 0)
4072 continue;
4073 lxc_list_del(it);
4074 free(cg->subsystem);
4075 free(cg->value);
4076 free(cg);
4077 free(it);
4078 }
4079 return 0;
4080}
4081
ee1e7aa0
SG
4082int lxc_clear_groups(struct lxc_conf *c)
4083{
4084 struct lxc_list *it,*next;
4085
4086 lxc_list_for_each_safe(it, &c->groups, next) {
4087 lxc_list_del(it);
4088 free(it->elem);
4089 free(it);
4090 }
4091 return 0;
4092}
4093
72d0e1cb
SG
4094int lxc_clear_mount_entries(struct lxc_conf *c)
4095{
9ebb03ad 4096 struct lxc_list *it,*next;
72d0e1cb 4097
9ebb03ad 4098 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4099 lxc_list_del(it);
4100 free(it->elem);
4101 free(it);
4102 }
4103 return 0;
4104}
4105
b099e9e9
SH
4106int lxc_clear_automounts(struct lxc_conf *c)
4107{
4108 c->auto_mounts = 0;
4109 return 0;
4110}
4111
12a50cc6 4112int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4113{
9ebb03ad 4114 struct lxc_list *it,*next;
17ed13a3 4115 bool all = false, done = false;
12a50cc6 4116 const char *k = key + 9;
72d0e1cb
SG
4117 int i;
4118
17ed13a3
SH
4119 if (strcmp(key, "lxc.hook") == 0)
4120 all = true;
4121
72d0e1cb 4122 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4123 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4124 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4125 lxc_list_del(it);
4126 free(it->elem);
4127 free(it);
4128 }
4129 done = true;
72d0e1cb
SG
4130 }
4131 }
17ed13a3
SH
4132
4133 if (!done) {
4134 ERROR("Invalid hook key: %s", key);
4135 return -1;
4136 }
72d0e1cb
SG
4137 return 0;
4138}
8eb5694b 4139
74a3920a 4140static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4141{
4142 int i;
4143
0cf45501 4144 if (!conf->saved_nics)
7b35f3d6
SH
4145 return;
4146 for (i=0; i < conf->num_savednics; i++)
4147 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4148 free(conf->saved_nics);
4149}
4150
8eb5694b
SH
4151void lxc_conf_free(struct lxc_conf *conf)
4152{
4153 if (!conf)
4154 return;
4155 if (conf->console.path)
4156 free(conf->console.path);
54c30e29 4157 if (conf->rootfs.mount)
8eb5694b 4158 free(conf->rootfs.mount);
a17b1e65
SG
4159 if (conf->rootfs.options)
4160 free(conf->rootfs.options);
d95db067
DE
4161 if (conf->rootfs.path)
4162 free(conf->rootfs.path);
a58878d6
SH
4163 if (conf->rootfs.pivot)
4164 free(conf->rootfs.pivot);
4165 if (conf->logfile)
4166 free(conf->logfile);
d95db067
DE
4167 if (conf->utsname)
4168 free(conf->utsname);
4169 if (conf->ttydir)
4170 free(conf->ttydir);
4171 if (conf->fstab)
4172 free(conf->fstab);
fc7e8864
WM
4173 if (conf->rcfile)
4174 free(conf->rcfile);
8eb5694b 4175 lxc_clear_config_network(conf);
fe4de9a6
DE
4176 if (conf->lsm_aa_profile)
4177 free(conf->lsm_aa_profile);
4178 if (conf->lsm_se_context)
4179 free(conf->lsm_se_context);
769872f9 4180 lxc_seccomp_free(conf);
8eb5694b 4181 lxc_clear_config_caps(conf);
1fb86a7c 4182 lxc_clear_config_keepcaps(conf);
8eb5694b 4183 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4184 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4185 lxc_clear_mount_entries(conf);
7b35f3d6 4186 lxc_clear_saved_nics(conf);
27c27d73 4187 lxc_clear_idmaps(conf);
ee1e7aa0 4188 lxc_clear_groups(conf);
8eb5694b
SH
4189 free(conf);
4190}
4355ab5f
SH
4191
/*
 * Arguments handed to run_userns_fn():
 *   fn  - function to call
 *   arg - argument passed to fn
 *   p   - pipe; run_userns_fn() reads p[0] and closes p[1]
 */
struct userns_fn_data {
	int (*fn)(void *);
	void *arg;
	int p[2];
};

/*
 * run_userns_fn: wait for one byte on the pipe, then call the wrapped
 * function.
 *
 * @data : struct userns_fn_data
 *
 * Returns -1 if the byte cannot be read, otherwise the wrapped function's
 * return value.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *args = data;
	char token;
	ssize_t got;

	// we're not sharing with the parent any more, if it was a thread
	close(args->p[1]);

	got = read(args->p[0], &token, 1);
	if (got != 1)
		return -1;

	close(args->p[0]);

	return args->fn(args->arg);
}
4210
4211/*
4212 * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
4213 * alread there.
4214 * We may want to generalize this to do gids as well as uids, but right now
4215 * it's not necessary.
4216 */
4217static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
4218{
2133f58c 4219 int hostid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4355ab5f
SH
4220 struct lxc_list *new = NULL, *tmp, *it, *next;
4221 struct id_map *entry;
4222
3ec1648d
SH
4223 new = malloc(sizeof(*new));
4224 if (!new) {
4225 ERROR("Out of memory building id map");
4226 return NULL;
4227 }
4228 lxc_list_init(new);
4229
4355ab5f 4230 if (hostid_mapped < 0) {
2133f58c 4231 hostid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
3ec1648d
SH
4232 if (hostid_mapped < 0)
4233 goto err;
4234 tmp = malloc(sizeof(*tmp));
4235 if (!tmp)
4236 goto err;
4355ab5f
SH
4237 entry = malloc(sizeof(*entry));
4238 if (!entry) {
3ec1648d
SH
4239 free(tmp);
4240 goto err;
4355ab5f 4241 }
3ec1648d 4242 tmp->elem = entry;
4355ab5f
SH
4243 entry->idtype = ID_TYPE_UID;
4244 entry->nsid = hostid_mapped;
4245 entry->hostid = (unsigned long)uid;
4246 entry->range = 1;
3ec1648d 4247 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4248 }
4249 lxc_list_for_each_safe(it, &conf->id_map, next) {
4250 tmp = malloc(sizeof(*tmp));
4251 if (!tmp)
4252 goto err;
4253 entry = malloc(sizeof(*entry));
4254 if (!entry) {
4255 free(tmp);
4256 goto err;
4257 }
4258 memset(entry, 0, sizeof(*entry));
4259 memcpy(entry, it->elem, sizeof(*entry));
4260 tmp->elem = entry;
3ec1648d 4261 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4262 }
4263
4264 return new;
4265
4266err:
4267 ERROR("Out of memory building a new uid map");
908fde6a
SH
4268 if (new)
4269 lxc_free_idmap(new);
c30ac545 4270 free(new);
4355ab5f
SH
4271 return NULL;
4272}
4273
4274/*
4275 * Run a function in a new user namespace.
4276 * The caller's euid will be mapped in if it is not already.
4277 */
4278int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4279{
4280 int ret, pid;
4281 struct userns_fn_data d;
4282 char c = '1';
4283 int p[2];
4284 struct lxc_list *idmap;
4285
4355ab5f 4286 ret = pipe(p);
4355ab5f
SH
4287 if (ret < 0) {
4288 SYSERROR("opening pipe");
4289 return -1;
4290 }
4291 d.fn = fn;
4292 d.arg = data;
4293 d.p[0] = p[0];
4294 d.p[1] = p[1];
4295 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4296 if (pid < 0)
4297 goto err;
4355ab5f 4298 close(p[0]);
4355ab5f
SH
4299 p[0] = -1;
4300
4301 if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
4302 ERROR("Error adding self to container uid map");
4303 goto err;
4304 }
4305
4306 ret = lxc_map_ids(idmap, pid);
4307 lxc_free_idmap(idmap);
88dd66fc 4308 free(idmap);
565e571c 4309 if (ret) {
4355ab5f
SH
4310 ERROR("Error setting up child mappings");
4311 goto err;
4312 }
4313
4314 // kick the child
4315 if (write(p[1], &c, 1) != 1) {
4316 SYSERROR("writing to pipe to child");
4317 goto err;
4318 }
4319
3139aead
SG
4320 ret = wait_for_pid(pid);
4321
4322 close(p[1]);
4323 return ret;
4324
4355ab5f 4325err:
4355ab5f
SH
4326 if (p[0] != -1)
4327 close(p[0]);
4328 close(p[1]);
4355ab5f
SH
4329 return -1;
4330}