]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
doc: Add the note related mount in Japanese lxc.container.conf(5)
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
e8bd4e43 68#include "af_unix.h"
b2718c72 69#include "parse.h"
1b09f2c0
DL
70#include "utils.h"
71#include "conf.h"
72#include "log.h"
d55bc1ad 73#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 74#include "bdev.h"
368bbc02 75#include "cgroup.h"
025ed0f3 76#include "lxclock.h"
4355ab5f 77#include "namespace.h"
fe4de9a6 78#include "lsm/lsm.h"
d0a36f2c 79
495d2046
SG
80#if HAVE_SYS_CAPABILITY_H
81#include <sys/capability.h>
82#endif
83
6ff05e18
SG
84#if HAVE_SYS_PERSONALITY_H
85#include <sys/personality.h>
86#endif
87
edaf8b1b
SG
88#if IS_BIONIC
89#include <../include/lxcmntent.h>
90#else
91#include <mntent.h>
92#endif
93
769872f9
SH
94#include "lxcseccomp.h"
95
36eb9bde 96lxc_log_define(lxc_conf, lxc);
e5bda9ee 97
87da4ec3 98#define LINELEN 4096
0ad19a3f 99
495d2046 100#if HAVE_SYS_CAPABILITY_H
b09094da
MN
101#ifndef CAP_SETFCAP
102#define CAP_SETFCAP 31
103#endif
104
105#ifndef CAP_MAC_OVERRIDE
106#define CAP_MAC_OVERRIDE 32
107#endif
108
109#ifndef CAP_MAC_ADMIN
110#define CAP_MAC_ADMIN 33
111#endif
495d2046 112#endif
b09094da
MN
113
114#ifndef PR_CAPBSET_DROP
115#define PR_CAPBSET_DROP 24
116#endif
117
9818cae4
SG
118#ifndef LO_FLAGS_AUTOCLEAR
119#define LO_FLAGS_AUTOCLEAR 4
120#endif
121
0769b82a
CS
122/* needed for cgroup automount checks, regardless of whether we
123 * have included linux/capability.h or not */
124#ifndef CAP_SYS_ADMIN
125#define CAP_SYS_ADMIN 21
126#endif
127
2d76d1d7
SG
/*
 * pivot_root(): use the C library's declaration when HAVE_PIVOT_ROOT is
 * defined; otherwise provide a local wrapper around the raw system call.
 * If the syscall number is not known either, the wrapper fails with ENOSYS.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#if defined(__NR_pivot_root)
	return syscall(__NR_pivot_root, new_root, put_old);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
142
/*
 * sethostname(): local wrapper around the raw system call, compiled only
 * when HAVE_SETHOSTNAME is not defined.  Fails with ENOSYS when the syscall
 * number is not known.
 */
#if !defined(HAVE_SETHOSTNAME)
static int sethostname(const char * name, size_t len)
{
#if defined(__NR_sethostname)
	return syscall(__NR_sethostname, name, len);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
155
72f919c4
SG
156/* Define __S_ISTYPE if missing from the C library */
157#ifndef __S_ISTYPE
158#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
159#endif
160
ecec0126
SG
161#ifndef MS_PRIVATE
162#define MS_PRIVATE (1<<18)
163#endif
164
72d0e1cb 165char *lxchook_names[NUM_LXC_HOOKS] = {
37cf711b 166 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone", "destroy" };
72d0e1cb 167
a589434e 168typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 169
998ac676
RT
/* One row of the mount option table: an option keyword and its MS_* bits. */
struct mount_opt {
	char *name;	/* option keyword as written in a mount option string */
	int clear;	/* NOTE(review): presumably non-zero means "remove flag"
			 * instead of "add flag"; the consumer is not visible here */
	int flag;	/* mount flag bit(s) tied to the keyword */
};
175
81810dd1
DL
/* One row of the capability table: a capability keyword and its number. */
struct caps_opt {
	char *name;	/* capability keyword, e.g. "sys_admin" */
	int value;	/* matching CAP_* constant */
};
180
858377e4
SH
/*
 * lxc_conf of the container the current API call is working on; it is
 * consulted by the error-reporting calls.  Declared thread-local when
 * HAVE_TLS is defined, otherwise as a single process-wide pointer.
 */
#ifndef HAVE_TLS
struct lxc_conf *current_config;
#else
__thread struct lxc_conf *current_config;
#endif
191
0769b82a
CS
192/* Declare this here, since we don't want to reshuffle the whole file. */
193static int in_caplist(int cap, struct lxc_list *caps);
194
a589434e
JN
195static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
196static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
197static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
198static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
199static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
200static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
201
202static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
203 [LXC_NET_VETH] = instantiate_veth,
204 [LXC_NET_MACVLAN] = instantiate_macvlan,
205 [LXC_NET_VLAN] = instantiate_vlan,
206 [LXC_NET_PHYS] = instantiate_phys,
207 [LXC_NET_EMPTY] = instantiate_empty,
208 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 209};
210
74a2b586
JK
211static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
212static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
213static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
214static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
215static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 216static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 217
a589434e 218static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
219 [LXC_NET_VETH] = shutdown_veth,
220 [LXC_NET_MACVLAN] = shutdown_macvlan,
221 [LXC_NET_VLAN] = shutdown_vlan,
222 [LXC_NET_PHYS] = shutdown_phys,
223 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 224 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
225};
226
998ac676 227static struct mount_opt mount_opt[] = {
88d413d5
SW
228 { "defaults", 0, 0 },
229 { "ro", 0, MS_RDONLY },
230 { "rw", 1, MS_RDONLY },
231 { "suid", 1, MS_NOSUID },
232 { "nosuid", 0, MS_NOSUID },
233 { "dev", 1, MS_NODEV },
234 { "nodev", 0, MS_NODEV },
235 { "exec", 1, MS_NOEXEC },
236 { "noexec", 0, MS_NOEXEC },
237 { "sync", 0, MS_SYNCHRONOUS },
238 { "async", 1, MS_SYNCHRONOUS },
239 { "dirsync", 0, MS_DIRSYNC },
240 { "remount", 0, MS_REMOUNT },
241 { "mand", 0, MS_MANDLOCK },
242 { "nomand", 1, MS_MANDLOCK },
243 { "atime", 1, MS_NOATIME },
244 { "noatime", 0, MS_NOATIME },
245 { "diratime", 1, MS_NODIRATIME },
246 { "nodiratime", 0, MS_NODIRATIME },
247 { "bind", 0, MS_BIND },
248 { "rbind", 0, MS_BIND|MS_REC },
249 { "relatime", 0, MS_RELATIME },
250 { "norelatime", 1, MS_RELATIME },
251 { "strictatime", 0, MS_STRICTATIME },
252 { "nostrictatime", 1, MS_STRICTATIME },
253 { NULL, 0, 0 },
998ac676
RT
254};
255
495d2046 256#if HAVE_SYS_CAPABILITY_H
81810dd1 257static struct caps_opt caps_opt[] = {
a6afdde9 258 { "chown", CAP_CHOWN },
1e11be34
DL
259 { "dac_override", CAP_DAC_OVERRIDE },
260 { "dac_read_search", CAP_DAC_READ_SEARCH },
261 { "fowner", CAP_FOWNER },
262 { "fsetid", CAP_FSETID },
81810dd1
DL
263 { "kill", CAP_KILL },
264 { "setgid", CAP_SETGID },
265 { "setuid", CAP_SETUID },
266 { "setpcap", CAP_SETPCAP },
267 { "linux_immutable", CAP_LINUX_IMMUTABLE },
268 { "net_bind_service", CAP_NET_BIND_SERVICE },
269 { "net_broadcast", CAP_NET_BROADCAST },
270 { "net_admin", CAP_NET_ADMIN },
271 { "net_raw", CAP_NET_RAW },
272 { "ipc_lock", CAP_IPC_LOCK },
273 { "ipc_owner", CAP_IPC_OWNER },
274 { "sys_module", CAP_SYS_MODULE },
275 { "sys_rawio", CAP_SYS_RAWIO },
276 { "sys_chroot", CAP_SYS_CHROOT },
277 { "sys_ptrace", CAP_SYS_PTRACE },
278 { "sys_pacct", CAP_SYS_PACCT },
279 { "sys_admin", CAP_SYS_ADMIN },
280 { "sys_boot", CAP_SYS_BOOT },
281 { "sys_nice", CAP_SYS_NICE },
282 { "sys_resource", CAP_SYS_RESOURCE },
283 { "sys_time", CAP_SYS_TIME },
284 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
285 { "mknod", CAP_MKNOD },
286 { "lease", CAP_LEASE },
57b837e2
CB
287#ifdef CAP_AUDIT_READ
288 { "audit_read", CAP_AUDIT_READ },
289#endif
9527e566 290#ifdef CAP_AUDIT_WRITE
81810dd1 291 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
292#endif
293#ifdef CAP_AUDIT_CONTROL
81810dd1 294 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 295#endif
81810dd1
DL
296 { "setfcap", CAP_SETFCAP },
297 { "mac_override", CAP_MAC_OVERRIDE },
298 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
299#ifdef CAP_SYSLOG
300 { "syslog", CAP_SYSLOG },
301#endif
302#ifdef CAP_WAKE_ALARM
303 { "wake_alarm", CAP_WAKE_ALARM },
304#endif
2b54359b
CB
305#ifdef CAP_BLOCK_SUSPEND
306 { "block_suspend", CAP_BLOCK_SUSPEND },
307#endif
81810dd1 308};
495d2046
SG
309#else
310static struct caps_opt caps_opt[] = {};
311#endif
81810dd1 312
91c3830e
SH
313static int run_buffer(char *buffer)
314{
ebec9176 315 struct lxc_popen_FILE *f;
91c3830e 316 char *output;
8e7da691 317 int ret;
91c3830e 318
ebec9176 319 f = lxc_popen(buffer);
91c3830e
SH
320 if (!f) {
321 SYSERROR("popen failed");
322 return -1;
323 }
324
325 output = malloc(LXC_LOG_BUFFER_SIZE);
326 if (!output) {
327 ERROR("failed to allocate memory for script output");
ebec9176 328 lxc_pclose(f);
91c3830e
SH
329 return -1;
330 }
331
ebec9176 332 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
333 DEBUG("script output: %s", output);
334
335 free(output);
336
ebec9176 337 ret = lxc_pclose(f);
8e7da691 338 if (ret == -1) {
91c3830e
SH
339 SYSERROR("Script exited on error");
340 return -1;
8e7da691
DE
341 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
342 ERROR("Script exited with status %d", WEXITSTATUS(ret));
343 return -1;
344 } else if (WIFSIGNALED(ret)) {
345 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
346 strsignal(WTERMSIG(ret)));
347 return -1;
91c3830e
SH
348 }
349
350 return 0;
351}
352
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * run it with run_buffer().
 *
 * @name, @section, @script, @hook: become the first four words of the command.
 * @lxcpath: not used by this function.
 * @argsin: optional NULL-terminated list of extra arguments (may be NULL).
 *
 * The command line is built on the heap: its length depends on
 * caller-supplied strings and is only bounded by INT_MAX, which is far more
 * than is safe to take from the stack.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be assembled.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* one extra byte per argument for the separating space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	/* the +1 here accounts for the terminating NUL */
	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* the three spaces between the four fixed words */

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			free(buffer);
			return -1;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
402
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and run it
 * with run_buffer().
 *
 * The variadic arguments are C strings and the list MUST end with a NULL
 * pointer.
 *
 * The command line is built on the heap: its length depends on
 * caller-supplied strings and is only bounded by INT_MAX.  Every
 * va_start() is paired with a va_end(), including on the error path.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be assembled.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: one extra byte per argument for the separating space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two separating spaces plus the terminating NUL */

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
454
/*
 * Per-line callback used while scanning a list of filesystem types: treat
 * the line as a filesystem type and try to mount cbarg->rootfs on
 * cbarg->target with it.
 *
 * @buffer: one line of the file being scanned; trimmed in place.
 * @data:   pointer to a struct with rootfs/target/options members, laid out
 *          like the local struct cbarg below.
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped or the
 * mount failed with this type, and -1 when the mount options could not be
 * parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/* initialized so free() below is safe even if parse_mntopts() bails
	 * out before storing anything */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
499
a17b1e65
SG
500static int mount_unknown_fs(const char *rootfs, const char *target,
501 const char *options)
78ae2fcc 502{
a6afdde9 503 int i;
78ae2fcc 504
505 struct cbarg {
506 const char *rootfs;
a6afdde9 507 const char *target;
a17b1e65 508 const char *options;
78ae2fcc 509 } cbarg = {
510 .rootfs = rootfs,
a6afdde9 511 .target = target,
a17b1e65 512 .options = options,
78ae2fcc 513 };
514
a6afdde9
DL
515 /*
516 * find the filesystem type with brute force:
517 * first we check with /etc/filesystems, in case the modules
78ae2fcc 518 * are auto-loaded and fall back to the supported kernel fs
519 */
520 char *fsfile[] = {
521 "/etc/filesystems",
522 "/proc/filesystems",
523 };
524
a6afdde9
DL
525 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
526
527 int ret;
528
529 if (access(fsfile[i], F_OK))
530 continue;
531
532 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
533 if (ret < 0) {
534 ERROR("failed to parse '%s'", fsfile[i]);
535 return -1;
536 }
537
538 if (ret)
539 return 0;
78ae2fcc 540 }
541
a6afdde9
DL
542 ERROR("failed to determine fs type for '%s'", rootfs);
543 return -1;
544}
545
a17b1e65
SG
/*
 * Mount a directory rootfs by recursively bind-mounting @rootfs on @target,
 * adding any flags and data parsed from @options.
 *
 * Returns the result of mount(2), or -1 if @options could not be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/* initialized so free() below is safe even if parse_mntopts() bails
	 * out before storing anything */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
563
/*
 * Attach the file @rootfs to the loop device open on @fd and mark the
 * device LO_FLAGS_AUTOCLEAR.
 *
 * @loinfo is zeroed and filled by this function before being handed to the
 * kernel.
 *
 * If the file was attached but the status could not be set, the device is
 * detached again; otherwise it would stay bound to the file without the
 * autoclear flag.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		int saved_errno = errno;

		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD so the device does not stay attached */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		errno = saved_errno;
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
595
a17b1e65
SG
/*
 * Mount a regular-file rootfs: find a free /dev/loop* device, attach
 * @rootfs to it and mount the loop device on @target.
 *
 * A loop device counts as free when LOOP_GET_STATUS64 fails with ENXIO.
 * Only the first free device is tried.
 *
 * The directory is walked with readdir(): the stream is private to this
 * call, and readdir_r() is deprecated because its caller-supplied
 * struct dirent may be too small for the entry name.
 *
 * Returns 0 on success, -1 if /dev cannot be read, no free loop device was
 * found, or attaching/mounting failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((direntp = readdir(dir)) != NULL) {
		int fd;

		if (!strcmp(direntp->d_name, "."))
			continue;

		if (!strcmp(direntp->d_name, ".."))
			continue;

		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* status readable: the device is already in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
660
a17b1e65
SG
/*
 * Mount a block-device rootfs.  The filesystem type is not known up front,
 * so this simply defers to the type-probing mount.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc = mount_unknown_fs(rootfs, target, options);

	return rc;
}
666
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing for the
 * duration of the container run, to prevent the container from marking the
 * underlying fs read-only on shutdown.  The file is unlinked immediately,
 * so no name is left behind in the rootfs.
 *
 * Returns:
 *   an open fd (>= 0) if the rootfs was pinned;
 *   -2 if nothing needed to be pinned (rootfs is NULL or empty, cannot be
 *      resolved with realpath(), or is not a directory);
 *   -1 on error.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	(void)unlink(absrootfspin);
	return fd;
}
709
e2a7e8dc
SH
/*
 * When @flags asks for a remount, add whichever of NOSUID, NODEV, RDONLY
 * and NOEXEC are currently set on the filesystem holding @s (or @d when @s
 * is NULL), so that the remount does not drop them.
 *
 * @flags is returned unchanged when it is not a remount, when neither path
 * is given, when statvfs() fails, or when built without HAVE_STATVFS.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs vfs;
	size_t k;

	if ((flags & MS_REMOUNT) == 0 || path == NULL)
		return flags;

	if (statvfs(path, &vfs) < 0)
		return flags;

	for (k = 0; k < sizeof(sticky) / sizeof(sticky[0]); k++) {
		if (vfs.f_flag & sticky[k])
			flags |= sticky[k];
	}

	return flags;
#else
	return flags;
#endif
}
746
4fb3cba5 747static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 748{
368bbc02 749 int r;
b06b8511
CS
750 size_t i;
751 static struct {
752 int match_mask;
753 int match_flag;
754 const char *source;
755 const char *destination;
756 const char *fstype;
757 unsigned long flags;
758 const char *options;
759 } default_mounts[] = {
760 /* Read-only bind-mounting... In older kernels, doing that required
761 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
762 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
763 * kernel 2.6.26 onwards. However, this apparently does not work on
764 * kernel 3.8. Unfortunately, on that very same kernel, doing the
765 * same trick as above doesn't seem to work either, there one needs
766 * to ALSO specify MS_BIND for the remount, otherwise the entire
767 * fs is remounted read-only or the mount fails because it's busy...
768 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
769 * 2.6.32...
368bbc02 770 */
f24a52d5 771 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
772 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
773 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
774 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
775 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 776 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
777 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
778 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
779 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
780 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
781 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
782 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
783 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
784 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
785 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
786 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
787 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
788 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 789 };
368bbc02 790
b06b8511
CS
791 for (i = 0; default_mounts[i].match_mask; i++) {
792 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
793 char *source = NULL;
794 char *destination = NULL;
795 int saved_errno;
e2a7e8dc 796 unsigned long mflags;
b06b8511
CS
797
798 if (default_mounts[i].source) {
799 /* will act like strdup if %r is not present */
8ede5f4c 800 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
801 if (!source) {
802 SYSERROR("memory allocation error");
803 return -1;
804 }
805 }
806 if (default_mounts[i].destination) {
807 /* will act like strdup if %r is not present */
8ede5f4c 808 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
b06b8511
CS
809 if (!destination) {
810 saved_errno = errno;
811 SYSERROR("memory allocation error");
812 free(source);
813 errno = saved_errno;
814 return -1;
815 }
816 }
e2a7e8dc
SH
817 mflags = add_required_remount_flags(source, destination,
818 default_mounts[i].flags);
592fd47a 819 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 820 saved_errno = errno;
b88ff9a0
SG
821 if (r < 0 && errno == ENOENT) {
822 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
823 r = 0;
824 }
825 else if (r < 0)
e2a7e8dc 826 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 827
b06b8511
CS
828 free(source);
829 free(destination);
830 if (r < 0) {
b06b8511
CS
831 errno = saved_errno;
832 return -1;
833 }
368bbc02 834 }
368bbc02
CS
835 }
836
b06b8511 837 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
838 int cg_flags;
839
840 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
841 /* If the type of cgroup mount was not specified, it depends on the
842 * container's capabilities as to what makes sense: if we have
843 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
844 * anyway, so we may as well default to read-write; then the admin
845 * will not be given a false sense of security. (And if they really
846 * want mixed r/o r/w, then they can explicitly specify :mixed.)
847 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
848 * :mixed, because then the container can't remount it read-write. */
849 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
850 int has_sys_admin = 0;
851 if (!lxc_list_empty(&conf->keepcaps)) {
852 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
853 } else {
854 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
855 }
856 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
857 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
858 } else {
859 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
860 }
861 }
862
8ede5f4c 863 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 864 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 865 return -1;
368bbc02
CS
866 }
867 }
868
368bbc02 869 return 0;
368bbc02
CS
870}
871
a17b1e65 872static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 873{
b09ef133 874 char absrootfs[MAXPATHLEN];
78ae2fcc 875 struct stat s;
a6afdde9 876 int i;
78ae2fcc 877
a17b1e65 878 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 879
880 struct rootfs_type {
881 int type;
882 rootfs_cb cb;
883 } rtfs_type[] = {
2656d231
DL
884 { S_IFDIR, mount_rootfs_dir },
885 { S_IFBLK, mount_rootfs_block },
886 { S_IFREG, mount_rootfs_file },
78ae2fcc 887 };
0ad19a3f 888
4c8ab83b 889 if (!realpath(rootfs, absrootfs)) {
36eb9bde 890 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 891 return -1;
892 }
b09ef133 893
b09ef133 894 if (access(absrootfs, F_OK)) {
36eb9bde 895 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 896 return -1;
897 }
898
78ae2fcc 899 if (stat(absrootfs, &s)) {
36eb9bde 900 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 901 return -1;
902 }
903
78ae2fcc 904 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 905
78ae2fcc 906 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
907 continue;
9b0f0477 908
a17b1e65 909 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 910 }
9b0f0477 911
36eb9bde 912 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 913 return -1;
0ad19a3f 914}
915
/*
 * Set the hostname to utsname->nodename.
 * A NULL @utsname means there is nothing to configure.
 *
 * Returns 0 on success or when @utsname is NULL, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;
	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", host);
		return -1;
	}

	INFO("'%s' hostname has been setup", host);

	return 0;
}
930
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* what the link points to */
	const char *name;	/* link name, relative to /dev */
};

/* The standard /dev entries that are links into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
942
943static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
944{
945 char path[MAXPATHLEN];
946 int ret,i;
09227be2 947 struct stat s;
69aa6655
DE
948
949
950 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
951 const struct dev_symlinks *d = &dev_symlinks[i];
cd2b3cfe 952 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
953 if (ret < 0 || ret >= MAXPATHLEN)
954 return -1;
09227be2
MW
955
956 /*
957 * Stat the path first. If we don't get an error
958 * accept it as is and don't try to create it
959 */
960 if (!stat(path, &s)) {
961 continue;
962 }
963
69aa6655 964 ret = symlink(d->oldpath, path);
09227be2 965
69aa6655 966 if (ret && errno != EEXIST) {
09227be2
MW
967 if ( errno == EROFS ) {
968 WARN("Warning: Read Only file system while creating %s", path);
969 } else {
970 SYSERROR("Error creating %s", path);
971 return -1;
972 }
69aa6655
DE
973 }
974 }
975 return 0;
976}
977
393903d1
SH
/*
 * Append @name to the space-separated pty list in *@pp that is passed to
 * systemd.
 *
 * When *@pp is NULL a new string "container_ttys=<name>" is allocated;
 * otherwise " <name>" is appended to the existing string, which may move.
 *
 * Returns true on success, false on allocation failure (*@pp is then left
 * as it was).
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefixlen = sizeof(prefix) - 1;
	const size_t namelen = strlen(name);
	size_t curlen;
	char *grown;

	if (*pp == NULL) {
		char *fresh = malloc(namelen + prefixlen + 1);

		*pp = fresh;
		if (fresh == NULL)
			return false;

		memcpy(fresh, prefix, prefixlen);
		memcpy(fresh + prefixlen, name, namelen + 1);
		return true;
	}

	curlen = strlen(*pp);
	/* room for the separating space and the terminating NUL */
	grown = realloc(*pp, curlen + namelen + 2);
	if (grown == NULL)
		return false;

	*pp = grown;
	grown[curlen] = ' ';
	memcpy(grown + curlen + 1, name, namelen + 1);
	return true;
}
1000
1001static int setup_tty(struct lxc_conf *conf)
1002{
393903d1
SH
1003 const struct lxc_tty_info *tty_info = &conf->tty_info;
1004 char *ttydir = conf->ttydir;
7c6ef2a2
SH
1005 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1006 int i, ret;
b0a33c1e 1007
e8bd4e43 1008 if (!conf->rootfs.path)
bc9bd0e3
DL
1009 return 0;
1010
b0a33c1e 1011 for (i = 0; i < tty_info->nbtty; i++) {
1012
1013 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
1014
e8bd4e43 1015 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
1016 if (ret >= sizeof(path)) {
1017 ERROR("pathname too long for ttys");
1018 return -1;
1019 }
1020 if (ttydir) {
1021 /* create dev/lxc/tty%d" */
e8bd4e43 1022 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
1023 if (ret >= sizeof(lxcpath)) {
1024 ERROR("pathname too long for ttys");
1025 return -1;
1026 }
1027 ret = creat(lxcpath, 0660);
1028 if (ret==-1 && errno != EEXIST) {
959aee9c 1029 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
1030 return -1;
1031 }
4d44e274
SH
1032 if (ret >= 0)
1033 close(ret);
7c6ef2a2
SH
1034 ret = unlink(path);
1035 if (ret && errno != ENOENT) {
959aee9c 1036 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1037 return -1;
1038 }
b0a33c1e 1039
7c6ef2a2
SH
1040 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1041 WARN("failed to mount '%s'->'%s'",
1042 pty_info->name, path);
1043 continue;
1044 }
13954cce 1045
9ba8130c
SH
1046 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1047 if (ret >= sizeof(lxcpath)) {
1048 ERROR("tty pathname too long");
1049 return -1;
1050 }
7c6ef2a2
SH
1051 ret = symlink(lxcpath, path);
1052 if (ret) {
959aee9c 1053 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1054 return -1;
1055 }
1056 } else {
c6883f38
SH
1057 /* If we populated /dev, then we need to create /dev/ttyN */
1058 if (access(path, F_OK)) {
1059 ret = creat(path, 0660);
1060 if (ret==-1) {
959aee9c 1061 SYSERROR("error creating %s", path);
c6883f38 1062 /* this isn't fatal, continue */
025ed0f3 1063 } else {
c6883f38 1064 close(ret);
025ed0f3 1065 }
c6883f38 1066 }
7c6ef2a2 1067 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 1068 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
1069 continue;
1070 }
393903d1 1071 }
e8bd4e43 1072 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
1073 ERROR("Error setting up container_ttys string");
1074 return -1;
b0a33c1e 1075 }
1076 }
1077
cd54d859
DL
1078 INFO("%d tty(s) has been setup", tty_info->nbtty);
1079
b0a33c1e 1080 return 0;
1081}
1082
bf601689 1083
2d489f9e 1084static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 1085{
2d489f9e 1086 int oldroot = -1, newroot = -1;
bf601689 1087
2d489f9e
SH
1088 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1089 if (oldroot < 0) {
1090 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1091 return -1;
1092 }
2d489f9e
SH
1093 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1094 if (newroot < 0) {
1095 SYSERROR("Error opening new-/ for fchdir");
1096 goto fail;
c08556c6 1097 }
bf601689 1098
cc6f6dd7 1099 /* change into new root fs */
2d489f9e 1100 if (fchdir(newroot)) {
cc6f6dd7 1101 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1102 goto fail;
cc6f6dd7
DL
1103 }
1104
cc6f6dd7 1105 /* pivot_root into our new root fs */
2d489f9e 1106 if (pivot_root(".", ".")) {
cc6f6dd7 1107 SYSERROR("pivot_root syscall failed");
2d489f9e 1108 goto fail;
bf601689 1109 }
cc6f6dd7 1110
2d489f9e
SH
1111 /*
1112 * at this point the old-root is mounted on top of our new-root
1113 * To unmounted it we must not be chdir'd into it, so escape back
1114 * to old-root
1115 */
1116 if (fchdir(oldroot) < 0) {
1117 SYSERROR("Error entering oldroot");
1118 goto fail;
1119 }
7981ea46 1120 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1121 SYSERROR("Error detaching old root");
1122 goto fail;
cc6f6dd7
DL
1123 }
1124
2d489f9e
SH
1125 if (fchdir(newroot) < 0) {
1126 SYSERROR("Error re-entering newroot");
1127 goto fail;
1128 }
cc6f6dd7 1129
2d489f9e
SH
1130 close(oldroot);
1131 close(newroot);
bf601689 1132
2d489f9e 1133 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1134
bf601689 1135 return 0;
2d489f9e
SH
1136
1137fail:
1138 if (oldroot != -1)
1139 close(oldroot);
1140 if (newroot != -1)
1141 close(newroot);
1142 return -1;
bf601689
MH
1143}
1144
bc6928ff 1145/*
87da4ec3
SH
1146 * Just create a path for /dev under $lxcpath/$name and in rootfs
1147 * If we hit an error, log it but don't fail yet.
91c3830e 1148 */
14221cbb 1149static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1150{
1151 int ret;
87da4ec3
SH
1152 size_t clen;
1153 char *path;
91c3830e 1154
14221cbb 1155 INFO("Mounting container /dev");
bc6928ff 1156
14221cbb
DW
1157 /* $(rootfs->mount) + "/dev/pts" + '\0' */
1158 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1159 path = alloca(clen);
bc6928ff 1160
14221cbb 1161 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
87da4ec3 1162 if (ret < 0 || ret >= clen)
91c3830e 1163 return -1;
bc6928ff 1164
87da4ec3 1165 if (!dir_exists(path)) {
14221cbb 1166 WARN("No /dev in container.");
87da4ec3
SH
1167 WARN("Proceeding without autodev setup");
1168 return 0;
bc6928ff 1169 }
87da4ec3 1170
592fd47a
SH
1171 if (safe_mount("none", path, "tmpfs", 0, "size=100000,mode=755",
1172 rootfs->path ? rootfs->mount : NULL)) {
87da4ec3
SH
1173 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1174 return false;
91c3830e 1175 }
87da4ec3
SH
1176
1177 INFO("Mounted tmpfs onto %s", path);
1178
14221cbb 1179 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
87da4ec3 1180 if (ret < 0 || ret >= clen)
91c3830e 1181 return -1;
87da4ec3 1182
bc6928ff
MW
1183 /*
1184 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1185 * If not, then create it and exit if that fails...
1186 */
87da4ec3 1187 if (!dir_exists(path)) {
bc6928ff
MW
1188 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1189 if (ret) {
1190 SYSERROR("Failed to create /dev/pts in container");
1191 return -1;
1192 }
91c3830e
SH
1193 }
1194
14221cbb 1195 INFO("Mounted container /dev");
91c3830e
SH
1196 return 0;
1197}
1198
/* Description of one device node: name under /dev, mknod mode, device numbers. */
struct lxc_devs {
	const char *name;	/* file name below /dev */
	mode_t mode;		/* node type and permission bits */
	int maj;		/* major device number */
	int min;		/* minor device number */
};

/* Device nodes that fill_autodev() creates in the container's /dev. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1215
14221cbb 1216static int fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38
SH
1217{
1218 int ret;
c6883f38
SH
1219 char path[MAXPATHLEN];
1220 int i;
3a32201c 1221 mode_t cmask;
c6883f38 1222
14221cbb 1223 INFO("Creating initial consoles under container /dev");
91c3830e 1224
14221cbb 1225 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
91c3830e
SH
1226 if (ret < 0 || ret >= MAXPATHLEN) {
1227 ERROR("Error calculating container /dev location");
c6883f38 1228 return -1;
f7bee6c6 1229 }
91c3830e 1230
9cb4d183
SH
1231 if (!dir_exists(path)) // ignore, just don't try to fill in
1232 return 0;
1233
14221cbb 1234 INFO("Populating container /dev");
3a32201c 1235 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1236 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1237 const struct lxc_devs *d = &lxc_devs[i];
14221cbb 1238 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
c6883f38
SH
1239 if (ret < 0 || ret >= MAXPATHLEN)
1240 return -1;
1241 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1242 if (ret && errno != EEXIST) {
9cb4d183
SH
1243 char hostpath[MAXPATHLEN];
1244 FILE *pathfile;
1245
1246 // Unprivileged containers cannot create devices, so
1247 // bind mount the device from the host
1248 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1249 if (ret < 0 || ret >= MAXPATHLEN)
1250 return -1;
1251 pathfile = fopen(path, "wb");
1252 if (!pathfile) {
1253 SYSERROR("Failed to create device mount target '%s'", path);
1254 return -1;
1255 }
1256 fclose(pathfile);
592fd47a
SH
1257 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
1258 rootfs->path ? rootfs->mount : NULL) != 0) {
9cb4d183
SH
1259 SYSERROR("Failed bind mounting device %s from host into container",
1260 d->name);
1261 return -1;
1262 }
c6883f38
SH
1263 }
1264 }
3a32201c 1265 umask(cmask);
c6883f38 1266
14221cbb 1267 INFO("Populated container /dev");
c6883f38
SH
1268 return 0;
1269}
1270
cc28d0b0 1271static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1272{
cc28d0b0
SH
1273 const struct lxc_rootfs *rootfs = &conf->rootfs;
1274
a0f379bf
DW
1275 if (!rootfs->path) {
1276 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1277 SYSERROR("Failed to make / rslave");
1278 return -1;
1279 }
c69bd12f 1280 return 0;
a0f379bf 1281 }
0ad19a3f 1282
12297168 1283 if (access(rootfs->mount, F_OK)) {
b1789442 1284 SYSERROR("failed to access to '%s', check it is present",
12297168 1285 rootfs->mount);
b1789442
DL
1286 return -1;
1287 }
1288
9be53773 1289 // First try mounting rootfs using a bdev
76a26f55 1290 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1291 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1292 bdev_put(bdev);
9be53773
SH
1293 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1294 return 0;
1295 }
59d66af2
SH
1296 if (bdev)
1297 bdev_put(bdev);
a17b1e65 1298 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1299 ERROR("failed to mount rootfs");
c3f0a28c 1300 return -1;
1301 }
0ad19a3f 1302
12297168 1303 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1304
ac778708
DL
1305 return 0;
1306}
1307
91e93c71
AV
1308int prepare_ramfs_root(char *root)
1309{
1310 char buf[LINELEN], *p;
1311 char nroot[PATH_MAX];
1312 FILE *f;
1313 int i;
1314 char *p2;
1315
1316 if (realpath(root, nroot) == NULL)
1317 return -1;
1318
1319 if (chdir("/") == -1)
1320 return -1;
1321
1322 /*
1323 * We could use here MS_MOVE, but in userns this mount is
1324 * locked and can't be moved.
1325 */
1326 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1327 SYSERROR("Failed to move %s into /", root);
1328 return -1;
1329 }
1330
88322f77 1331 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1332 SYSERROR("Failed to make . rprivate");
1333 return -1;
1334 }
1335
1336 /*
1337 * The following code cleans up inhereted mounts which are not
1338 * required for CT.
1339 *
1340 * The mountinfo file shows not all mounts, if a few points have been
1341 * unmounted between read operations from the mountinfo. So we need to
1342 * read mountinfo a few times.
1343 *
1344 * This loop can be skipped if a container uses unserns, because all
1345 * inherited mounts are locked and we should live with all this trash.
1346 */
1347 while (1) {
1348 int progress = 0;
1349
1350 f = fopen("./proc/self/mountinfo", "r");
1351 if (!f) {
1352 SYSERROR("Unable to open /proc/self/mountinfo");
1353 return -1;
1354 }
1355 while (fgets(buf, LINELEN, f)) {
1356 for (p = buf, i=0; p && i < 4; i++)
1357 p = strchr(p+1, ' ');
1358 if (!p)
1359 continue;
1360 p2 = strchr(p+1, ' ');
1361 if (!p2)
1362 continue;
1363
1364 *p2 = '\0';
1365 *p = '.';
1366
1367 if (strcmp(p + 1, "/") == 0)
1368 continue;
1369 if (strcmp(p + 1, "/proc") == 0)
1370 continue;
1371
1372 if (umount2(p, MNT_DETACH) == 0)
1373 progress++;
1374 }
1375 fclose(f);
1376 if (!progress)
1377 break;
1378 }
1379
8bea9fae
PR
1380 /* This also can be skipped if a container uses unserns */
1381 umount2("./proc", MNT_DETACH);
91e93c71
AV
1382
1383 /* It is weird, but chdir("..") moves us in a new root */
1384 if (chdir("..") == -1) {
1385 SYSERROR("Unable to change working directory");
1386 return -1;
1387 }
1388
1389 if (chroot(".") == -1) {
1390 SYSERROR("Unable to chroot");
1391 return -1;
1392 }
1393
1394 return 0;
1395}
1396
74a3920a 1397static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1398{
ac778708
DL
1399 if (!rootfs->path)
1400 return 0;
1401
91e93c71
AV
1402 if (detect_ramfs_rootfs()) {
1403 if (prepare_ramfs_root(rootfs->mount))
1404 return -1;
1405 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1406 ERROR("failed to setup pivot root");
25368b52 1407 return -1;
c69bd12f
DL
1408 }
1409
25368b52 1410 return 0;
0ad19a3f 1411}
1412
d852c78c 1413static int setup_pts(int pts)
3c26f34e 1414{
77890c6d
SW
1415 char target[PATH_MAX];
1416
d852c78c
DL
1417 if (!pts)
1418 return 0;
3c26f34e 1419
1420 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1421 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1422 return -1;
1423 }
1424
7e40254a
JTLB
1425 if (mkdir("/dev/pts", 0755)) {
1426 if ( errno != EEXIST ) {
1427 SYSERROR("failed to create '/dev/pts'");
1428 return -1;
1429 }
1430 }
1431
a6afdde9 1432 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1433 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1434 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1435 return -1;
1436 }
1437
3c26f34e 1438 if (access("/dev/ptmx", F_OK)) {
1439 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1440 goto out;
36eb9bde 1441 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1442 return -1;
1443 }
1444
77890c6d
SW
1445 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1446 goto out;
1447
3c26f34e 1448 /* fallback here, /dev/pts/ptmx exists just mount bind */
1449 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1450 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1451 return -1;
1452 }
cd54d859
DL
1453
1454 INFO("created new pts instance");
d852c78c 1455
3c26f34e 1456out:
1457 return 0;
1458}
1459
cccc74b5
DL
/*
 * Apply the execution domain @persona via personality().
 *
 * A value of -1 leaves the personality untouched.  When the build has no
 * <sys/personality.h> the function does nothing.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1476
7c6ef2a2 1477static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1478 const struct lxc_console *console)
6e590161 1479{
63376d7d
DL
1480 char path[MAXPATHLEN];
1481 struct stat s;
7c6ef2a2 1482 int ret;
52e35957 1483
7c6ef2a2
SH
1484 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1485 if (ret >= sizeof(path)) {
959aee9c 1486 ERROR("console path too long");
7c6ef2a2
SH
1487 return -1;
1488 }
52e35957 1489
63376d7d 1490 if (access(path, F_OK)) {
466978b0 1491 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1492 return 0;
52e35957
DL
1493 }
1494
b5159817
DE
1495 if (console->master < 0) {
1496 INFO("no console");
f78a1f32
DL
1497 return 0;
1498 }
ed502555 1499
63376d7d
DL
1500 if (stat(path, &s)) {
1501 SYSERROR("failed to stat '%s'", path);
1502 return -1;
1503 }
1504
1505 if (chmod(console->name, s.st_mode)) {
1506 SYSERROR("failed to set mode '0%o' to '%s'",
1507 s.st_mode, console->name);
1508 return -1;
1509 }
13954cce 1510
592fd47a 1511 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1512 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1513 return -1;
1514 }
1515
63376d7d 1516 INFO("console has been setup");
7c6ef2a2
SH
1517 return 0;
1518}
1519
1520static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1521 const struct lxc_console *console,
1522 char *ttydir)
1523{
1524 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1525 int ret;
1526
1527 /* create rootfs/dev/<ttydir> directory */
1528 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1529 ttydir);
1530 if (ret >= sizeof(path))
1531 return -1;
1532 ret = mkdir(path, 0755);
1533 if (ret && errno != EEXIST) {
959aee9c 1534 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1535 return -1;
1536 }
959aee9c 1537 INFO("created %s", path);
7c6ef2a2
SH
1538
1539 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1540 rootfs->mount, ttydir);
1541 if (ret >= sizeof(lxcpath)) {
959aee9c 1542 ERROR("console path too long");
7c6ef2a2
SH
1543 return -1;
1544 }
1545
1546 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1547 ret = unlink(path);
1548 if (ret && errno != ENOENT) {
959aee9c 1549 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1550 return -1;
1551 }
1552
1553 ret = creat(lxcpath, 0660);
1554 if (ret==-1 && errno != EEXIST) {
959aee9c 1555 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1556 return -1;
1557 }
4d44e274
SH
1558 if (ret >= 0)
1559 close(ret);
7c6ef2a2 1560
b5159817
DE
1561 if (console->master < 0) {
1562 INFO("no console");
7c6ef2a2
SH
1563 return 0;
1564 }
1565
592fd47a 1566 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1567 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1568 return -1;
1569 }
1570
1571 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1572 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1573 if (ret >= sizeof(lxcpath)) {
1574 ERROR("lxc/console path too long");
1575 return -1;
1576 }
7c6ef2a2
SH
1577 ret = symlink(lxcpath, path);
1578 if (ret) {
1579 SYSERROR("failed to create symlink for console");
1580 return -1;
1581 }
1582
1583 INFO("console has been setup on %s", lxcpath);
cd54d859 1584
6e590161 1585 return 0;
1586}
1587
7c6ef2a2
SH
1588static int setup_console(const struct lxc_rootfs *rootfs,
1589 const struct lxc_console *console,
1590 char *ttydir)
1591{
1592 /* We don't have a rootfs, /dev/console will be shared */
1593 if (!rootfs->path)
1594 return 0;
1595 if (!ttydir)
1596 return setup_dev_console(rootfs, console);
1597
1598 return setup_ttydir_console(rootfs, console, ttydir);
1599}
1600
1bd051a6
SH
1601static int setup_kmsg(const struct lxc_rootfs *rootfs,
1602 const struct lxc_console *console)
1603{
1604 char kpath[MAXPATHLEN];
1605 int ret;
1606
222fea5a
DE
1607 if (!rootfs->path)
1608 return 0;
1bd051a6
SH
1609 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1610 if (ret < 0 || ret >= sizeof(kpath))
1611 return -1;
1612
1613 ret = unlink(kpath);
1614 if (ret && errno != ENOENT) {
959aee9c 1615 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1616 return -1;
1617 }
1618
1619 ret = symlink("console", kpath);
1620 if (ret) {
1621 SYSERROR("failed to create symlink for kmsg");
1622 return -1;
1623 }
1624
1625 return 0;
1626}
1627
998ac676
RT
1628static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1629{
1630 struct mount_opt *mo;
1631
1632 /* If opt is found in mount_opt, set or clear flags.
1633 * Otherwise append it to data. */
1634
1635 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1636 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1637 if (mo->clear)
1638 *flags &= ~mo->flag;
1639 else
1640 *flags |= mo->flag;
1641 return;
1642 }
1643 }
1644
1645 if (strlen(*data))
1646 strcat(*data, ",");
1647 strcat(*data, opt);
1648}
1649
/*
 * Split the comma-separated option string @mntopts into mount flags and a
 * filesystem-specific data string.
 *
 * *@mntflags receives the accumulated flags (0 when there are none) and
 * *@mntdata a newly allocated string with the remaining options, or NULL
 * when none remain.  The caller frees *@mntdata.
 *
 * Returns 0 on success, -1 when memory allocation fails.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *rest;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the leftover options can never be longer than the input */
	rest = malloc(strlen(copy) + 1);
	if (!rest) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	rest[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok) {
		parse_mntopt(tok, mntflags, &rest);
		tok = strtok_r(NULL, ",", &state);
	}

	if (rest[0] != '\0')
		*mntdata = rest;
	else
		free(rest);
	free(copy);

	return 0;
}
1688
6fd5e769
SH
/*
 * Terminate @word at its first space or tab.  A string without either is
 * left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1695
/*
 * Skip @nfields whitespace-delimited fields in @src.
 *
 * For each field the run of characters up to the next space or tab is
 * skipped together with that one separator.  Returns a pointer to what
 * follows, or to the terminating '\0' if the string ends first.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;

	while (nfields-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1713
911324ef
DL
/*
 * Mount @fsname on @target and, for bind or remount requests, follow up
 * with a remount so that the requested flags take effect.
 *
 * The first mount is done through safe_mount() with MS_REMOUNT masked out.
 * When statvfs() is available, the nosuid/nodev/ro/noexec flags found on
 * @fsname are added to the remount flags; a plain bind mount whose flags
 * already cover them (and which did not ask for read-only) skips the
 * remount.
 *
 * A failure is logged and ignored (0 is returned) when @optional is set.
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, const char *rootfs)
{
	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		unsigned long rqd_flags;
		int do_remount = 1;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

		rqd_flags = (mountflags & MS_RDONLY) ? MS_RDONLY : 0;

#ifdef HAVE_STATVFS
		struct statvfs sb;

		if (statvfs(fsname, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			/* carry over the restrictive flags of the source */
			required_flags |= sb.f_flag &
				(MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC);

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

			/*
			 * If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(required_flags & ~mountflags) && rqd_flags == 0) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				do_remount = 0;
			} else {
				mountflags |= required_flags;
			}
		}
#endif

		if (do_remount &&
		    mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) < 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}
			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1790
4e4ca161
SH
/*
 * Remove 'optional', 'create=dir', and 'create=file' from mntent->mnt_opts.
 *
 * The string is edited in place.  For each of the three options the first
 * occurrence is cut out together with the comma that follows it; when the
 * option is the last one in the list, the comma in front of it is removed
 * instead so that no dangling separator is left behind.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const list[] = {
		"create=dir",
		"create=file",
		"optional",
		NULL
	};
	char *p, *p2;
	int i;

	for (i = 0; list[i]; i++) {
		p = strstr(mntent->mnt_opts, list[i]);
		if (!p)
			continue;

		p2 = strchr(p, ',');
		if (!p2) {
			/* no more mntopts: chop here, and drop the
			 * separator that preceded the option */
			if (p > mntent->mnt_opts && p[-1] == ',')
				p--;
			*p = '\0';
			continue;
		}

		/* shift the remaining options (and the '\0') down */
		memmove(p, p2 + 1, strlen(p2 + 1) + 1);
	}
}
1815
4d5b72a1
NC
/*
 * Create the mount target @path when the entry asks for it.
 *
 * "create=dir" creates @path as a directory (with parents).  "create=file"
 * creates the parent directory and an empty file at @path, unless @path
 * already exists.
 *
 * Returns 0 on success, -1 if the target could not be created or memory
 * could not be allocated.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path)
{
	int ret = 0;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		char *pathcopy;
		FILE *pathfile;

		/*
		 * dirname() may modify its argument and may return a pointer
		 * that is not the start of the buffer (or not into it at
		 * all), so keep the allocation in its own variable and free
		 * that one.
		 */
		pathcopy = strdup(path);
		if (!pathcopy) {
			SYSERROR("failed to allocate memory");
			return -1;
		}
		if (mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
1847
/*
 * Mount one fstab-style entry on @path.
 *
 * Creates the target if requested, strips the lxc-specific options from
 * the entry, converts the rest into mount flags and data and hands them to
 * mount_entry().
 *
 * Returns 0 on success, or when the target could not be created for an
 * entry marked "optional".  Returns -1 on failure.
 */
static inline int mount_entry_on_generic(struct mntent *mntent,
					 const char* path, const char *rootfs)
{
	unsigned long flags;
	char *data;
	int rc = -1;
	/* must be read before cull_mntent_opt() removes the option */
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (mount_entry_create_dir_file(mntent, path) < 0)
		return optional ? 0 : -1;

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &flags, &data) >= 0)
		rc = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
				 flags, data, optional, rootfs);

	free(data);

	return rc;
}
1875
db4aba38
NC
1876static inline int mount_entry_on_systemfs(struct mntent *mntent)
1877{
592fd47a 1878 return mount_entry_on_generic(mntent, mntent->mnt_dir, NULL);
db4aba38
NC
1879}
1880
4e4ca161 1881static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1882 const struct lxc_rootfs *rootfs,
1883 const char *lxc_name)
911324ef 1884{
013bd428 1885 char *aux;
59760f5d 1886 char path[MAXPATHLEN];
80a881b2 1887 int r, ret = 0, offset;
67e571de 1888 const char *lxcpath;
0ad19a3f 1889
593e8478 1890 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1891 if (!lxcpath) {
1892 ERROR("Out of memory");
1893 return -1;
1894 }
1895
80a881b2 1896 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1897 * use $lxcpath/CN/rootfs as the target prefix */
1898 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1899 if (r < 0 || r >= MAXPATHLEN)
1900 goto skipvarlib;
1901
1902 aux = strstr(mntent->mnt_dir, path);
1903 if (aux) {
1904 offset = strlen(path);
1905 goto skipabs;
1906 }
1907
1908skipvarlib:
013bd428
DL
1909 aux = strstr(mntent->mnt_dir, rootfs->path);
1910 if (!aux) {
1911 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 1912 return ret;
013bd428 1913 }
80a881b2
SH
1914 offset = strlen(rootfs->path);
1915
1916skipabs:
013bd428 1917
9ba8130c 1918 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1919 aux + offset);
1920 if (r < 0 || r >= MAXPATHLEN) {
1921 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
1922 return -1;
1923 }
1924
592fd47a 1925 return mount_entry_on_generic(mntent, path, rootfs->mount);
911324ef 1926}
d330fe7b 1927
/*
 * Mount an entry whose target is relative: the target becomes
 * "<rootfs>/<mnt_dir>".
 *
 * Returns -1 if that path does not fit, otherwise the result of
 * mount_entry_on_generic().
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char target[MAXPATHLEN];
	int len;

	/* relative to root mount point */
	len = snprintf(target, sizeof(target), "%s/%s", rootfs, mntent->mnt_dir);
	if (len < 0 || (size_t)len >= sizeof(target)) {
		ERROR("path name too long");
		return -1;
	}

	return mount_entry_on_generic(mntent, target, rootfs);
}
1943
80a881b2
SH
1944static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1945 const char *lxc_name)
911324ef 1946{
aaf901be
AM
1947 struct mntent mntent;
1948 char buf[4096];
911324ef 1949 int ret = -1;
e76b8764 1950
aaf901be 1951 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 1952
911324ef 1953 if (!rootfs->path) {
aaf901be 1954 if (mount_entry_on_systemfs(&mntent))
e76b8764 1955 goto out;
911324ef 1956 continue;
e76b8764
CDC
1957 }
1958
911324ef 1959 /* We have a separate root, mounts are relative to it */
aaf901be
AM
1960 if (mntent.mnt_dir[0] != '/') {
1961 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
1962 rootfs->mount))
1963 goto out;
1964 continue;
1965 }
cd54d859 1966
aaf901be 1967 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 1968 goto out;
0ad19a3f 1969 }
cd54d859 1970
0ad19a3f 1971 ret = 0;
cd54d859
DL
1972
1973 INFO("mount points have been setup");
0ad19a3f 1974out:
e7938e9e
MN
1975 return ret;
1976}
1977
80a881b2
SH
/*
 * Mount all entries of the fstab file @fstab.  A NULL @fstab means there
 * is nothing to do.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (!fstab)
		return 0;

	entries = setmntent(fstab, "r");
	if (!entries) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);
	endmntent(entries);

	return rc;
}
1998
9fc7f8c0 1999FILE *write_mount_file(struct lxc_list *mount)
e7938e9e
MN
2000{
2001 FILE *file;
2002 struct lxc_list *iterator;
2003 char *mount_entry;
e7938e9e
MN
2004
2005 file = tmpfile();
2006 if (!file) {
2007 ERROR("tmpfile error: %m");
9fc7f8c0 2008 return NULL;
e7938e9e
MN
2009 }
2010
2011 lxc_list_for_each(iterator, mount) {
2012 mount_entry = iterator->elem;
1d6b1976 2013 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2014 }
2015
2016 rewind(file);
9fc7f8c0
TA
2017 return file;
2018}
2019
/*
 * Mount the entries of the list @mount by writing them to a temporary
 * file and processing that file like an fstab.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
			       const char *lxc_name)
{
	int rc;
	FILE *entries = write_mount_file(mount);

	if (!entries)
		return -1;

	rc = mount_file_entries(rootfs, entries, lxc_name);
	fclose(entries);

	return rc;
}
2035
bab88e68
CS
2036static int parse_cap(const char *cap)
2037{
2038 char *ptr = NULL;
2039 int i, capid = -1;
2040
7035407c
DE
2041 if (!strcmp(cap, "none"))
2042 return -2;
2043
bab88e68
CS
2044 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2045
2046 if (strcmp(cap, caps_opt[i].name))
2047 continue;
2048
2049 capid = caps_opt[i].value;
2050 break;
2051 }
2052
2053 if (capid < 0) {
2054 /* try to see if it's numeric, so the user may specify
2055 * capabilities that the running kernel knows about but
2056 * we don't */
2057 errno = 0;
2058 capid = strtol(cap, &ptr, 10);
2059 if (!ptr || *ptr != '\0' || errno != 0)
2060 /* not a valid number */
2061 capid = -1;
2062 else if (capid > lxc_caps_last_cap())
2063 /* we have a number but it's not a valid
2064 * capability */
2065 capid = -1;
2066 }
2067
2068 return capid;
2069}
2070
0769b82a
CS
2071int in_caplist(int cap, struct lxc_list *caps)
2072{
2073 struct lxc_list *iterator;
2074 int capid;
2075
2076 lxc_list_for_each(iterator, caps) {
2077 capid = parse_cap(iterator->elem);
2078 if (capid == cap)
2079 return 1;
2080 }
2081
2082 return 0;
2083}
2084
81810dd1
DL
2085static int setup_caps(struct lxc_list *caps)
2086{
2087 struct lxc_list *iterator;
2088 char *drop_entry;
bab88e68 2089 int capid;
81810dd1
DL
2090
2091 lxc_list_for_each(iterator, caps) {
2092
2093 drop_entry = iterator->elem;
2094
bab88e68 2095 capid = parse_cap(drop_entry);
d55bc1ad 2096
81810dd1 2097 if (capid < 0) {
1e11be34
DL
2098 ERROR("unknown capability %s", drop_entry);
2099 return -1;
81810dd1
DL
2100 }
2101
2102 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2103
2104 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2105 SYSERROR("failed to remove %s capability", drop_entry);
2106 return -1;
2107 }
81810dd1
DL
2108
2109 }
2110
1fb86a7c
SH
2111 DEBUG("capabilities have been setup");
2112
2113 return 0;
2114}
2115
2116static int dropcaps_except(struct lxc_list *caps)
2117{
2118 struct lxc_list *iterator;
2119 char *keep_entry;
1fb86a7c
SH
2120 int i, capid;
2121 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2122 INFO("found %d capabilities", numcaps);
1fb86a7c 2123
2caf9a97
SH
2124 if (numcaps <= 0 || numcaps > 200)
2125 return -1;
2126
1fb86a7c
SH
2127 // caplist[i] is 1 if we keep capability i
2128 int *caplist = alloca(numcaps * sizeof(int));
2129 memset(caplist, 0, numcaps * sizeof(int));
2130
2131 lxc_list_for_each(iterator, caps) {
2132
2133 keep_entry = iterator->elem;
2134
bab88e68 2135 capid = parse_cap(keep_entry);
1fb86a7c 2136
7035407c
DE
2137 if (capid == -2)
2138 continue;
2139
1fb86a7c
SH
2140 if (capid < 0) {
2141 ERROR("unknown capability %s", keep_entry);
2142 return -1;
2143 }
2144
8255688a 2145 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2146
2147 caplist[capid] = 1;
2148 }
2149 for (i=0; i<numcaps; i++) {
2150 if (caplist[i])
2151 continue;
2152 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2153 SYSERROR("failed to remove capability %d", i);
2154 return -1;
2155 }
1fb86a7c
SH
2156 }
2157
2158 DEBUG("capabilities have been setup");
81810dd1
DL
2159
2160 return 0;
2161}
2162
0ad19a3f 2163static int setup_hw_addr(char *hwaddr, const char *ifname)
2164{
2165 struct sockaddr sockaddr;
2166 struct ifreq ifr;
2167 int ret, fd;
2168
3cfc0f3a
MN
2169 ret = lxc_convert_mac(hwaddr, &sockaddr);
2170 if (ret) {
2171 ERROR("mac address '%s' conversion failed : %s",
2172 hwaddr, strerror(-ret));
0ad19a3f 2173 return -1;
2174 }
2175
2176 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2177 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2178 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2179
2180 fd = socket(AF_INET, SOCK_DGRAM, 0);
2181 if (fd < 0) {
3ab87b66 2182 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2183 return -1;
2184 }
2185
2186 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2187 close(fd);
2188 if (ret)
3ab87b66 2189 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2190
5da6aa8c 2191 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2192
0ad19a3f 2193 return ret;
2194}
2195
82d5ae15 2196static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2197{
82d5ae15
DL
2198 struct lxc_list *iterator;
2199 struct lxc_inetdev *inetdev;
3cfc0f3a 2200 int err;
0ad19a3f 2201
82d5ae15
DL
2202 lxc_list_for_each(iterator, ip) {
2203
2204 inetdev = iterator->elem;
2205
0093bb8c
DL
2206 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2207 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2208 if (err) {
2209 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2210 ifindex, strerror(-err));
82d5ae15
DL
2211 return -1;
2212 }
2213 }
2214
2215 return 0;
0ad19a3f 2216}
2217
82d5ae15 2218static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2219{
82d5ae15 2220 struct lxc_list *iterator;
7fa9074f 2221 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2222 int err;
0ad19a3f 2223
82d5ae15
DL
2224 lxc_list_for_each(iterator, ip) {
2225
2226 inet6dev = iterator->elem;
2227
b3df193c 2228 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2229 &inet6dev->mcast, &inet6dev->acast,
2230 inet6dev->prefix);
3cfc0f3a
MN
2231 if (err) {
2232 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2233 ifindex, strerror(-err));
82d5ae15 2234 return -1;
3cfc0f3a 2235 }
82d5ae15
DL
2236 }
2237
2238 return 0;
0ad19a3f 2239}
2240
82d5ae15 2241static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2242{
0ad19a3f 2243 char ifname[IFNAMSIZ];
0ad19a3f 2244 char *current_ifname = ifname;
3cfc0f3a 2245 int err;
0ad19a3f 2246
82d5ae15
DL
2247 /* empty network namespace */
2248 if (!netdev->ifindex) {
b0efbac4 2249 if (netdev->flags & IFF_UP) {
d472214b 2250 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2251 if (err) {
2252 ERROR("failed to set the loopback up : %s",
2253 strerror(-err));
82d5ae15
DL
2254 return -1;
2255 }
82d5ae15 2256 }
40790553
SH
2257 if (netdev->type != LXC_NET_VETH)
2258 return 0;
2259 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2260 }
13954cce 2261
b466dc33 2262 /* get the new ifindex in case of physical netdev */
40790553 2263 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2264 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2265 ERROR("failed to get ifindex for %s",
2266 netdev->link);
2267 return -1;
2268 }
40790553 2269 }
b466dc33 2270
82d5ae15
DL
2271 /* retrieve the name of the interface */
2272 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2273 ERROR("no interface corresponding to index '%d'",
82d5ae15 2274 netdev->ifindex);
0ad19a3f 2275 return -1;
2276 }
13954cce 2277
018ef520 2278 /* default: let the system to choose one interface name */
9d083402 2279 if (!netdev->name)
fb6d9b2f
DL
2280 netdev->name = netdev->type == LXC_NET_PHYS ?
2281 netdev->link : "eth%d";
018ef520 2282
82d5ae15 2283 /* rename the interface name */
40790553
SH
2284 if (strcmp(ifname, netdev->name) != 0) {
2285 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2286 if (err) {
2287 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2288 strerror(-err));
2289 return -1;
2290 }
018ef520
DL
2291 }
2292
2293 /* Re-read the name of the interface because its name has changed
2294 * and would be automatically allocated by the system
2295 */
82d5ae15 2296 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2297 ERROR("no interface corresponding to index '%d'",
82d5ae15 2298 netdev->ifindex);
018ef520 2299 return -1;
0ad19a3f 2300 }
2301
82d5ae15
DL
2302 /* set a mac address */
2303 if (netdev->hwaddr) {
2304 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2305 ERROR("failed to setup hw address for '%s'",
82d5ae15 2306 current_ifname);
0ad19a3f 2307 return -1;
2308 }
2309 }
2310
82d5ae15
DL
2311 /* setup ipv4 addresses on the interface */
2312 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2313 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2314 ifname);
2315 return -1;
2316 }
2317
82d5ae15
DL
2318 /* setup ipv6 addresses on the interface */
2319 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2320 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2321 ifname);
2322 return -1;
2323 }
2324
82d5ae15 2325 /* set the network device up */
b0efbac4 2326 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2327 int err;
2328
d472214b 2329 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2330 if (err) {
2331 ERROR("failed to set '%s' up : %s", current_ifname,
2332 strerror(-err));
0ad19a3f 2333 return -1;
2334 }
2335
2336 /* the network is up, make the loopback up too */
d472214b 2337 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2338 if (err) {
2339 ERROR("failed to set the loopback up : %s",
2340 strerror(-err));
0ad19a3f 2341 return -1;
2342 }
2343 }
2344
f8fee0e2
MK
2345 /* We can only set up the default routes after bringing
2346 * up the interface, sine bringing up the interface adds
2347 * the link-local routes and we can't add a default
2348 * route if the gateway is not reachable. */
2349
2350 /* setup ipv4 gateway on the interface */
2351 if (netdev->ipv4_gateway) {
2352 if (!(netdev->flags & IFF_UP)) {
2353 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2354 return -1;
2355 }
2356
2357 if (lxc_list_empty(&netdev->ipv4)) {
2358 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2359 return -1;
2360 }
2361
2362 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2363 if (err) {
fc739df5
SG
2364 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2365 if (err) {
2366 ERROR("failed to add ipv4 dest for '%s': %s",
2367 ifname, strerror(-err));
2368 }
2369
2370 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2371 if (err) {
2372 ERROR("failed to setup ipv4 gateway for '%s': %s",
2373 ifname, strerror(-err));
2374 if (netdev->ipv4_gateway_auto) {
2375 char buf[INET_ADDRSTRLEN];
2376 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2377 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2378 }
2379 return -1;
19a26f82 2380 }
f8fee0e2
MK
2381 }
2382 }
2383
2384 /* setup ipv6 gateway on the interface */
2385 if (netdev->ipv6_gateway) {
2386 if (!(netdev->flags & IFF_UP)) {
2387 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2388 return -1;
2389 }
2390
2391 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2392 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2393 return -1;
2394 }
2395
2396 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2397 if (err) {
fc739df5
SG
2398 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2399 if (err) {
2400 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2401 ifname, strerror(-err));
19a26f82 2402 }
fc739df5
SG
2403
2404 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2405 if (err) {
2406 ERROR("failed to setup ipv6 gateway for '%s': %s",
2407 ifname, strerror(-err));
2408 if (netdev->ipv6_gateway_auto) {
2409 char buf[INET6_ADDRSTRLEN];
2410 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2411 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2412 }
2413 return -1;
2414 }
f8fee0e2
MK
2415 }
2416 }
2417
cd54d859
DL
2418 DEBUG("'%s' has been setup", current_ifname);
2419
0ad19a3f 2420 return 0;
2421}
2422
5f4535a3 2423static int setup_network(struct lxc_list *network)
0ad19a3f 2424{
82d5ae15 2425 struct lxc_list *iterator;
82d5ae15 2426 struct lxc_netdev *netdev;
0ad19a3f 2427
5f4535a3 2428 lxc_list_for_each(iterator, network) {
cd54d859 2429
5f4535a3 2430 netdev = iterator->elem;
82d5ae15
DL
2431
2432 if (setup_netdev(netdev)) {
2433 ERROR("failed to setup netdev");
2434 return -1;
2435 }
2436 }
cd54d859 2437
5f4535a3
DL
2438 if (!lxc_list_empty(network))
2439 INFO("network has been setup");
cd54d859
DL
2440
2441 return 0;
0ad19a3f 2442}
2443
2af6bd1b
SH
2444/* try to move physical nics to the init netns */
2445void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2446{
2447 int i, ret, oldfd;
2448 char path[MAXPATHLEN];
2449
2450 if (netnsfd < 0)
2451 return;
2452
2453 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2454 if (ret < 0 || ret >= MAXPATHLEN) {
2455 WARN("Failed to open monitor netns fd");
2456 return;
2457 }
2458 if ((oldfd = open(path, O_RDONLY)) < 0) {
2459 SYSERROR("Failed to open monitor netns fd");
2460 return;
2461 }
2462 if (setns(netnsfd, 0) != 0) {
2463 SYSERROR("Failed to enter container netns to reset nics");
2464 close(oldfd);
2465 return;
2466 }
2467 for (i=0; i<conf->num_savednics; i++) {
2468 struct saved_nic *s = &conf->saved_nics[i];
8d357196 2469 if (lxc_netdev_move_by_index(s->ifindex, 1, NULL))
2af6bd1b
SH
2470 WARN("Error moving nic index:%d back to host netns",
2471 s->ifindex);
2472 }
2473 if (setns(oldfd, 0) != 0)
2474 SYSERROR("Failed to re-enter monitor's netns");
2475 close(oldfd);
2476}
2477
2478void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2479{
2480 int i;
2481
2af6bd1b
SH
2482 if (conf->num_savednics == 0)
2483 return;
2484
7b35f3d6 2485 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2486 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2487 for (i=0; i<conf->num_savednics; i++) {
2488 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2489 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2490 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2491 free(s->orig_name);
2492 }
2493 conf->num_savednics = 0;
7b35f3d6
SH
2494}
2495
ae9242c8
SH
2496static char *default_rootfs_mount = LXCROOTFSMOUNT;
2497
7b379ab3 2498struct lxc_conf *lxc_conf_init(void)
089cd8b8 2499{
7b379ab3 2500 struct lxc_conf *new;
26ddeedd 2501 int i;
7b379ab3
MN
2502
2503 new = malloc(sizeof(*new));
2504 if (!new) {
2505 ERROR("lxc_conf_init : %m");
2506 return NULL;
2507 }
2508 memset(new, 0, sizeof(*new));
2509
b40a606e 2510 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2511 new->personality = -1;
124fa0a8 2512 new->autodev = 1;
596a818d
DE
2513 new->console.log_path = NULL;
2514 new->console.log_fd = -1;
28a4b0e5 2515 new->console.path = NULL;
63376d7d 2516 new->console.peer = -1;
b5159817
DE
2517 new->console.peerpty.busy = -1;
2518 new->console.peerpty.master = -1;
2519 new->console.peerpty.slave = -1;
63376d7d
DL
2520 new->console.master = -1;
2521 new->console.slave = -1;
2522 new->console.name[0] = '\0';
d2e30e99 2523 new->maincmd_fd = -1;
76a26f55 2524 new->nbd_idx = -1;
54c30e29 2525 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2526 if (!new->rootfs.mount) {
2527 ERROR("lxc_conf_init : %m");
2528 free(new);
2529 return NULL;
2530 }
d89de239 2531 new->kmsg = 0;
858377e4 2532 new->logfd = -1;
7b379ab3
MN
2533 lxc_list_init(&new->cgroup);
2534 lxc_list_init(&new->network);
2535 lxc_list_init(&new->mount_list);
81810dd1 2536 lxc_list_init(&new->caps);
1fb86a7c 2537 lxc_list_init(&new->keepcaps);
f6d3e3e4 2538 lxc_list_init(&new->id_map);
f979ac15 2539 lxc_list_init(&new->includes);
4184c3e1 2540 lxc_list_init(&new->aliens);
7c661726 2541 lxc_list_init(&new->environment);
26ddeedd
SH
2542 for (i=0; i<NUM_LXC_HOOKS; i++)
2543 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2544 lxc_list_init(&new->groups);
fe4de9a6
DE
2545 new->lsm_aa_profile = NULL;
2546 new->lsm_se_context = NULL;
5112cd70 2547 new->tmp_umount_proc = 0;
7b379ab3 2548
9f30a190
MM
2549 for (i = 0; i < LXC_NS_MAX; i++)
2550 new->inherit_ns_fd[i] = -1;
2551
72bb04e4
PT
2552 /* if running in a new user namespace, init and COMMAND
2553 * default to running as UID/GID 0 when using lxc-execute */
2554 new->init_uid = 0;
2555 new->init_gid = 0;
2556
7b379ab3 2557 return new;
089cd8b8
DL
2558}
2559
a589434e 2560static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2561{
8634bc19 2562 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2563 char veth2buf[IFNAMSIZ], *veth2;
e54864d3 2564 int err, mtu = 0;
13954cce 2565
8bee8851 2566 if (netdev->priv.veth_attr.pair) {
e892973e 2567 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2568 if (handler->conf->reboot)
2569 lxc_netdev_delete_by_name(veth1);
2570 } else {
9ba8130c
SH
2571 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2572 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2573 ERROR("veth1 name too long");
2574 return -1;
2575 }
a0265685 2576 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2577 if (!veth1) {
2578 ERROR("failed to allocate a temporary name");
2579 return -1;
2580 }
74a2b586
JK
2581 /* store away for deconf */
2582 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2583 }
82d5ae15 2584
0e391e57 2585 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2586 veth2 = lxc_mkifname(veth2buf);
ad40563e 2587 if (!veth2) {
82d5ae15 2588 ERROR("failed to allocate a temporary name");
ad40563e 2589 goto out_delete;
0ad19a3f 2590 }
2591
3cfc0f3a
MN
2592 err = lxc_veth_create(veth1, veth2);
2593 if (err) {
2e2d6a7b 2594 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2595 strerror(-err));
ad40563e 2596 goto out_delete;
0ad19a3f 2597 }
13954cce 2598
49684c0b
CS
2599 /* changing the high byte of the mac address to 0xfe, the bridge interface
2600 * will always keep the host's mac address and not take the mac address
2601 * of a container */
2602 err = setup_private_host_hw_addr(veth1);
2603 if (err) {
2e2d6a7b 2604 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2605 veth1, strerror(-err));
2606 goto out_delete;
2607 }
2608
af651aa9
SN
2609 netdev->ifindex = if_nametoindex(veth2);
2610 if (!netdev->ifindex) {
2611 ERROR("failed to retrieve the index for %s", veth2);
2612 goto out_delete;
2613 }
2614
82d5ae15 2615 if (netdev->mtu) {
e54864d3
NC
2616 mtu = atoi(netdev->mtu);
2617 } else if (netdev->link) {
af651aa9 2618 mtu = netdev_get_mtu(netdev->ifindex);
e54864d3
NC
2619 }
2620
2621 if (mtu) {
2622 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2623 if (!err)
e54864d3 2624 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2625 if (err) {
e54864d3
NC
2626 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2627 mtu, veth1, veth2, strerror(-err));
eb14c10a 2628 goto out_delete;
75d09f83
DL
2629 }
2630 }
2631
3cfc0f3a
MN
2632 if (netdev->link) {
2633 err = lxc_bridge_attach(netdev->link, veth1);
2634 if (err) {
2e2d6a7b 2635 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2636 veth1, netdev->link, strerror(-err));
2637 goto out_delete;
2638 }
eb14c10a
DL
2639 }
2640
d472214b 2641 err = lxc_netdev_up(veth1);
6e35af2e
DL
2642 if (err) {
2643 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2644 goto out_delete;
0ad19a3f 2645 }
2646
e3b4c4c4 2647 if (netdev->upscript) {
751d9dcd
DL
2648 err = run_script(handler->name, "net", netdev->upscript, "up",
2649 "veth", veth1, (char*) NULL);
2650 if (err)
e3b4c4c4 2651 goto out_delete;
e3b4c4c4
ST
2652 }
2653
a589434e 2654 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2655 veth1, veth2, netdev->ifindex);
2656
6ab9ab6d 2657 return 0;
eb14c10a
DL
2658
2659out_delete:
b84f58b9 2660 lxc_netdev_delete_by_name(veth1);
f10fad2f 2661 if (!netdev->priv.veth_attr.pair)
ad40563e 2662 free(veth1);
f10fad2f 2663 free(veth2);
6ab9ab6d 2664 return -1;
13954cce 2665}
d957ae2d 2666
74a2b586
JK
2667static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2668{
2669 char *veth1;
2670 int err;
2671
2672 if (netdev->priv.veth_attr.pair)
2673 veth1 = netdev->priv.veth_attr.pair;
2674 else
2675 veth1 = netdev->priv.veth_attr.veth1;
2676
2677 if (netdev->downscript) {
2678 err = run_script(handler->name, "net", netdev->downscript,
2679 "down", "veth", veth1, (char*) NULL);
2680 if (err)
2681 return -1;
2682 }
2683 return 0;
2684}
2685
a589434e 2686static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2687{
0e391e57 2688 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2689 int err;
d957ae2d
MT
2690
2691 if (!netdev->link) {
2692 ERROR("no link specified for macvlan netdev");
2693 return -1;
2694 }
13954cce 2695
9ba8130c
SH
2696 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2697 if (err >= sizeof(peerbuf))
2698 return -1;
82d5ae15 2699
a0265685 2700 peer = lxc_mkifname(peerbuf);
ad40563e 2701 if (!peer) {
82d5ae15
DL
2702 ERROR("failed to make a temporary name");
2703 return -1;
0ad19a3f 2704 }
2705
3cfc0f3a
MN
2706 err = lxc_macvlan_create(netdev->link, peer,
2707 netdev->priv.macvlan_attr.mode);
2708 if (err) {
2709 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2710 peer, netdev->link, strerror(-err));
ad40563e 2711 goto out;
0ad19a3f 2712 }
2713
82d5ae15
DL
2714 netdev->ifindex = if_nametoindex(peer);
2715 if (!netdev->ifindex) {
36eb9bde 2716 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2717 goto out;
22ebac19 2718 }
2719
e3b4c4c4 2720 if (netdev->upscript) {
751d9dcd
DL
2721 err = run_script(handler->name, "net", netdev->upscript, "up",
2722 "macvlan", netdev->link, (char*) NULL);
2723 if (err)
ad40563e 2724 goto out;
e3b4c4c4
ST
2725 }
2726
a589434e 2727 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2728 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2729
d957ae2d 2730 return 0;
ad40563e
ÇO
2731out:
2732 lxc_netdev_delete_by_name(peer);
2733 free(peer);
2734 return -1;
0ad19a3f 2735}
2736
74a2b586
JK
2737static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2738{
2739 int err;
2740
2741 if (netdev->downscript) {
2742 err = run_script(handler->name, "net", netdev->downscript,
2743 "down", "macvlan", netdev->link,
2744 (char*) NULL);
2745 if (err)
2746 return -1;
2747 }
2748 return 0;
2749}
2750
a589434e
JN
2751/* XXX: merge with instantiate_macvlan */
2752static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2753{
2754 char peer[IFNAMSIZ];
3cfc0f3a 2755 int err;
82f58d03 2756 static uint16_t vlan_cntr = 0;
26c39028
JHS
2757
2758 if (!netdev->link) {
2759 ERROR("no link specified for vlan netdev");
2760 return -1;
2761 }
2762
82f58d03 2763 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2764 if (err >= sizeof(peer)) {
2765 ERROR("peer name too long");
2766 return -1;
2767 }
26c39028 2768
3cfc0f3a
MN
2769 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2770 if (err) {
2771 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2772 peer, netdev->link, strerror(-err));
26c39028
JHS
2773 return -1;
2774 }
2775
2776 netdev->ifindex = if_nametoindex(peer);
2777 if (!netdev->ifindex) {
2778 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2779 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2780 return -1;
2781 }
2782
a589434e 2783 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
2784 netdev->ifindex);
2785
26c39028
JHS
2786 return 0;
2787}
2788
74a2b586
JK
/* Nothing is done when a vlan device is shut down; always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void) handler;
	(void) netdev;

	return 0;
}
2793
a589434e 2794static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2795{
6168e99f
DL
2796 if (!netdev->link) {
2797 ERROR("no link specified for the physical interface");
2798 return -1;
2799 }
2800
9d083402 2801 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2802 if (!netdev->ifindex) {
9d083402 2803 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2804 return -1;
2805 }
2806
e3b4c4c4
ST
2807 if (netdev->upscript) {
2808 int err;
751d9dcd
DL
2809 err = run_script(handler->name, "net", netdev->upscript,
2810 "up", "phys", netdev->link, (char*) NULL);
2811 if (err)
e3b4c4c4 2812 return -1;
e3b4c4c4
ST
2813 }
2814
82d5ae15 2815 return 0;
0ad19a3f 2816}
2817
74a2b586
JK
2818static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2819{
2820 int err;
2821
2822 if (netdev->downscript) {
2823 err = run_script(handler->name, "net", netdev->downscript,
2824 "down", "phys", netdev->link, (char*) NULL);
2825 if (err)
2826 return -1;
2827 }
2828 return 0;
2829}
2830
a589434e 2831static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2832{
2833 netdev->ifindex = 0;
2834 return 0;
2835}
2836
a589434e 2837static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2838{
82d5ae15 2839 netdev->ifindex = 0;
e3b4c4c4
ST
2840 if (netdev->upscript) {
2841 int err;
751d9dcd
DL
2842 err = run_script(handler->name, "net", netdev->upscript,
2843 "up", "empty", (char*) NULL);
2844 if (err)
e3b4c4c4 2845 return -1;
e3b4c4c4 2846 }
82d5ae15 2847 return 0;
0ad19a3f 2848}
2849
74a2b586
JK
2850static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2851{
2852 int err;
2853
2854 if (netdev->downscript) {
2855 err = run_script(handler->name, "net", netdev->downscript,
2856 "down", "empty", (char*) NULL);
2857 if (err)
2858 return -1;
2859 }
2860 return 0;
2861}
2862
26b797f3
SH
/* Nothing is done when a "none" device is shut down; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void) handler;
	(void) netdev;

	return 0;
}
2867
2868int lxc_requests_empty_network(struct lxc_handler *handler)
2869{
2870 struct lxc_list *network = &handler->conf->network;
2871 struct lxc_list *iterator;
2872 struct lxc_netdev *netdev;
2873 bool found_none = false, found_nic = false;
2874
2875 if (lxc_list_empty(network))
2876 return 0;
2877
2878 lxc_list_for_each(iterator, network) {
2879
2880 netdev = iterator->elem;
2881
2882 if (netdev->type == LXC_NET_NONE)
2883 found_none = true;
2884 else
2885 found_nic = true;
2886 }
2887 if (found_none && !found_nic)
2888 return 1;
2889 return 0;
2890}
2891
e3b4c4c4 2892int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2893{
e3b4c4c4 2894 struct lxc_list *network = &handler->conf->network;
82d5ae15 2895 struct lxc_list *iterator;
82d5ae15 2896 struct lxc_netdev *netdev;
cbef6c52
SH
2897 int am_root = (getuid() == 0);
2898
2899 if (!am_root)
2900 return 0;
0ad19a3f 2901
5f4535a3 2902 lxc_list_for_each(iterator, network) {
0ad19a3f 2903
5f4535a3 2904 netdev = iterator->elem;
13954cce 2905
24654103 2906 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2907 ERROR("invalid network configuration type '%d'",
5f4535a3 2908 netdev->type);
82d5ae15
DL
2909 return -1;
2910 }
0ad19a3f 2911
e3b4c4c4 2912 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2913 ERROR("failed to create netdev");
2914 return -1;
2915 }
e3b4c4c4 2916
0ad19a3f 2917 }
2918
2919 return 0;
2920}
2921
74a2b586 2922void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2923{
74a2b586 2924 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2925 struct lxc_list *iterator;
2926 struct lxc_netdev *netdev;
2927
2928 lxc_list_for_each(iterator, network) {
2929 netdev = iterator->elem;
d472214b 2930
74a2b586 2931 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2932 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2933 WARN("failed to rename to the initial name the " \
2934 "netdev '%s'", netdev->link);
d472214b 2935 continue;
d8f8e352 2936 }
d472214b 2937
74a2b586
JK
2938 if (netdev_deconf[netdev->type](handler, netdev)) {
2939 WARN("failed to destroy netdev");
2940 }
2941
d8f8e352
DL
2942 /* Recent kernel remove the virtual interfaces when the network
2943 * namespace is destroyed but in case we did not moved the
2944 * interface to the network namespace, we have to destroy it
2945 */
74a2b586
JK
2946 if (netdev->ifindex != 0 &&
2947 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2948 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2949 }
2950}
2951
45e854dc
SG
2952#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2953
fe1f672f
ÇO
2954/* lxc-user-nic returns "interface_name:interface_name\n" */
2955#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 2956static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
2957{
2958 pid_t child;
a7242d9a
ÇO
2959 int bytes, pipefd[2];
2960 char *token, *saveptr = NULL;
fe1f672f 2961 char buffer[MAX_BUFFER_SIZE];
cff7b5eb 2962 char netdev_link[IFNAMSIZ+1];
cbef6c52
SH
2963
2964 if (netdev->type != LXC_NET_VETH) {
2965 ERROR("nic type %d not support for unprivileged use",
2966 netdev->type);
2967 return -1;
2968 }
2969
a7242d9a
ÇO
2970 if(pipe(pipefd) < 0) {
2971 SYSERROR("pipe failed");
2972 return -1;
2973 }
2974
cbef6c52
SH
2975 if ((child = fork()) < 0) {
2976 SYSERROR("fork");
a7242d9a
ÇO
2977 close(pipefd[0]);
2978 close(pipefd[1]);
2979 return -1;
2980 }
2981
2982 if (child == 0) { // child
2983 /* close the read-end of the pipe */
2984 close(pipefd[0]);
2985 /* redirect the stdout to write-end of the pipe */
2986 dup2(pipefd[1], STDOUT_FILENO);
2987 /* close the write-end of the pipe */
fe1f672f 2988 close(pipefd[1]);
a7242d9a
ÇO
2989
2990 // Call lxc-user-nic pid type bridge
2991 char pidstr[20];
cff7b5eb
FN
2992 if (netdev->link) {
2993 strncpy(netdev_link, netdev->link, IFNAMSIZ);
2994 } else {
2995 strncpy(netdev_link, "none", IFNAMSIZ);
2996 }
2997 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev_link, netdev->name, NULL };
a7242d9a
ÇO
2998 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
2999 pidstr[19] = '\0';
3000 execvp(args[0], args);
3001 SYSERROR("execvp lxc-user-nic");
3002 exit(1);
3003 }
3004
3005 /* close the write-end of the pipe */
3006 close(pipefd[1]);
3007
fe1f672f 3008 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3009 if (bytes < 0) {
3010 SYSERROR("read failed");
3011 }
3012 buffer[bytes - 1] = '\0';
3013
3014 if (wait_for_pid(child) != 0) {
3015 close(pipefd[0]);
cbef6c52
SH
3016 return -1;
3017 }
3018
a7242d9a
ÇO
3019 /* close the read-end of the pipe */
3020 close(pipefd[0]);
cbef6c52 3021
a7242d9a
ÇO
3022 /* fill netdev->name field */
3023 token = strtok_r(buffer, ":", &saveptr);
3024 if (!token)
3025 return -1;
658979c5
SH
3026 netdev->name = malloc(IFNAMSIZ+1);
3027 if (!netdev->name) {
3028 ERROR("Out of memory");
3029 return -1;
3030 }
3031 memset(netdev->name, 0, IFNAMSIZ+1);
3032 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3033
3034 /* fill netdev->veth_attr.pair field */
3035 token = strtok_r(NULL, ":", &saveptr);
3036 if (!token)
3037 return -1;
3038 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3039 if (!netdev->priv.veth_attr.pair) {
3040 ERROR("Out of memory");
3041 return -1;
3042 }
45e854dc 3043
a7242d9a 3044 return 0;
cbef6c52
SH
3045}
3046
5f4535a3 3047int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3048{
82d5ae15 3049 struct lxc_list *iterator;
82d5ae15 3050 struct lxc_netdev *netdev;
cbef6c52 3051 int am_root = (getuid() == 0);
3cfc0f3a 3052 int err;
0ad19a3f 3053
5f4535a3 3054 lxc_list_for_each(iterator, network) {
82d5ae15 3055
5f4535a3 3056 netdev = iterator->elem;
82d5ae15 3057
fbb16259 3058 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3059 if (unpriv_assign_nic(netdev, pid))
3060 return -1;
658979c5
SH
3061 // lxc-user-nic has moved the nic to the new ns.
3062 // unpriv_assign_nic() fills in netdev->name.
3063 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3064 continue;
3065 }
236087a6 3066
fbb16259
SH
3067 /* empty network namespace, nothing to move */
3068 if (!netdev->ifindex)
3069 continue;
3070
8d357196 3071 err = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3cfc0f3a
MN
3072 if (err) {
3073 ERROR("failed to move '%s' to the container : %s",
3074 netdev->link, strerror(-err));
82d5ae15
DL
3075 return -1;
3076 }
3077
c1c75c04 3078 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3079 }
3080
3081 return 0;
3082}
3083
251d0d2a
DE
3084static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3085 size_t buf_size)
f6d3e3e4
SH
3086{
3087 char path[PATH_MAX];
e4ccd113 3088 int ret, closeret;
f6d3e3e4
SH
3089 FILE *f;
3090
3091 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3092 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3093 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3094 return -E2BIG;
3095 }
3096 f = fopen(path, "w");
3097 if (!f) {
3098 perror("open");
3099 return -EINVAL;
3100 }
251d0d2a 3101 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3102 if (ret < 0)
e4ccd113
SH
3103 SYSERROR("writing id mapping");
3104 closeret = fclose(f);
3105 if (closeret)
3106 SYSERROR("writing id mapping");
3107 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3108}
3109
3110int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3111{
3112 struct lxc_list *iterator;
3113 struct id_map *map;
8afb3e61 3114 int ret = 0, use_shadow = 0;
251d0d2a 3115 enum idtype type;
8afb3e61
SG
3116 char *buf = NULL, *pos, *cmdpath = NULL;
3117
22038de5
SH
3118 /*
3119 * If newuidmap exists, that is, if shadow is handing out subuid
3120 * ranges, then insist that root also reserve ranges in subuid. This
3121 * will protected it by preventing another user from being handed the
3122 * range by shadow.
3123 */
9d9c111c 3124 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3125 if (cmdpath) {
3126 use_shadow = 1;
3127 free(cmdpath);
3128 }
3129
0e6e3a41
SG
3130 if (!use_shadow && geteuid()) {
3131 ERROR("Missing newuidmap/newgidmap");
3132 return -1;
3133 }
251d0d2a
DE
3134
3135 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3136 int left, fill;
cf3ef16d
SH
3137 int had_entry = 0;
3138 if (!buf) {
3139 buf = pos = malloc(4096);
4f7521b4
SH
3140 if (!buf)
3141 return -ENOMEM;
cf3ef16d
SH
3142 }
3143 pos = buf;
0e6e3a41 3144 if (use_shadow)
d1838f34 3145 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3146 type == ID_TYPE_UID ? 'u' : 'g',
3147 pid);
4f7521b4 3148
cf3ef16d
SH
3149 lxc_list_for_each(iterator, idmap) {
3150 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3151 map = iterator->elem;
cf3ef16d
SH
3152 if (map->idtype != type)
3153 continue;
3154
3155 had_entry = 1;
3156 left = 4096 - (pos - buf);
d1838f34 3157 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3158 use_shadow ? " " : "",
d1838f34 3159 map->nsid, map->hostid, map->range,
0e6e3a41 3160 use_shadow ? "" : "\n");
cf3ef16d
SH
3161 if (fill <= 0 || fill >= left)
3162 SYSERROR("snprintf failed, too many mappings");
3163 pos += fill;
251d0d2a 3164 }
cf3ef16d 3165 if (!had_entry)
4f7521b4 3166 continue;
cf3ef16d 3167
0e6e3a41 3168 if (!use_shadow) {
cf3ef16d 3169 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3170 } else {
3171 left = 4096 - (pos - buf);
3172 fill = snprintf(pos, left, "\n");
3173 if (fill <= 0 || fill >= left)
3174 SYSERROR("snprintf failed, too many mappings");
3175 pos += fill;
cf3ef16d 3176 ret = system(buf);
d1838f34 3177 }
cf3ef16d 3178
f6d3e3e4
SH
3179 if (ret)
3180 break;
3181 }
251d0d2a 3182
f10fad2f 3183 free(buf);
f6d3e3e4
SH
3184 return ret;
3185}
3186
cf3ef16d 3187/*
7b50c609
TS
3188 * return the host uid/gid to which the container root is mapped in
3189 * *val.
0b3a6504 3190 * Return true if id was found, false otherwise.
cf3ef16d 3191 */
2a9a80cb 3192bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3193 unsigned long *val)
cf3ef16d
SH
3194{
3195 struct lxc_list *it;
3196 struct id_map *map;
3197
3198 lxc_list_for_each(it, &conf->id_map) {
3199 map = it->elem;
7b50c609 3200 if (map->idtype != idtype)
cf3ef16d
SH
3201 continue;
3202 if (map->nsid != 0)
3203 continue;
2a9a80cb
SH
3204 *val = map->hostid;
3205 return true;
cf3ef16d 3206 }
2a9a80cb 3207 return false;
cf3ef16d
SH
3208}
3209
2133f58c 3210int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3211{
3212 struct lxc_list *it;
3213 struct id_map *map;
3214 lxc_list_for_each(it, &conf->id_map) {
3215 map = it->elem;
2133f58c 3216 if (map->idtype != idtype)
cf3ef16d
SH
3217 continue;
3218 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3219 return (id - map->hostid) + map->nsid;
cf3ef16d 3220 }
57d116ab 3221 return -1;
cf3ef16d
SH
3222}
3223
2133f58c 3224int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3225{
3226 struct lxc_list *it;
3227 struct id_map *map;
2133f58c 3228 unsigned int freeid = 0;
cf3ef16d
SH
3229again:
3230 lxc_list_for_each(it, &conf->id_map) {
3231 map = it->elem;
2133f58c 3232 if (map->idtype != idtype)
cf3ef16d
SH
3233 continue;
3234 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3235 freeid = map->nsid + map->range;
3236 goto again;
3237 }
3238 }
3239 return freeid;
3240}
3241
19a26f82
MK
3242int lxc_find_gateway_addresses(struct lxc_handler *handler)
3243{
3244 struct lxc_list *network = &handler->conf->network;
3245 struct lxc_list *iterator;
3246 struct lxc_netdev *netdev;
3247 int link_index;
3248
3249 lxc_list_for_each(iterator, network) {
3250 netdev = iterator->elem;
3251
3252 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3253 continue;
3254
3255 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3256 ERROR("gateway = auto only supported for "
3257 "veth and macvlan");
3258 return -1;
3259 }
3260
3261 if (!netdev->link) {
3262 ERROR("gateway = auto needs a link interface");
3263 return -1;
3264 }
3265
3266 link_index = if_nametoindex(netdev->link);
3267 if (!link_index)
3268 return -EINVAL;
3269
3270 if (netdev->ipv4_gateway_auto) {
3271 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3272 ERROR("failed to automatically find ipv4 gateway "
3273 "address from link interface '%s'", netdev->link);
3274 return -1;
3275 }
3276 }
3277
3278 if (netdev->ipv6_gateway_auto) {
3279 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3280 ERROR("failed to automatically find ipv6 gateway "
3281 "address from link interface '%s'", netdev->link);
3282 return -1;
3283 }
3284 }
3285 }
3286
3287 return 0;
3288}
3289
5e4a62bf 3290int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3291{
5e4a62bf 3292 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3293 int i, ret;
b0a33c1e 3294
5e4a62bf
DL
3295 /* no tty in the configuration */
3296 if (!conf->tty)
b0a33c1e 3297 return 0;
3298
13954cce 3299 tty_info->pty_info =
e4e7d59d 3300 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3301 if (!tty_info->pty_info) {
36eb9bde 3302 SYSERROR("failed to allocate pty_info");
985d15b1 3303 return -1;
b0a33c1e 3304 }
3305
985d15b1 3306 for (i = 0; i < conf->tty; i++) {
13954cce 3307
b0a33c1e 3308 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3309
025ed0f3
SH
3310 process_lock();
3311 ret = openpty(&pty_info->master, &pty_info->slave,
3312 pty_info->name, NULL, NULL);
3313 process_unlock();
3314 if (ret) {
36eb9bde 3315 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3316 tty_info->nbtty = i;
3317 lxc_delete_tty(tty_info);
3318 return -1;
b0a33c1e 3319 }
3320
5332bb84
DL
3321 DEBUG("allocated pty '%s' (%d/%d)",
3322 pty_info->name, pty_info->master, pty_info->slave);
3323
3ec1648d 3324 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3325 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3326 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3327
b0a33c1e 3328 pty_info->busy = 0;
3329 }
3330
985d15b1 3331 tty_info->nbtty = conf->tty;
1ac470c0
DL
3332
3333 INFO("tty's configured");
3334
985d15b1 3335 return 0;
b0a33c1e 3336}
3337
3338void lxc_delete_tty(struct lxc_tty_info *tty_info)
3339{
3340 int i;
3341
3342 for (i = 0; i < tty_info->nbtty; i++) {
3343 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3344
3345 close(pty_info->master);
3346 close(pty_info->slave);
3347 }
3348
3349 free(tty_info->pty_info);
3350 tty_info->nbtty = 0;
3351}
3352
f6d3e3e4 3353/*
7b50c609
TS
3354 * chown_mapped_root: for an unprivileged user with uid/gid X to
3355 * chown a dir to subuid/subgid Y, he needs to run chown as root
3356 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3357 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3358 * root is privileged with respect to hostuid/hostgid X, allowing
3359 * him to do the chown.
f6d3e3e4 3360 */
c4d10a05 3361int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3362{
7b50c609
TS
3363 uid_t rootuid;
3364 gid_t rootgid;
c4d10a05 3365 pid_t pid;
2a9a80cb 3366 unsigned long val;
a7ef8753 3367 char *chownpath = path;
f6d3e3e4 3368
2a9a80cb 3369 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3370 ERROR("No mapping for container root");
3371 return -1;
f6d3e3e4 3372 }
7b50c609
TS
3373 rootuid = (uid_t) val;
3374 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3375 ERROR("No mapping for container root");
3376 return -1;
3377 }
3378 rootgid = (gid_t) val;
2a9a80cb 3379
a7ef8753
SH
3380 /*
3381 * In case of overlay, we want only the writeable layer
3382 * to be chowned
3383 */
1f92162d 3384 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3385 chownpath = strchr(path, ':');
3386 if (!chownpath) {
3387 ERROR("Bad overlay path: %s", path);
3388 return -1;
3389 }
3390 chownpath = strchr(chownpath+1, ':');
3391 if (!chownpath) {
3392 ERROR("Bad overlay path: %s", path);
3393 return -1;
3394 }
3395 chownpath++;
3396 }
3397 path = chownpath;
c4d10a05 3398 if (geteuid() == 0) {
7b50c609 3399 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3400 ERROR("Error chowning %s", path);
3401 return -1;
3402 }
3403 return 0;
3404 }
f3d7e4ca 3405
7b50c609 3406 if (rootuid == geteuid()) {
f3d7e4ca
SH
3407 // nothing to do
3408 INFO("%s: container root is our uid; no need to chown" ,__func__);
3409 return 0;
3410 }
3411
c4d10a05
SH
3412 pid = fork();
3413 if (pid < 0) {
3414 SYSERROR("Failed forking");
f6d3e3e4
SH
3415 return -1;
3416 }
c4d10a05 3417 if (!pid) {
7b50c609
TS
3418 int hostuid = geteuid(), hostgid = getegid(), ret;
3419 struct stat sb;
3420 char map1[100], map2[100], map3[100], map4[100], map5[100];
3421 char ugid[100];
3422 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3423 "-m", map3, "-m", map5,
3424 "--", "chown", ugid, path, NULL };
3425 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3426 "-m", map3, "-m", map4, "-m", map5,
3427 "--", "chown", ugid, path, NULL };
3428
3429 // save the current gid of "path"
3430 if (stat(path, &sb) < 0) {
3431 ERROR("Error stat %s", path);
3432 return -1;
3433 }
f6d3e3e4 3434
9a7c2aba
SH
3435 /*
3436 * A file has to be group-owned by a gid mapped into the
3437 * container, or the container won't be privileged over it.
3438 */
3439 if (sb.st_uid == geteuid() &&
3440 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3441 chown(path, -1, hostgid) < 0) {
3442 ERROR("Failed chgrping %s", path);
7b50c609
TS
3443 return -1;
3444 }
3445
3446 // "u:0:rootuid:1"
3447 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3448 if (ret < 0 || ret >= 100) {
3449 ERROR("Error uid printing map string");
f6d3e3e4
SH
3450 return -1;
3451 }
c4d10a05 3452
98e5ba51
SH
3453 // "u:hostuid:hostuid:1"
3454 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3455 if (ret < 0 || ret >= 100) {
3456 ERROR("Error uid printing map string");
3457 return -1;
3458 }
3459
7b50c609
TS
3460 // "g:0:rootgid:1"
3461 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3462 if (ret < 0 || ret >= 100) {
7b50c609 3463 ERROR("Error gid printing map string");
c4d10a05
SH
3464 return -1;
3465 }
3466
7b50c609 3467 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3468 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3469 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3470 if (ret < 0 || ret >= 100) {
3471 ERROR("Error gid printing map string");
3472 return -1;
3473 }
3474
3475 // "g:hostgid:hostgid:1"
3476 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3477 if (ret < 0 || ret >= 100) {
3478 ERROR("Error gid printing map string");
3479 return -1;
3480 }
3481
3482 // "0:pathgid" (chown)
b4c1e35d 3483 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3484 if (ret < 0 || ret >= 100) {
3485 ERROR("Error owner printing format string for chown");
3486 return -1;
3487 }
3488
3489 if (hostgid == sb.st_gid)
3490 ret = execvp("lxc-usernsexec", args1);
3491 else
3492 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3493 SYSERROR("Failed executing usernsexec");
3494 exit(1);
f6d3e3e4 3495 }
c4d10a05 3496 return wait_for_pid(pid);
f6d3e3e4
SH
3497}
3498
c4d10a05 3499int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3500{
c4d10a05 3501 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3502 return 0;
c4d10a05 3503
29b10e4f 3504 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3505 ERROR("Failed to chown %s", c->console.name);
3506 return -1;
3507 }
3508
f6d3e3e4
SH
3509 return 0;
3510}
3511
5112cd70
SH
3512int tmp_proc_mount(struct lxc_conf *lxc_conf)
3513{
3514 int mounted;
3515
01958b1f 3516 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3517 if (mounted == -1) {
3518 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3519 /* continue only if there is no rootfs */
3520 if (lxc_conf->rootfs.path)
3521 return -1;
5112cd70
SH
3522 } else if (mounted == 1) {
3523 lxc_conf->tmp_umount_proc = 1;
3524 }
3525 return 0;
3526}
3527
3528void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3529{
3530 if (lxc_conf->tmp_umount_proc == 1) {
3531 umount("/proc");
3532 lxc_conf->tmp_umount_proc = 0;
3533 }
3534}
3535
/*
 * Walk /proc/self/mountinfo and remount every entry whose option field
 * contains "shared" with MS_SLAVE.
 *
 * Failures are logged but never fatal: container startup continues.
 */
void remount_all_slave(void)
{
	char *buf = NULL;
	size_t buflen = 0;
	FILE *mounts;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (!mounts) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	for (;;) {
		char *mntpoint, *flags;

		if (getline(&buf, &buflen, mounts) == -1)
			break;

		mntpoint = get_field(buf, 4);
		if (!mntpoint)
			continue;

		flags = get_field(mntpoint, 2);
		if (!flags)
			continue;

		/* terminate the later field first, then the mount point */
		null_endofword(flags);
		if (!strstr(flags, "shared"))
			continue;
		null_endofword(mntpoint);

		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL)) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mounts);
	free(buf);
}
3569
2322903b
SH
3570void lxc_execute_bind_init(struct lxc_conf *conf)
3571{
3572 int ret;
9d9c111c
SH
3573 char path[PATH_MAX], destpath[PATH_MAX], *p;
3574
3575 /* If init exists in the container, don't bind mount a static one */
3576 p = choose_init(conf->rootfs.mount);
3577 if (p) {
3578 free(p);
3579 return;
3580 }
2322903b
SH
3581
3582 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3583 if (ret < 0 || ret >= PATH_MAX) {
3584 WARN("Path name too long searching for lxc.init.static");
3585 return;
3586 }
3587
3588 if (!file_exists(path)) {
3589 INFO("%s does not exist on host", path);
3590 return;
3591 }
3592
3593 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3594 if (ret < 0 || ret >= PATH_MAX) {
3595 WARN("Path name too long for container's lxc.init.static");
3596 return;
3597 }
3598
3599 if (!file_exists(destpath)) {
3600 FILE * pathfile = fopen(destpath, "wb");
3601 if (!pathfile) {
3602 SYSERROR("Failed to create mount target '%s'", destpath);
3603 return;
3604 }
3605 fclose(pathfile);
3606 }
3607
592fd47a 3608 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3609 if (ret < 0)
3610 SYSERROR("Failed to bind lxc.init.static into container");
3611 INFO("lxc.init.static bound into container at %s", path);
3612}
3613
35120d9c
SH
3614/*
3615 * This does the work of remounting / if it is shared, calling the
3616 * container pre-mount hooks, and mounting the rootfs.
3617 */
3618int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3619{
35120d9c
SH
3620 if (conf->rootfs_setup) {
3621 /*
3622 * rootfs was set up in another namespace. bind-mount it
3623 * to give us a mount in our own ns so we can pivot_root to it
3624 */
3625 const char *path = conf->rootfs.mount;
3626 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3627 ERROR("Failed to bind-mount container / onto itself");
145832ba 3628 return -1;
35120d9c 3629 }
145832ba 3630 return 0;
35120d9c 3631 }
d4ef7c50 3632
e995d7a2
SH
3633 remount_all_slave();
3634
35120d9c
SH
3635 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3636 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3637 return -1;
3638 }
3639
3640 if (setup_rootfs(conf)) {
3641 ERROR("failed to setup rootfs for '%s'", name);
3642 return -1;
3643 }
3644
3645 conf->rootfs_setup = true;
3646 return 0;
3647}
3648
1c1c7051
SH
3649static bool verify_start_hooks(struct lxc_conf *conf)
3650{
3651 struct lxc_list *it;
3652 char path[MAXPATHLEN];
3653 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3654 char *hookname = it->elem;
3655 struct stat st;
3656 int ret;
3657
3658 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3659 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3660 if (ret < 0 || ret >= MAXPATHLEN)
3661 return false;
3662 ret = stat(path, &st);
3663 if (ret) {
7b6753e7 3664 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3665 hookname);
3666 return false;
3667 }
6a0c909a 3668 return true;
1c1c7051
SH
3669 }
3670
3671 return true;
3672}
3673
e8bd4e43
SH
3674static int send_fd(int sock, int fd)
3675{
3676 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3677
3678
3679 if (ret < 0) {
3680 SYSERROR("Error sending tty fd to parent");
3681 return -1;
3682 }
3683
3684 return 0;
3685}
3686
3687static int send_ttys_to_parent(struct lxc_handler *handler)
3688{
3689 struct lxc_conf *conf = handler->conf;
3690 const struct lxc_tty_info *tty_info = &conf->tty_info;
3691 int i;
3692 int sock = handler->ttysock[0];
3693
3694 for (i = 0; i < tty_info->nbtty; i++) {
3695 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3696 if (send_fd(sock, pty_info->slave) < 0)
3697 goto bad;
3698 close(pty_info->slave);
3699 pty_info->slave = -1;
3700 if (send_fd(sock, pty_info->master) < 0)
3701 goto bad;
3702 close(pty_info->master);
3703 pty_info->master = -1;
3704 }
3705
3706 close(handler->ttysock[0]);
3707 close(handler->ttysock[1]);
3708
3709 return 0;
3710
3711bad:
3712 ERROR("Error writing tty fd to parent");
3713 return -1;
3714}
3715
35120d9c
SH
3716int lxc_setup(struct lxc_handler *handler)
3717{
3718 const char *name = handler->name;
3719 struct lxc_conf *lxc_conf = handler->conf;
3720 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3721
3722 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3723 ERROR("Error setting up rootfs mount after spawn");
3724 return -1;
3725 }
3726
6c544cb3
MM
3727 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3728 if (setup_utsname(lxc_conf->utsname)) {
3729 ERROR("failed to setup the utsname for '%s'", name);
3730 return -1;
3731 }
0ad19a3f 3732 }
3733
5f4535a3 3734 if (setup_network(&lxc_conf->network)) {
36eb9bde 3735 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3736 return -1;
0ad19a3f 3737 }
3738
bc6928ff 3739 if (lxc_conf->autodev > 0) {
14221cbb 3740 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 3741 ERROR("failed to mount /dev in the container");
c6883f38
SH
3742 return -1;
3743 }
3744 }
3745
368bbc02
CS
3746 /* do automatic mounts (mainly /proc and /sys), but exclude
3747 * those that need to wait until other stuff has finished
3748 */
4fb3cba5 3749 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3750 ERROR("failed to setup the automatic mounts for '%s'", name);
3751 return -1;
3752 }
3753
80a881b2 3754 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3755 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3756 return -1;
576f946d 3757 }
3758
c1dc38c2 3759 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3760 ERROR("failed to setup the mount entries for '%s'", name);
3761 return -1;
3762 }
3763
7b6753e7 3764 /* Make sure any start hooks are in the container */
1c1c7051
SH
3765 if (!verify_start_hooks(lxc_conf))
3766 return -1;
3767
2322903b
SH
3768 if (lxc_conf->is_execute)
3769 lxc_execute_bind_init(lxc_conf);
3770
368bbc02
CS
3771 /* now mount only cgroup, if wanted;
3772 * before, /sys could not have been mounted
3773 * (is either mounted automatically or via fstab entries)
3774 */
4fb3cba5 3775 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3776 ERROR("failed to setup the automatic mounts for '%s'", name);
3777 return -1;
3778 }
3779
283678ed 3780 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3781 ERROR("failed to run mount hooks for container '%s'.", name);
3782 return -1;
3783 }
3784
bc6928ff 3785 if (lxc_conf->autodev > 0) {
283678ed 3786 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3787 ERROR("failed to run autodev hooks for container '%s'.", name);
3788 return -1;
3789 }
14221cbb 3790 if (fill_autodev(&lxc_conf->rootfs)) {
91c3830e
SH
3791 ERROR("failed to populate /dev in the container");
3792 return -1;
3793 }
3794 }
368bbc02 3795
37903589 3796 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3797 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3798 return -1;
6e590161 3799 }
3800
7e0e1d94
AV
3801 if (lxc_conf->kmsg) {
3802 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3803 ERROR("failed to setup kmsg for '%s'", name);
3804 }
1bd051a6 3805
69aa6655
DE
3806 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3807 ERROR("failed to setup /dev symlinks for '%s'", name);
3808 return -1;
3809 }
3810
5112cd70
SH
3811 /* mount /proc if it's not already there */
3812 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3813 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3814 return -1;
e075f5d9 3815 }
e075f5d9 3816
ac778708 3817 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3818 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3819 return -1;
ed502555 3820 }
3821
571e6ec8 3822 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3823 ERROR("failed to setup the new pts instance");
95b5ffaf 3824 return -1;
3c26f34e 3825 }
3826
e8bd4e43
SH
3827 if (lxc_create_tty(name, lxc_conf)) {
3828 ERROR("failed to create the ttys");
3829 return -1;
3830 }
3831
3832 if (send_ttys_to_parent(handler) < 0) {
3833 ERROR("failure sending console info to parent");
3834 return -1;
3835 }
3836
3837
3838 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3839 ERROR("failed to setup the ttys for '%s'", name);
3840 return -1;
3841 }
3842
3843 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3844 SYSERROR("failed to set environment variable for container ptys");
3845
3846
cccc74b5
DL
3847 if (setup_personality(lxc_conf->personality)) {
3848 ERROR("failed to setup personality");
3849 return -1;
3850 }
3851
97a8f74f
SG
3852 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3853 if (!lxc_list_empty(&lxc_conf->caps)) {
3854 ERROR("Simultaneously requested dropping and keeping caps");
f6d3e3e4
SH
3855 return -1;
3856 }
97a8f74f
SG
3857 if (dropcaps_except(&lxc_conf->keepcaps)) {
3858 ERROR("failed to keep requested caps");
3859 return -1;
3860 }
3861 } else if (setup_caps(&lxc_conf->caps)) {
3862 ERROR("failed to drop capabilities");
3863 return -1;
81810dd1
DL
3864 }
3865
cd54d859
DL
3866 NOTICE("'%s' is setup.", name);
3867
0ad19a3f 3868 return 0;
3869}
26ddeedd 3870
283678ed
SH
3871int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3872 const char *lxcpath, char *argv[])
26ddeedd
SH
3873{
3874 int which = -1;
3875 struct lxc_list *it;
3876
3877 if (strcmp(hook, "pre-start") == 0)
3878 which = LXCHOOK_PRESTART;
5ea6163a
SH
3879 else if (strcmp(hook, "pre-mount") == 0)
3880 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3881 else if (strcmp(hook, "mount") == 0)
3882 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3883 else if (strcmp(hook, "autodev") == 0)
3884 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3885 else if (strcmp(hook, "start") == 0)
3886 which = LXCHOOK_START;
3887 else if (strcmp(hook, "post-stop") == 0)
3888 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3889 else if (strcmp(hook, "clone") == 0)
3890 which = LXCHOOK_CLONE;
37cf711b
SY
3891 else if (strcmp(hook, "destroy") == 0)
3892 which = LXCHOOK_DESTROY;
26ddeedd
SH
3893 else
3894 return -1;
3895 lxc_list_for_each(it, &conf->hooks[which]) {
3896 int ret;
3897 char *hookname = it->elem;
283678ed 3898 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3899 if (ret)
3900 return ret;
3901 }
3902 return 0;
3903}
72d0e1cb 3904
427b3a21 3905static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3906{
3907 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3908 struct lxc_list *it2,*next;
72d0e1cb
SG
3909
3910 lxc_list_del(it);
3911
f10fad2f
ME
3912 free(netdev->link);
3913 free(netdev->name);
3914 if (netdev->type == LXC_NET_VETH)
c9bb9a85 3915 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
3916 free(netdev->upscript);
3917 free(netdev->hwaddr);
3918 free(netdev->mtu);
3919 free(netdev->ipv4_gateway);
3920 free(netdev->ipv6_gateway);
9ebb03ad 3921 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3922 lxc_list_del(it2);
3923 free(it2->elem);
3924 free(it2);
3925 }
9ebb03ad 3926 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3927 lxc_list_del(it2);
3928 free(it2->elem);
3929 free(it2);
3930 }
d95db067 3931 free(netdev);
72d0e1cb
SG
3932 free(it);
3933}
3934
3935/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3936int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3937{
3938 char *p1;
3939 int ret, idx, i;
3940 struct lxc_list *it;
3941 struct lxc_netdev *netdev;
3942
46cd2845 3943 p1 = strchr(key, '.');
72d0e1cb
SG
3944 if (!p1 || *(p1+1) == '\0')
3945 p1 = NULL;
3946
3947 ret = sscanf(key, "%d", &idx);
3948 if (ret != 1) return -1;
3949 if (idx < 0)
3950 return -1;
3951
3952 i = 0;
3953 lxc_list_for_each(it, &c->network) {
3954 if (i == idx)
3955 break;
3956 i++;
3957 }
3958 if (i < idx) // we don't have that many nics defined
3959 return -1;
3960
3961 if (!it || !it->elem)
3962 return -1;
3963
3964 netdev = it->elem;
3965
3966 if (!p1) {
3967 lxc_remove_nic(it);
52d21d40 3968 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3969 struct lxc_list *it2,*next;
3970 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3971 lxc_list_del(it2);
3972 free(it2->elem);
3973 free(it2);
3974 }
52d21d40 3975 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
3976 struct lxc_list *it2,*next;
3977 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3978 lxc_list_del(it2);
3979 free(it2->elem);
3980 free(it2);
3981 }
72d0e1cb
SG
3982 }
3983 else return -1;
3984
3985 return 0;
3986}
3987
3988int lxc_clear_config_network(struct lxc_conf *c)
3989{
9ebb03ad
DE
3990 struct lxc_list *it,*next;
3991 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3992 lxc_remove_nic(it);
3993 }
3994 return 0;
3995}
3996
3997int lxc_clear_config_caps(struct lxc_conf *c)
3998{
9ebb03ad 3999 struct lxc_list *it,*next;
72d0e1cb 4000
9ebb03ad 4001 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4002 lxc_list_del(it);
4003 free(it->elem);
4004 free(it);
4005 }
4006 return 0;
4007}
4008
74a3920a 4009static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4010 struct lxc_list *it, *next;
4011
4355ab5f 4012 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4013 lxc_list_del(it);
4014 free(it->elem);
4015 free(it);
4016 }
4017 return 0;
4018}
4019
4355ab5f
SH
4020int lxc_clear_idmaps(struct lxc_conf *c)
4021{
4022 return lxc_free_idmap(&c->id_map);
4023}
4024
1fb86a7c
SH
4025int lxc_clear_config_keepcaps(struct lxc_conf *c)
4026{
4027 struct lxc_list *it,*next;
4028
4029 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4030 lxc_list_del(it);
4031 free(it->elem);
4032 free(it);
4033 }
4034 return 0;
4035}
4036
12a50cc6 4037int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4038{
9ebb03ad 4039 struct lxc_list *it,*next;
72d0e1cb 4040 bool all = false;
12a50cc6 4041 const char *k = key + 11;
72d0e1cb
SG
4042
4043 if (strcmp(key, "lxc.cgroup") == 0)
4044 all = true;
4045
9ebb03ad 4046 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4047 struct lxc_cgroup *cg = it->elem;
4048 if (!all && strcmp(cg->subsystem, k) != 0)
4049 continue;
4050 lxc_list_del(it);
4051 free(cg->subsystem);
4052 free(cg->value);
4053 free(cg);
4054 free(it);
4055 }
4056 return 0;
4057}
4058
ee1e7aa0
SG
4059int lxc_clear_groups(struct lxc_conf *c)
4060{
4061 struct lxc_list *it,*next;
4062
4063 lxc_list_for_each_safe(it, &c->groups, next) {
4064 lxc_list_del(it);
4065 free(it->elem);
4066 free(it);
4067 }
4068 return 0;
4069}
4070
ab799c0b
SG
4071int lxc_clear_environment(struct lxc_conf *c)
4072{
4073 struct lxc_list *it,*next;
4074
4075 lxc_list_for_each_safe(it, &c->environment, next) {
4076 lxc_list_del(it);
4077 free(it->elem);
4078 free(it);
4079 }
4080 return 0;
4081}
4082
4083
72d0e1cb
SG
4084int lxc_clear_mount_entries(struct lxc_conf *c)
4085{
9ebb03ad 4086 struct lxc_list *it,*next;
72d0e1cb 4087
9ebb03ad 4088 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4089 lxc_list_del(it);
4090 free(it->elem);
4091 free(it);
4092 }
4093 return 0;
4094}
4095
b099e9e9
SH
4096int lxc_clear_automounts(struct lxc_conf *c)
4097{
4098 c->auto_mounts = 0;
4099 return 0;
4100}
4101
12a50cc6 4102int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4103{
9ebb03ad 4104 struct lxc_list *it,*next;
17ed13a3 4105 bool all = false, done = false;
12a50cc6 4106 const char *k = key + 9;
72d0e1cb
SG
4107 int i;
4108
17ed13a3
SH
4109 if (strcmp(key, "lxc.hook") == 0)
4110 all = true;
4111
72d0e1cb 4112 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4113 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4114 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4115 lxc_list_del(it);
4116 free(it->elem);
4117 free(it);
4118 }
4119 done = true;
72d0e1cb
SG
4120 }
4121 }
17ed13a3
SH
4122
4123 if (!done) {
4124 ERROR("Invalid hook key: %s", key);
4125 return -1;
4126 }
72d0e1cb
SG
4127 return 0;
4128}
8eb5694b 4129
74a3920a 4130static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4131{
4132 int i;
4133
0cf45501 4134 if (!conf->saved_nics)
7b35f3d6
SH
4135 return;
4136 for (i=0; i < conf->num_savednics; i++)
4137 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4138 free(conf->saved_nics);
4139}
4140
4184c3e1
SH
4141static inline void lxc_clear_aliens(struct lxc_conf *conf)
4142{
4143 struct lxc_list *it,*next;
4144
4145 lxc_list_for_each_safe(it, &conf->aliens, next) {
4146 lxc_list_del(it);
4147 free(it->elem);
4148 free(it);
4149 }
4150}
4151
f979ac15
SH
4152static inline void lxc_clear_includes(struct lxc_conf *conf)
4153{
4154 struct lxc_list *it,*next;
4155
4156 lxc_list_for_each_safe(it, &conf->includes, next) {
4157 lxc_list_del(it);
4158 free(it->elem);
4159 free(it);
4160 }
4161}
4162
8eb5694b
SH
4163void lxc_conf_free(struct lxc_conf *conf)
4164{
4165 if (!conf)
4166 return;
858377e4
SH
4167 if (current_config == conf)
4168 current_config = NULL;
f10fad2f
ME
4169 free(conf->console.log_path);
4170 free(conf->console.path);
4171 free(conf->rootfs.mount);
4172 free(conf->rootfs.options);
4173 free(conf->rootfs.path);
4174 free(conf->rootfs.pivot);
4175 free(conf->logfile);
858377e4
SH
4176 if (conf->logfd != -1)
4177 close(conf->logfd);
f10fad2f
ME
4178 free(conf->utsname);
4179 free(conf->ttydir);
4180 free(conf->fstab);
4181 free(conf->rcfile);
4182 free(conf->init_cmd);
6b0d5538 4183 free(conf->unexpanded_config);
393903d1 4184 free(conf->pty_names);
8eb5694b 4185 lxc_clear_config_network(conf);
f10fad2f
ME
4186 free(conf->lsm_aa_profile);
4187 free(conf->lsm_se_context);
769872f9 4188 lxc_seccomp_free(conf);
8eb5694b 4189 lxc_clear_config_caps(conf);
1fb86a7c 4190 lxc_clear_config_keepcaps(conf);
8eb5694b 4191 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4192 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4193 lxc_clear_mount_entries(conf);
7b35f3d6 4194 lxc_clear_saved_nics(conf);
27c27d73 4195 lxc_clear_idmaps(conf);
ee1e7aa0 4196 lxc_clear_groups(conf);
f979ac15 4197 lxc_clear_includes(conf);
761d81ca 4198 lxc_clear_aliens(conf);
ab799c0b 4199 lxc_clear_environment(conf);
8eb5694b
SH
4200 free(conf);
4201}
4355ab5f
SH
4202
/* Argument bundle handed to run_userns_fn() in the cloned child. */
struct userns_fn_data {
	int (*fn)(void *);	/* callback the child invokes */
	void *arg;		/* argument passed to fn */
	int p[2];		/* pipe: child waits on p[0], closes p[1] */
};
4208
4209static int run_userns_fn(void *data)
4210{
4211 struct userns_fn_data *d = data;
4212 char c;
4213 // we're not sharing with the parent any more, if it was a thread
4214
4215 close(d->p[1]);
4216 if (read(d->p[0], &c, 1) != 1)
4217 return -1;
4218 close(d->p[0]);
4219 return d->fn(d->arg);
4220}
4221
4222/*
8b227008
TS
4223 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4224 * if they are not already there.
4355ab5f 4225 */
8b227008
TS
4226static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4227 uid_t uid, gid_t gid)
4355ab5f 4228{
8b227008
TS
4229 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4230 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4231 struct lxc_list *new = NULL, *tmp, *it, *next;
4232 struct id_map *entry;
4233
3ec1648d
SH
4234 new = malloc(sizeof(*new));
4235 if (!new) {
4236 ERROR("Out of memory building id map");
4237 return NULL;
4238 }
4239 lxc_list_init(new);
4240
8b227008
TS
4241 if (hostuid_mapped < 0) {
4242 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4243 if (hostuid_mapped < 0)
3ec1648d
SH
4244 goto err;
4245 tmp = malloc(sizeof(*tmp));
4246 if (!tmp)
4247 goto err;
4355ab5f
SH
4248 entry = malloc(sizeof(*entry));
4249 if (!entry) {
3ec1648d
SH
4250 free(tmp);
4251 goto err;
4355ab5f 4252 }
3ec1648d 4253 tmp->elem = entry;
4355ab5f 4254 entry->idtype = ID_TYPE_UID;
8b227008
TS
4255 entry->nsid = hostuid_mapped;
4256 entry->hostid = (unsigned long) uid;
4257 entry->range = 1;
4258 lxc_list_add_tail(new, tmp);
4259 }
4260 if (hostgid_mapped < 0) {
4261 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4262 if (hostgid_mapped < 0)
4263 goto err;
4264 tmp = malloc(sizeof(*tmp));
4265 if (!tmp)
4266 goto err;
4267 entry = malloc(sizeof(*entry));
4268 if (!entry) {
4269 free(tmp);
4270 goto err;
4271 }
4272 tmp->elem = entry;
4273 entry->idtype = ID_TYPE_GID;
4274 entry->nsid = hostgid_mapped;
4275 entry->hostid = (unsigned long) gid;
4355ab5f 4276 entry->range = 1;
3ec1648d 4277 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4278 }
4279 lxc_list_for_each_safe(it, &conf->id_map, next) {
4280 tmp = malloc(sizeof(*tmp));
4281 if (!tmp)
4282 goto err;
4283 entry = malloc(sizeof(*entry));
4284 if (!entry) {
4285 free(tmp);
4286 goto err;
4287 }
4288 memset(entry, 0, sizeof(*entry));
4289 memcpy(entry, it->elem, sizeof(*entry));
4290 tmp->elem = entry;
3ec1648d 4291 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4292 }
4293
4294 return new;
4295
4296err:
8b227008 4297 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4298 if (new)
4299 lxc_free_idmap(new);
c30ac545 4300 free(new);
4355ab5f
SH
4301 return NULL;
4302}
4303
4304/*
4305 * Run a function in a new user namespace.
8b227008 4306 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4307 */
4308int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4309{
4310 int ret, pid;
4311 struct userns_fn_data d;
4312 char c = '1';
4313 int p[2];
4314 struct lxc_list *idmap;
4315
4355ab5f 4316 ret = pipe(p);
4355ab5f
SH
4317 if (ret < 0) {
4318 SYSERROR("opening pipe");
4319 return -1;
4320 }
4321 d.fn = fn;
4322 d.arg = data;
4323 d.p[0] = p[0];
4324 d.p[1] = p[1];
4325 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4326 if (pid < 0)
4327 goto err;
4355ab5f 4328 close(p[0]);
4355ab5f
SH
4329 p[0] = -1;
4330
8b227008
TS
4331 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4332 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4333 goto err;
4334 }
4335
4336 ret = lxc_map_ids(idmap, pid);
4337 lxc_free_idmap(idmap);
88dd66fc 4338 free(idmap);
565e571c 4339 if (ret) {
4355ab5f
SH
4340 ERROR("Error setting up child mappings");
4341 goto err;
4342 }
4343
4344 // kick the child
4345 if (write(p[1], &c, 1) != 1) {
4346 SYSERROR("writing to pipe to child");
4347 goto err;
4348 }
4349
3139aead
SG
4350 ret = wait_for_pid(pid);
4351
4352 close(p[1]);
4353 return ret;
4354
4355ab5f 4355err:
4355ab5f
SH
4356 if (p[0] != -1)
4357 close(p[0]);
4358 close(p[1]);
4355ab5f
SH
4359 return -1;
4360}
97e9cfa0 4361
/*
 * Look up the passwd entry of the effective uid and return a malloc'ed copy
 * of its user name, or NULL if there is no such entry.  The caller frees
 * the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	if (pw)
		return strdup(pw->pw_name);

	return NULL;
}
4373
/*
 * Look up the group entry of the effective gid and return a malloc'ed copy
 * of its group name, or NULL if there is no such entry.  The caller frees
 * the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	if (gr)
		return strdup(gr->gr_name);

	return NULL;
}
4385
a96a8e8c 4386/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4387void suggest_default_idmap(void)
4388{
4389 FILE *f;
4390 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4391 char *line = NULL;
4392 char *uname, *gname;
4393 size_t len = 0;
4394
4395 if (!(uname = getuname()))
4396 return;
4397
4398 if (!(gname = getgname())) {
4399 free(uname);
4400 return;
4401 }
4402
4403 f = fopen(subuidfile, "r");
4404 if (!f) {
4405 ERROR("Your system is not configured with subuids");
4406 free(gname);
4407 free(uname);
4408 return;
4409 }
4410 while (getline(&line, &len, f) != -1) {
4411 char *p = strchr(line, ':'), *p2;
4412 if (*line == '#')
4413 continue;
4414 if (!p)
4415 continue;
4416 *p = '\0';
4417 p++;
4418 if (strcmp(line, uname))
4419 continue;
4420 p2 = strchr(p, ':');
4421 if (!p2)
4422 continue;
4423 *p2 = '\0';
4424 p2++;
4425 if (!*p2)
4426 continue;
4427 uid = atoi(p);
4428 urange = atoi(p2);
4429 }
4430 fclose(f);
4431
4432 f = fopen(subuidfile, "r");
4433 if (!f) {
4434 ERROR("Your system is not configured with subgids");
4435 free(gname);
4436 free(uname);
4437 return;
4438 }
4439 while (getline(&line, &len, f) != -1) {
4440 char *p = strchr(line, ':'), *p2;
4441 if (*line == '#')
4442 continue;
4443 if (!p)
4444 continue;
4445 *p = '\0';
4446 p++;
4447 if (strcmp(line, uname))
4448 continue;
4449 p2 = strchr(p, ':');
4450 if (!p2)
4451 continue;
4452 *p2 = '\0';
4453 p2++;
4454 if (!*p2)
4455 continue;
4456 gid = atoi(p);
4457 grange = atoi(p2);
4458 }
4459 fclose(f);
4460
f10fad2f 4461 free(line);
97e9cfa0
SH
4462
4463 if (!urange || !grange) {
4464 ERROR("You do not have subuids or subgids allocated");
4465 ERROR("Unprivileged containers require subuids and subgids");
4466 return;
4467 }
4468
4469 ERROR("You must either run as root, or define uid mappings");
4470 ERROR("To pass uid mappings to lxc-create, you could create");
4471 ERROR("~/.config/lxc/default.conf:");
4472 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4473 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4474 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4475
4476 free(gname);
4477 free(uname);
4478}
aaf26830 4479
a7307747
SH
4480static void free_cgroup_settings(struct lxc_list *result)
4481{
4482 struct lxc_list *iterator, *next;
4483
4484 lxc_list_for_each_safe(iterator, result, next) {
4485 lxc_list_del(iterator);
4486 free(iterator);
4487 }
4488 free(result);
4489}
4490
aaf26830
KT
4491/*
4492 * Return the list of cgroup_settings sorted according to the following rules
4493 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4494 */
4495struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4496{
4497 struct lxc_list *result;
4498 struct lxc_list *memsw_limit = NULL;
4499 struct lxc_list *it = NULL;
4500 struct lxc_cgroup *cg = NULL;
4501 struct lxc_list *item = NULL;
4502
4503 result = malloc(sizeof(*result));
fac7c663
KT
4504 if (!result) {
4505 ERROR("failed to allocate memory to sort cgroup settings");
4506 return NULL;
4507 }
aaf26830
KT
4508 lxc_list_init(result);
4509
4510 /*Iterate over the cgroup settings and copy them to the output list*/
4511 lxc_list_for_each(it, cgroup_settings) {
4512 item = malloc(sizeof(*item));
fac7c663
KT
4513 if (!item) {
4514 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4515 free_cgroup_settings(result);
fac7c663
KT
4516 return NULL;
4517 }
aaf26830
KT
4518 item->elem = it->elem;
4519 cg = it->elem;
4520 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4521 /* Store the memsw_limit location */
4522 memsw_limit = item;
4523 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4524 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4525 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4526 item->elem = memsw_limit->elem;
4527 memsw_limit->elem = it->elem;
4528 }
4529 lxc_list_add_tail(result, item);
4530 }
4531
4532 return result;
a7307747 4533}