]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
Use consistent /proc, /sys and /sys/fs/cgroup (v2)
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
b2718c72 68#include "parse.h"
1b09f2c0
DL
69#include "utils.h"
70#include "conf.h"
71#include "log.h"
d55bc1ad 72#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 73#include "bdev.h"
368bbc02 74#include "cgroup.h"
025ed0f3 75#include "lxclock.h"
4355ab5f 76#include "namespace.h"
fe4de9a6 77#include "lsm/lsm.h"
d0a36f2c 78
495d2046
SG
79#if HAVE_SYS_CAPABILITY_H
80#include <sys/capability.h>
81#endif
82
6ff05e18
SG
83#if HAVE_SYS_PERSONALITY_H
84#include <sys/personality.h>
85#endif
86
edaf8b1b
SG
87#if IS_BIONIC
88#include <../include/lxcmntent.h>
89#else
90#include <mntent.h>
91#endif
92
769872f9
SH
93#include "lxcseccomp.h"
94
36eb9bde 95lxc_log_define(lxc_conf, lxc);
e5bda9ee 96
87da4ec3 97#define LINELEN 4096
0ad19a3f 98
495d2046 99#if HAVE_SYS_CAPABILITY_H
b09094da
MN
100#ifndef CAP_SETFCAP
101#define CAP_SETFCAP 31
102#endif
103
104#ifndef CAP_MAC_OVERRIDE
105#define CAP_MAC_OVERRIDE 32
106#endif
107
108#ifndef CAP_MAC_ADMIN
109#define CAP_MAC_ADMIN 33
110#endif
495d2046 111#endif
b09094da
MN
112
113#ifndef PR_CAPBSET_DROP
114#define PR_CAPBSET_DROP 24
115#endif
116
9818cae4
SG
117#ifndef LO_FLAGS_AUTOCLEAR
118#define LO_FLAGS_AUTOCLEAR 4
119#endif
120
0769b82a
CS
121/* needed for cgroup automount checks, regardless of whether we
122 * have included linux/capability.h or not */
123#ifndef CAP_SYS_ADMIN
124#define CAP_SYS_ADMIN 21
125#endif
126
2d76d1d7
SG
/*
 * pivot_root() fallback.
 *
 * When the build system did not detect pivot_root() in the C library
 * (HAVE_PIVOT_ROOT undefined), provide a file-local version that issues the
 * raw system call, or fails with ENOSYS when the syscall number is unknown.
 * Otherwise only declare the library function.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	/* no syscall number available on this platform */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
141
/*
 * sethostname() fallback.
 *
 * Only compiled when HAVE_SETHOSTNAME is undefined: forwards to the raw
 * system call, or fails with ENOSYS when the syscall number is unknown.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	/* no syscall number available on this platform */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
154
72f919c4
SG
155/* Define __S_ISTYPE if missing from the C library */
156#ifndef __S_ISTYPE
157#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
158#endif
159
ecec0126
SG
160#ifndef MS_PRIVATE
161#define MS_PRIVATE (1<<18)
162#endif
163
72d0e1cb 164char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 165 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 166
/* Signature shared by the per-network-type instantiate/shutdown callbacks. */
typedef int (*instantiate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
0ad19a3f 168
998ac676
RT
/* One entry of the mount-option table: option keyword plus the MS_* bit. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* 0 or 1 in the option table below */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
174
81810dd1
DL
/* One entry of the capability table: capability name and its CAP_* value. */
struct caps_opt {
	char *name;	/* lower-case capability name without "CAP_" prefix */
	int value;	/* matching CAP_* constant */
};
179
0769b82a
CS
180/* Declare this here, since we don't want to reshuffle the whole file. */
181static int in_caplist(int cap, struct lxc_list *caps);
182
a589434e
JN
183static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
184static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
185static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
186static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
187static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
188static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
189
190static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
191 [LXC_NET_VETH] = instantiate_veth,
192 [LXC_NET_MACVLAN] = instantiate_macvlan,
193 [LXC_NET_VLAN] = instantiate_vlan,
194 [LXC_NET_PHYS] = instantiate_phys,
195 [LXC_NET_EMPTY] = instantiate_empty,
196 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 197};
198
74a2b586
JK
199static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
200static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
201static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
202static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
203static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 204static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 205
a589434e 206static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
207 [LXC_NET_VETH] = shutdown_veth,
208 [LXC_NET_MACVLAN] = shutdown_macvlan,
209 [LXC_NET_VLAN] = shutdown_vlan,
210 [LXC_NET_PHYS] = shutdown_phys,
211 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 212 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
213};
214
998ac676 215static struct mount_opt mount_opt[] = {
88d413d5
SW
216 { "defaults", 0, 0 },
217 { "ro", 0, MS_RDONLY },
218 { "rw", 1, MS_RDONLY },
219 { "suid", 1, MS_NOSUID },
220 { "nosuid", 0, MS_NOSUID },
221 { "dev", 1, MS_NODEV },
222 { "nodev", 0, MS_NODEV },
223 { "exec", 1, MS_NOEXEC },
224 { "noexec", 0, MS_NOEXEC },
225 { "sync", 0, MS_SYNCHRONOUS },
226 { "async", 1, MS_SYNCHRONOUS },
227 { "dirsync", 0, MS_DIRSYNC },
228 { "remount", 0, MS_REMOUNT },
229 { "mand", 0, MS_MANDLOCK },
230 { "nomand", 1, MS_MANDLOCK },
231 { "atime", 1, MS_NOATIME },
232 { "noatime", 0, MS_NOATIME },
233 { "diratime", 1, MS_NODIRATIME },
234 { "nodiratime", 0, MS_NODIRATIME },
235 { "bind", 0, MS_BIND },
236 { "rbind", 0, MS_BIND|MS_REC },
237 { "relatime", 0, MS_RELATIME },
238 { "norelatime", 1, MS_RELATIME },
239 { "strictatime", 0, MS_STRICTATIME },
240 { "nostrictatime", 1, MS_STRICTATIME },
241 { NULL, 0, 0 },
998ac676
RT
242};
243
495d2046 244#if HAVE_SYS_CAPABILITY_H
81810dd1 245static struct caps_opt caps_opt[] = {
a6afdde9 246 { "chown", CAP_CHOWN },
1e11be34
DL
247 { "dac_override", CAP_DAC_OVERRIDE },
248 { "dac_read_search", CAP_DAC_READ_SEARCH },
249 { "fowner", CAP_FOWNER },
250 { "fsetid", CAP_FSETID },
81810dd1
DL
251 { "kill", CAP_KILL },
252 { "setgid", CAP_SETGID },
253 { "setuid", CAP_SETUID },
254 { "setpcap", CAP_SETPCAP },
255 { "linux_immutable", CAP_LINUX_IMMUTABLE },
256 { "net_bind_service", CAP_NET_BIND_SERVICE },
257 { "net_broadcast", CAP_NET_BROADCAST },
258 { "net_admin", CAP_NET_ADMIN },
259 { "net_raw", CAP_NET_RAW },
260 { "ipc_lock", CAP_IPC_LOCK },
261 { "ipc_owner", CAP_IPC_OWNER },
262 { "sys_module", CAP_SYS_MODULE },
263 { "sys_rawio", CAP_SYS_RAWIO },
264 { "sys_chroot", CAP_SYS_CHROOT },
265 { "sys_ptrace", CAP_SYS_PTRACE },
266 { "sys_pacct", CAP_SYS_PACCT },
267 { "sys_admin", CAP_SYS_ADMIN },
268 { "sys_boot", CAP_SYS_BOOT },
269 { "sys_nice", CAP_SYS_NICE },
270 { "sys_resource", CAP_SYS_RESOURCE },
271 { "sys_time", CAP_SYS_TIME },
272 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
273 { "mknod", CAP_MKNOD },
274 { "lease", CAP_LEASE },
9527e566 275#ifdef CAP_AUDIT_WRITE
81810dd1 276 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
277#endif
278#ifdef CAP_AUDIT_CONTROL
81810dd1 279 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 280#endif
81810dd1
DL
281 { "setfcap", CAP_SETFCAP },
282 { "mac_override", CAP_MAC_OVERRIDE },
283 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
284#ifdef CAP_SYSLOG
285 { "syslog", CAP_SYSLOG },
286#endif
287#ifdef CAP_WAKE_ALARM
288 { "wake_alarm", CAP_WAKE_ALARM },
289#endif
81810dd1 290};
495d2046
SG
291#else
292static struct caps_opt caps_opt[] = {};
293#endif
81810dd1 294
91c3830e
SH
295static int run_buffer(char *buffer)
296{
ebec9176 297 struct lxc_popen_FILE *f;
91c3830e 298 char *output;
8e7da691 299 int ret;
91c3830e 300
ebec9176 301 f = lxc_popen(buffer);
91c3830e
SH
302 if (!f) {
303 SYSERROR("popen failed");
304 return -1;
305 }
306
307 output = malloc(LXC_LOG_BUFFER_SIZE);
308 if (!output) {
309 ERROR("failed to allocate memory for script output");
ebec9176 310 lxc_pclose(f);
91c3830e
SH
311 return -1;
312 }
313
ebec9176 314 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
315 DEBUG("script output: %s", output);
316
317 free(output);
318
ebec9176 319 ret = lxc_pclose(f);
8e7da691 320 if (ret == -1) {
91c3830e
SH
321 SYSERROR("Script exited on error");
322 return -1;
8e7da691
DE
323 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
324 ERROR("Script exited with status %d", WEXITSTATUS(ret));
325 return -1;
326 } else if (WIFSIGNALED(ret)) {
327 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
328 strsignal(WTERMSIG(ret)));
329 return -1;
91c3830e
SH
330 }
331
332 return 0;
333}
334
/*
 * run_script_argv: build the command line
 *     "<script> <name> <section> <hook> [<argsin[0]> <argsin[1]> ...]"
 * and execute it with run_buffer().
 *
 * @name    : container name (second word of the command line)
 * @section : configuration section (third word)
 * @script  : script to execute (first word)
 * @hook    : hook type (fourth word)
 * @lxcpath : not used when building the command line
 * @argsin  : optional NULL-terminated array of extra arguments, may be NULL
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * built.
 *
 * The command line is heap-allocated: its length is derived from caller
 * supplied strings and is only bounded by INT_MAX, which is far too large
 * for alloca(), and alloca() cannot report failure.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument is emitted as " <arg>" */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two separating spaces plus the terminating NUL */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			free(buffer);
			return -1;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);

	return ret;
}
384
751d9dcd
DL
/*
 * run_script: build the command line
 *     "<script> <name> <section> [<extra args>...]"
 * and execute it with run_buffer().
 *
 * @name    : container name (second word of the command line)
 * @section : configuration section (third word)
 * @script  : script to execute (first word)
 * @...     : extra arguments as char *; the list is read until a NULL
 *            pointer is found, so callers must end it with one
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * built.
 *
 * The command line is heap-allocated (its size depends on caller supplied
 * strings and alloca() cannot report failure), and every va_start() is
 * paired with a va_end() on all paths, including the error path.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: each extra argument is emitted as " <arg>" */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two separating spaces plus the terminating NUL */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);

	return ret;
}
436
/*
 * find_fstype_cb: per-line callback used while scanning a list of
 * filesystem types. @buffer holds one line of the list and @data points to
 * the { rootfs, target, options } triple prepared by the caller.
 *
 * The line is trimmed and used as the filesystem type for a mount() of
 * rootfs on target.
 *
 * Returns  1 when the mount succeeded,
 *          0 when the line was skipped or the mount failed,
 *         -1 when the mount options could not be parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Start from known values: mntdata is passed to free() even when
	 * parse_mntopts() fails, so it must never be indeterminate.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip surrounding whitespace in place */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
481
a17b1e65
SG
482static int mount_unknown_fs(const char *rootfs, const char *target,
483 const char *options)
78ae2fcc 484{
a6afdde9 485 int i;
78ae2fcc 486
487 struct cbarg {
488 const char *rootfs;
a6afdde9 489 const char *target;
a17b1e65 490 const char *options;
78ae2fcc 491 } cbarg = {
492 .rootfs = rootfs,
a6afdde9 493 .target = target,
a17b1e65 494 .options = options,
78ae2fcc 495 };
496
a6afdde9
DL
497 /*
498 * find the filesystem type with brute force:
499 * first we check with /etc/filesystems, in case the modules
78ae2fcc 500 * are auto-loaded and fall back to the supported kernel fs
501 */
502 char *fsfile[] = {
503 "/etc/filesystems",
504 "/proc/filesystems",
505 };
506
a6afdde9
DL
507 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
508
509 int ret;
510
511 if (access(fsfile[i], F_OK))
512 continue;
513
514 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
515 if (ret < 0) {
516 ERROR("failed to parse '%s'", fsfile[i]);
517 return -1;
518 }
519
520 if (ret)
521 return 0;
78ae2fcc 522 }
523
a6afdde9
DL
524 ERROR("failed to determine fs type for '%s'", rootfs);
525 return -1;
526}
527
a17b1e65
SG
/*
 * mount_rootfs_dir: recursively bind-mount the directory @rootfs on
 * @target, adding the flags and data parsed from @options.
 *
 * Returns the result of mount(), or -1 if @options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Start from known values: mntdata is passed to free() even when
	 * parse_mntopts() fails, so it must never be indeterminate.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
545
/*
 * setup_lodev: attach the file @rootfs to the loop device open on @fd.
 *
 * @rootfs : path of the backing file, opened read-write
 * @fd     : open descriptor of the loop device
 * @loinfo : scratch status block; it is zeroed and sent to the kernel with
 *           only LO_FLAGS_AUTOCLEAR set
 *
 * Returns 0 on success, -1 on failure. The descriptor on the backing file
 * is always closed before returning; @fd stays open and owned by the
 * caller.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/*
		 * The backing file is already attached but the autoclear
		 * flag was not applied: detach explicitly so the loop
		 * device is not left bound after this failure.
		 */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			WARN("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
577
a17b1e65
SG
/*
 * mount_rootfs_file: mount the image file @rootfs on @target through a
 * loop device.
 *
 * /dev is scanned for entries whose name starts with "loop". The first one
 * for which LOOP_GET_STATUS64 fails with ENXIO is treated as free, bound to
 * @rootfs with setup_lodev() and mounted with mount_unknown_fs(). Only that
 * first free device is tried.
 *
 * Returns 0 on success, -1 if /dev cannot be opened, no free loop device is
 * found, or the setup/mount fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/*
	 * readdir() replaces the deprecated readdir_r(): the directory
	 * stream is private to this function and no fixed-size entry
	 * buffer is needed.
	 */
	while ((direntp = readdir(dir))) {

		/* this also filters out "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
642
a17b1e65
SG
/*
 * mount_rootfs_block: mount the block device @rootfs on @target. The
 * filesystem type is not known in advance, so the work is delegated to
 * mount_unknown_fs() and its result is returned unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc;

	rc = mount_unknown_fs(rootfs, target, options);

	return rc;
}
648
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. The file is unlinked immediately
 * after it has been opened so that no name is left behind in the rootfs.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, path cannot be
 *           resolved, or rootfs is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the pin alive without the name */
	(void)unlink(absrootfspin);
	return fd;
}
691
e2a7e8dc
SH
/*
 * When remounting, the restrictive flags already present on the mount
 * (nosuid, nodev, read-only, noexec) have to be passed again, so look them
 * up with statvfs() and OR them into @flags.
 *
 * @s     : source path, may be NULL (then @d is inspected instead)
 * @d     : destination path, may be NULL
 * @flags : requested mount flags
 *
 * Returns @flags unchanged when MS_REMOUNT is not requested, when no path
 * is available, when statvfs() fails or when built without HAVE_STATVFS.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
		unsigned long flags)
{
#ifndef HAVE_STATVFS
	return flags;
#else
	const unsigned long sticky_bits = MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC;
	const char *probe = s ? s : d;
	struct statvfs vfs;

	if ((flags & MS_REMOUNT) == 0)
		return flags;

	if (probe == NULL)
		return flags;

	if (statvfs(probe, &vfs) < 0)
		return flags;

	/* carry over whichever of the four restrictive bits are set */
	return flags | (vfs.f_flag & sticky_bits);
#endif
}
728
4fb3cba5 729static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 730{
368bbc02 731 int r;
b06b8511
CS
732 size_t i;
733 static struct {
734 int match_mask;
735 int match_flag;
736 const char *source;
737 const char *destination;
738 const char *fstype;
739 unsigned long flags;
740 const char *options;
741 } default_mounts[] = {
742 /* Read-only bind-mounting... In older kernels, doing that required
743 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
744 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
745 * kernel 2.6.26 onwards. However, this apparently does not work on
746 * kernel 3.8. Unfortunately, on that very same kernel, doing the
747 * same trick as above doesn't seem to work either, there one needs
748 * to ALSO specify MS_BIND for the remount, otherwise the entire
749 * fs is remounted read-only or the mount fails because it's busy...
750 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
751 * 2.6.32...
368bbc02 752 */
f24a52d5
SG
753 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
754 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
755 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
756 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
757 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
758 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
759 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
760 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
761 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
762 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
763 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
764 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
765 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
766 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
767 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 768 };
368bbc02 769
b06b8511
CS
770 for (i = 0; default_mounts[i].match_mask; i++) {
771 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
772 char *source = NULL;
773 char *destination = NULL;
774 int saved_errno;
e2a7e8dc 775 unsigned long mflags;
b06b8511
CS
776
777 if (default_mounts[i].source) {
778 /* will act like strdup if %r is not present */
779 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
780 if (!source) {
781 SYSERROR("memory allocation error");
782 return -1;
783 }
784 }
785 if (default_mounts[i].destination) {
786 /* will act like strdup if %r is not present */
787 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
788 if (!destination) {
789 saved_errno = errno;
790 SYSERROR("memory allocation error");
791 free(source);
792 errno = saved_errno;
793 return -1;
794 }
795 }
e2a7e8dc
SH
796 mflags = add_required_remount_flags(source, destination,
797 default_mounts[i].flags);
798 r = mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options);
b06b8511 799 saved_errno = errno;
c414be25 800 if (r < 0)
e2a7e8dc 801 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 802
b06b8511
CS
803 free(source);
804 free(destination);
805 if (r < 0) {
b06b8511
CS
806 errno = saved_errno;
807 return -1;
808 }
368bbc02 809 }
368bbc02
CS
810 }
811
b06b8511 812 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
813 int cg_flags;
814
815 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
816 /* If the type of cgroup mount was not specified, it depends on the
817 * container's capabilities as to what makes sense: if we have
818 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
819 * anyway, so we may as well default to read-write; then the admin
820 * will not be given a false sense of security. (And if they really
821 * want mixed r/o r/w, then they can explicitly specify :mixed.)
822 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
823 * :mixed, because then the container can't remount it read-write. */
824 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
825 int has_sys_admin = 0;
826 if (!lxc_list_empty(&conf->keepcaps)) {
827 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
828 } else {
829 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
830 }
831 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
832 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
833 } else {
834 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
835 }
836 }
837
838 if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
368bbc02 839 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 840 return -1;
368bbc02
CS
841 }
842 }
843
368bbc02 844 return 0;
368bbc02
CS
845}
846
a17b1e65 847static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 848{
b09ef133 849 char absrootfs[MAXPATHLEN];
78ae2fcc 850 struct stat s;
a6afdde9 851 int i;
78ae2fcc 852
a17b1e65 853 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 854
855 struct rootfs_type {
856 int type;
857 rootfs_cb cb;
858 } rtfs_type[] = {
2656d231
DL
859 { S_IFDIR, mount_rootfs_dir },
860 { S_IFBLK, mount_rootfs_block },
861 { S_IFREG, mount_rootfs_file },
78ae2fcc 862 };
0ad19a3f 863
4c8ab83b 864 if (!realpath(rootfs, absrootfs)) {
36eb9bde 865 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 866 return -1;
867 }
b09ef133 868
b09ef133 869 if (access(absrootfs, F_OK)) {
36eb9bde 870 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 871 return -1;
872 }
873
78ae2fcc 874 if (stat(absrootfs, &s)) {
36eb9bde 875 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 876 return -1;
877 }
878
78ae2fcc 879 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 880
78ae2fcc 881 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
882 continue;
9b0f0477 883
a17b1e65 884 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 885 }
9b0f0477 886
36eb9bde 887 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 888 return -1;
0ad19a3f 889}
890
/*
 * setup_utsname: set the hostname to utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and counts as
 * success. Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
905
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name, relative to /dev */
};

/* The standard /dev entries that point into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
917
918static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
919{
920 char path[MAXPATHLEN];
921 int ret,i;
09227be2 922 struct stat s;
69aa6655
DE
923
924
925 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
926 const struct dev_symlinks *d = &dev_symlinks[i];
927 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
928 if (ret < 0 || ret >= MAXPATHLEN)
929 return -1;
09227be2
MW
930
931 /*
932 * Stat the path first. If we don't get an error
933 * accept it as is and don't try to create it
934 */
935 if (!stat(path, &s)) {
936 continue;
937 }
938
69aa6655 939 ret = symlink(d->oldpath, path);
09227be2 940
69aa6655 941 if (ret && errno != EEXIST) {
09227be2
MW
942 if ( errno == EROFS ) {
943 WARN("Warning: Read Only file system while creating %s", path);
944 } else {
945 SYSERROR("Error creating %s", path);
946 return -1;
947 }
69aa6655
DE
948 }
949 }
950 return 0;
951}
952
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * @pp   : address of the heap-allocated list. When *pp is NULL a new string
 *         "container_ttys=<name>" is allocated; otherwise " <name>" is
 *         appended to the existing string.
 * @name : pty name to add
 *
 * Returns true on success, false if memory could not be allocated (an
 * existing list is left untouched in that case).
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t name_len = strlen(name);
	size_t cur_len;
	char *grown;

	if (*pp == NULL) {
		const size_t prefix_len = sizeof(prefix) - 1;
		char *fresh = malloc(name_len + prefix_len + 1);

		*pp = fresh;
		if (fresh == NULL)
			return false;

		memcpy(fresh, prefix, prefix_len);
		/* copies the terminating NUL as well */
		memcpy(fresh + prefix_len, name, name_len + 1);
		return true;
	}

	cur_len = strlen(*pp);
	/* room for the separating space and the terminating NUL */
	grown = realloc(*pp, cur_len + name_len + 2);
	if (grown == NULL)
		return false;

	*pp = grown;
	grown[cur_len] = ' ';
	memcpy(grown + cur_len + 1, name, name_len + 1);
	return true;
}
975
976static int setup_tty(struct lxc_conf *conf)
977{
978 const struct lxc_rootfs *rootfs = &conf->rootfs;
979 const struct lxc_tty_info *tty_info = &conf->tty_info;
980 char *ttydir = conf->ttydir;
7c6ef2a2
SH
981 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
982 int i, ret;
b0a33c1e 983
bc9bd0e3
DL
984 if (!rootfs->path)
985 return 0;
986
b0a33c1e 987 for (i = 0; i < tty_info->nbtty; i++) {
988
989 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
990
7c6ef2a2 991 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 992 rootfs->mount, i + 1);
7c6ef2a2
SH
993 if (ret >= sizeof(path)) {
994 ERROR("pathname too long for ttys");
995 return -1;
996 }
997 if (ttydir) {
998 /* create dev/lxc/tty%d" */
9ba8130c 999 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
1000 rootfs->mount, ttydir, i + 1);
1001 if (ret >= sizeof(lxcpath)) {
1002 ERROR("pathname too long for ttys");
1003 return -1;
1004 }
1005 ret = creat(lxcpath, 0660);
1006 if (ret==-1 && errno != EEXIST) {
959aee9c 1007 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
1008 return -1;
1009 }
4d44e274
SH
1010 if (ret >= 0)
1011 close(ret);
7c6ef2a2
SH
1012 ret = unlink(path);
1013 if (ret && errno != ENOENT) {
959aee9c 1014 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1015 return -1;
1016 }
b0a33c1e 1017
7c6ef2a2
SH
1018 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1019 WARN("failed to mount '%s'->'%s'",
1020 pty_info->name, path);
1021 continue;
1022 }
13954cce 1023
9ba8130c
SH
1024 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1025 if (ret >= sizeof(lxcpath)) {
1026 ERROR("tty pathname too long");
1027 return -1;
1028 }
7c6ef2a2
SH
1029 ret = symlink(lxcpath, path);
1030 if (ret) {
959aee9c 1031 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1032 return -1;
1033 }
393903d1
SH
1034 /* Now save the relative path in @path for append_ptyname */
1035 sprintf(path, "%s/tty%d", ttydir, i + 1);
7c6ef2a2 1036 } else {
c6883f38
SH
1037 /* If we populated /dev, then we need to create /dev/ttyN */
1038 if (access(path, F_OK)) {
1039 ret = creat(path, 0660);
1040 if (ret==-1) {
959aee9c 1041 SYSERROR("error creating %s", path);
c6883f38 1042 /* this isn't fatal, continue */
025ed0f3 1043 } else {
c6883f38 1044 close(ret);
025ed0f3 1045 }
c6883f38 1046 }
7c6ef2a2
SH
1047 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
1048 WARN("failed to mount '%s'->'%s'",
1049 pty_info->name, path);
1050 continue;
1051 }
393903d1
SH
1052 /* Now save the relative path in @path for append_ptyname */
1053 sprintf(path, "tty%d", i + 1);
1054 }
1055 if (!append_ptyname(&conf->pty_names, path)) {
1056 ERROR("Error setting up container_ttys string");
1057 return -1;
b0a33c1e 1058 }
1059 }
1060
cd54d859
DL
1061 INFO("%d tty(s) has been setup", tty_info->nbtty);
1062
b0a33c1e 1063 return 0;
1064}
1065
bf601689 1066
2d489f9e 1067static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 1068{
2d489f9e 1069 int oldroot = -1, newroot = -1;
bf601689 1070
2d489f9e
SH
1071 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1072 if (oldroot < 0) {
1073 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1074 return -1;
1075 }
2d489f9e
SH
1076 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1077 if (newroot < 0) {
1078 SYSERROR("Error opening new-/ for fchdir");
1079 goto fail;
c08556c6 1080 }
bf601689 1081
cc6f6dd7 1082 /* change into new root fs */
2d489f9e 1083 if (fchdir(newroot)) {
cc6f6dd7 1084 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1085 goto fail;
cc6f6dd7
DL
1086 }
1087
cc6f6dd7 1088 /* pivot_root into our new root fs */
2d489f9e 1089 if (pivot_root(".", ".")) {
cc6f6dd7 1090 SYSERROR("pivot_root syscall failed");
2d489f9e 1091 goto fail;
bf601689 1092 }
cc6f6dd7 1093
2d489f9e
SH
1094 /*
1095 * at this point the old-root is mounted on top of our new-root
1096 * To unmounted it we must not be chdir'd into it, so escape back
1097 * to old-root
1098 */
1099 if (fchdir(oldroot) < 0) {
1100 SYSERROR("Error entering oldroot");
1101 goto fail;
1102 }
7981ea46 1103 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1104 SYSERROR("Error detaching old root");
1105 goto fail;
cc6f6dd7
DL
1106 }
1107
2d489f9e
SH
1108 if (fchdir(newroot) < 0) {
1109 SYSERROR("Error re-entering newroot");
1110 goto fail;
1111 }
cc6f6dd7 1112
2d489f9e
SH
1113 close(oldroot);
1114 close(newroot);
bf601689 1115
2d489f9e 1116 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1117
bf601689 1118 return 0;
2d489f9e
SH
1119
1120fail:
1121 if (oldroot != -1)
1122 close(oldroot);
1123 if (newroot != -1)
1124 close(newroot);
1125 return -1;
bf601689
MH
1126}
1127
/*
 * Mount a fresh tmpfs on $root/dev and make sure $root/dev/pts exists.
 *
 * @name and @lxcpath are accepted for interface compatibility but are not
 * used by this function.
 *
 * Returns 0 on success, and also 0 (after a warning) when the container
 * rootfs has no /dev directory, in which case autodev setup is skipped.
 * Returns -1 on any error.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	size_t clen;
	char *path;

	INFO("Mounting /dev under %s", root);

	/* $(root) + "/dev/pts" + '\0' */
	clen = strlen(root) + 9;
	path = alloca(clen);

	ret = snprintf(path, clen, "%s/dev", root);
	if (ret < 0 || (size_t)ret >= clen)
		return -1;

	if (!dir_exists(path)) {
		WARN("No /dev on container rootfs.");
		WARN("Proceeding without autodev setup");
		return 0;
	}

	if (mount("none", path, "tmpfs", 0, "size=100000,mode=755")) {
		SYSERROR("Failed mounting tmpfs onto %s\n", path);
		/*
		 * This used to "return false", i.e. 0, which told the
		 * caller that the mount had succeeded.
		 */
		return -1;
	}

	INFO("Mounted tmpfs onto %s", path);

	ret = snprintf(path, clen, "%s/dev/pts", root);
	if (ret < 0 || (size_t)ret >= clen)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and bail out if that fails.
	 */
	if (!dir_exists(path)) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1180
c6883f38 1181struct lxc_devs {
74a3920a 1182 const char *name;
c6883f38
SH
1183 mode_t mode;
1184 int maj;
1185 int min;
1186};
1187
74a3920a 1188static const struct lxc_devs lxc_devs[] = {
c6883f38
SH
1189 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
1190 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
1191 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
1192 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1193 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1194 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
1195 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
1196};
1197
9cb4d183 1198static int fill_autodev(const char *root)
c6883f38
SH
1199{
1200 int ret;
c6883f38
SH
1201 char path[MAXPATHLEN];
1202 int i;
3a32201c 1203 mode_t cmask;
c6883f38 1204
959aee9c 1205 INFO("Creating initial consoles under %s/dev", root);
91c3830e 1206
c6883f38 1207 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1208 if (ret < 0 || ret >= MAXPATHLEN) {
1209 ERROR("Error calculating container /dev location");
c6883f38 1210 return -1;
f7bee6c6 1211 }
91c3830e 1212
9cb4d183
SH
1213 if (!dir_exists(path)) // ignore, just don't try to fill in
1214 return 0;
1215
959aee9c 1216 INFO("Populating /dev under %s", root);
3a32201c 1217 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1218 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1219 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1220 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1221 if (ret < 0 || ret >= MAXPATHLEN)
1222 return -1;
1223 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1224 if (ret && errno != EEXIST) {
9cb4d183
SH
1225 char hostpath[MAXPATHLEN];
1226 FILE *pathfile;
1227
1228 // Unprivileged containers cannot create devices, so
1229 // bind mount the device from the host
1230 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1231 if (ret < 0 || ret >= MAXPATHLEN)
1232 return -1;
1233 pathfile = fopen(path, "wb");
1234 if (!pathfile) {
1235 SYSERROR("Failed to create device mount target '%s'", path);
1236 return -1;
1237 }
1238 fclose(pathfile);
1239 if (mount(hostpath, path, 0, MS_BIND, NULL) != 0) {
1240 SYSERROR("Failed bind mounting device %s from host into container",
1241 d->name);
1242 return -1;
1243 }
c6883f38
SH
1244 }
1245 }
3a32201c 1246 umask(cmask);
c6883f38 1247
959aee9c 1248 INFO("Populated /dev under %s", root);
c6883f38
SH
1249 return 0;
1250}
1251
cc28d0b0 1252static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1253{
cc28d0b0
SH
1254 const struct lxc_rootfs *rootfs = &conf->rootfs;
1255
a0f379bf
DW
1256 if (!rootfs->path) {
1257 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1258 SYSERROR("Failed to make / rslave");
1259 return -1;
1260 }
c69bd12f 1261 return 0;
a0f379bf 1262 }
0ad19a3f 1263
12297168 1264 if (access(rootfs->mount, F_OK)) {
b1789442 1265 SYSERROR("failed to access to '%s', check it is present",
12297168 1266 rootfs->mount);
b1789442
DL
1267 return -1;
1268 }
1269
9be53773 1270 // First try mounting rootfs using a bdev
76a26f55 1271 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1272 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1273 bdev_put(bdev);
9be53773
SH
1274 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1275 return 0;
1276 }
59d66af2
SH
1277 if (bdev)
1278 bdev_put(bdev);
a17b1e65 1279 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1280 ERROR("failed to mount rootfs");
c3f0a28c 1281 return -1;
1282 }
0ad19a3f 1283
12297168 1284 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1285
ac778708
DL
1286 return 0;
1287}
1288
91e93c71
AV
1289int prepare_ramfs_root(char *root)
1290{
1291 char buf[LINELEN], *p;
1292 char nroot[PATH_MAX];
1293 FILE *f;
1294 int i;
1295 char *p2;
1296
1297 if (realpath(root, nroot) == NULL)
1298 return -1;
1299
1300 if (chdir("/") == -1)
1301 return -1;
1302
1303 /*
1304 * We could use here MS_MOVE, but in userns this mount is
1305 * locked and can't be moved.
1306 */
1307 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1308 SYSERROR("Failed to move %s into /", root);
1309 return -1;
1310 }
1311
88322f77 1312 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1313 SYSERROR("Failed to make . rprivate");
1314 return -1;
1315 }
1316
1317 /*
1318 * The following code cleans up inhereted mounts which are not
1319 * required for CT.
1320 *
1321 * The mountinfo file shows not all mounts, if a few points have been
1322 * unmounted between read operations from the mountinfo. So we need to
1323 * read mountinfo a few times.
1324 *
1325 * This loop can be skipped if a container uses unserns, because all
1326 * inherited mounts are locked and we should live with all this trash.
1327 */
1328 while (1) {
1329 int progress = 0;
1330
1331 f = fopen("./proc/self/mountinfo", "r");
1332 if (!f) {
1333 SYSERROR("Unable to open /proc/self/mountinfo");
1334 return -1;
1335 }
1336 while (fgets(buf, LINELEN, f)) {
1337 for (p = buf, i=0; p && i < 4; i++)
1338 p = strchr(p+1, ' ');
1339 if (!p)
1340 continue;
1341 p2 = strchr(p+1, ' ');
1342 if (!p2)
1343 continue;
1344
1345 *p2 = '\0';
1346 *p = '.';
1347
1348 if (strcmp(p + 1, "/") == 0)
1349 continue;
1350 if (strcmp(p + 1, "/proc") == 0)
1351 continue;
1352
1353 if (umount2(p, MNT_DETACH) == 0)
1354 progress++;
1355 }
1356 fclose(f);
1357 if (!progress)
1358 break;
1359 }
1360
1361 if (umount2("./proc", MNT_DETACH)) {
1362 SYSERROR("Unable to umount /proc");
1363 return -1;
1364 }
1365
1366 /* It is weird, but chdir("..") moves us in a new root */
1367 if (chdir("..") == -1) {
1368 SYSERROR("Unable to change working directory");
1369 return -1;
1370 }
1371
1372 if (chroot(".") == -1) {
1373 SYSERROR("Unable to chroot");
1374 return -1;
1375 }
1376
1377 return 0;
1378}
1379
74a3920a 1380static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1381{
ac778708
DL
1382 if (!rootfs->path)
1383 return 0;
1384
91e93c71
AV
1385 if (detect_ramfs_rootfs()) {
1386 if (prepare_ramfs_root(rootfs->mount))
1387 return -1;
1388 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1389 ERROR("failed to setup pivot root");
25368b52 1390 return -1;
c69bd12f
DL
1391 }
1392
25368b52 1393 return 0;
0ad19a3f 1394}
1395
d852c78c 1396static int setup_pts(int pts)
3c26f34e 1397{
77890c6d
SW
1398 char target[PATH_MAX];
1399
d852c78c
DL
1400 if (!pts)
1401 return 0;
3c26f34e 1402
1403 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1404 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1405 return -1;
1406 }
1407
7e40254a
JTLB
1408 if (mkdir("/dev/pts", 0755)) {
1409 if ( errno != EEXIST ) {
1410 SYSERROR("failed to create '/dev/pts'");
1411 return -1;
1412 }
1413 }
1414
a6afdde9 1415 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1416 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1417 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1418 return -1;
1419 }
1420
3c26f34e 1421 if (access("/dev/ptmx", F_OK)) {
1422 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1423 goto out;
36eb9bde 1424 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1425 return -1;
1426 }
1427
77890c6d
SW
1428 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1429 goto out;
1430
3c26f34e 1431 /* fallback here, /dev/pts/ptmx exists just mount bind */
1432 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1433 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1434 return -1;
1435 }
cd54d859
DL
1436
1437 INFO("created new pts instance");
d852c78c 1438
3c26f34e 1439out:
1440 return 0;
1441}
1442
/*
 * Apply the execution domain @persona via personality().
 * A value of -1 means "leave unchanged".  When the build has no
 * <sys/personality.h>, this is a no-op.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1459
7c6ef2a2 1460static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1461 const struct lxc_console *console)
6e590161 1462{
63376d7d
DL
1463 char path[MAXPATHLEN];
1464 struct stat s;
7c6ef2a2 1465 int ret;
52e35957 1466
7c6ef2a2
SH
1467 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1468 if (ret >= sizeof(path)) {
959aee9c 1469 ERROR("console path too long");
7c6ef2a2
SH
1470 return -1;
1471 }
52e35957 1472
63376d7d 1473 if (access(path, F_OK)) {
466978b0 1474 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1475 return 0;
52e35957
DL
1476 }
1477
b5159817
DE
1478 if (console->master < 0) {
1479 INFO("no console");
f78a1f32
DL
1480 return 0;
1481 }
ed502555 1482
63376d7d
DL
1483 if (stat(path, &s)) {
1484 SYSERROR("failed to stat '%s'", path);
1485 return -1;
1486 }
1487
1488 if (chmod(console->name, s.st_mode)) {
1489 SYSERROR("failed to set mode '0%o' to '%s'",
1490 s.st_mode, console->name);
1491 return -1;
1492 }
13954cce 1493
63376d7d
DL
1494 if (mount(console->name, path, "none", MS_BIND, 0)) {
1495 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1496 return -1;
1497 }
1498
63376d7d 1499 INFO("console has been setup");
7c6ef2a2
SH
1500 return 0;
1501}
1502
1503static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1504 const struct lxc_console *console,
1505 char *ttydir)
1506{
1507 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1508 int ret;
1509
1510 /* create rootfs/dev/<ttydir> directory */
1511 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1512 ttydir);
1513 if (ret >= sizeof(path))
1514 return -1;
1515 ret = mkdir(path, 0755);
1516 if (ret && errno != EEXIST) {
959aee9c 1517 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1518 return -1;
1519 }
959aee9c 1520 INFO("created %s", path);
7c6ef2a2
SH
1521
1522 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1523 rootfs->mount, ttydir);
1524 if (ret >= sizeof(lxcpath)) {
959aee9c 1525 ERROR("console path too long");
7c6ef2a2
SH
1526 return -1;
1527 }
1528
1529 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1530 ret = unlink(path);
1531 if (ret && errno != ENOENT) {
959aee9c 1532 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1533 return -1;
1534 }
1535
1536 ret = creat(lxcpath, 0660);
1537 if (ret==-1 && errno != EEXIST) {
959aee9c 1538 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1539 return -1;
1540 }
4d44e274
SH
1541 if (ret >= 0)
1542 close(ret);
7c6ef2a2 1543
b5159817
DE
1544 if (console->master < 0) {
1545 INFO("no console");
7c6ef2a2
SH
1546 return 0;
1547 }
1548
1549 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1550 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1551 return -1;
1552 }
1553
1554 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1555 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1556 if (ret >= sizeof(lxcpath)) {
1557 ERROR("lxc/console path too long");
1558 return -1;
1559 }
7c6ef2a2
SH
1560 ret = symlink(lxcpath, path);
1561 if (ret) {
1562 SYSERROR("failed to create symlink for console");
1563 return -1;
1564 }
1565
1566 INFO("console has been setup on %s", lxcpath);
cd54d859 1567
6e590161 1568 return 0;
1569}
1570
7c6ef2a2
SH
1571static int setup_console(const struct lxc_rootfs *rootfs,
1572 const struct lxc_console *console,
1573 char *ttydir)
1574{
1575 /* We don't have a rootfs, /dev/console will be shared */
1576 if (!rootfs->path)
1577 return 0;
1578 if (!ttydir)
1579 return setup_dev_console(rootfs, console);
1580
1581 return setup_ttydir_console(rootfs, console, ttydir);
1582}
1583
1bd051a6
SH
1584static int setup_kmsg(const struct lxc_rootfs *rootfs,
1585 const struct lxc_console *console)
1586{
1587 char kpath[MAXPATHLEN];
1588 int ret;
1589
222fea5a
DE
1590 if (!rootfs->path)
1591 return 0;
1bd051a6
SH
1592 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1593 if (ret < 0 || ret >= sizeof(kpath))
1594 return -1;
1595
1596 ret = unlink(kpath);
1597 if (ret && errno != ENOENT) {
959aee9c 1598 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1599 return -1;
1600 }
1601
1602 ret = symlink("console", kpath);
1603 if (ret) {
1604 SYSERROR("failed to create symlink for kmsg");
1605 return -1;
1606 }
1607
1608 return 0;
1609}
1610
998ac676
RT
1611static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1612{
1613 struct mount_opt *mo;
1614
1615 /* If opt is found in mount_opt, set or clear flags.
1616 * Otherwise append it to data. */
1617
1618 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1619 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1620 if (mo->clear)
1621 *flags &= ~mo->flag;
1622 else
1623 *flags |= mo->flag;
1624 return;
1625 }
1626 }
1627
1628 if (strlen(*data))
1629 strcat(*data, ",");
1630 strcat(*data, opt);
1631}
1632
/*
 * Split the comma-separated option string @mntopts into mount flags and
 * a filesystem-specific data string.
 *
 * *@mntflags and *@mntdata are always initialised (to 0 and NULL).  On
 * success *@mntdata is either NULL or a malloc'd string the caller must
 * free.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *data;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() writes into its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the option string */
	data = malloc(strlen(copy) + 1);
	if (!data) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	data[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &data);
		tok = strtok_r(NULL, ",", &state);
	}

	if (data[0] != '\0')
		*mntdata = data;
	else
		free(data);
	free(copy);

	return 0;
}
1671
/*
 * Terminate @word in place at its first space or tab (or leave it
 * unchanged if it contains neither).
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1678
/*
 * Skip @nfields whitespace-terminated fields in @src.
 *
 * Each field ends at a single space or tab.  Returns a pointer just past
 * the separator of the last skipped field, or a pointer to the terminating
 * NUL if @src runs out first.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int left = nfields;

	while (left-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1696
/*
 * Mount @fsname on @target and, for bind or remount requests, follow up
 * with a remount so that the requested flags take effect.
 *
 * When statvfs() is available, the nosuid/nodev/ro/noexec flags reported
 * for @fsname are added to the remount flags, and for a plain bind mount
 * the remount is skipped if no additional flag would be applied.
 *
 * Returns 0 on success, and also 0 when a mount fails but @optional is
 * non-zero; -1 otherwise.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional)
{
	unsigned long rqd_flags;
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	/* only bind and remount requests need the second pass */
	if (!(mountflags & (MS_REMOUNT | MS_BIND)))
		goto mounted;

	DEBUG("remounting %s on %s to respect bind or remount options",
	      fsname ? fsname : "(none)", target ? target : "(none)");

	rqd_flags = mountflags & MS_RDONLY;

#ifdef HAVE_STATVFS
	if (statvfs(fsname, &sb) == 0) {
		unsigned long required_flags = rqd_flags;

		required_flags |= sb.f_flag &
			(MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC);

		DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

		/*
		 * If this was a bind mount request, and required_flags
		 * does not have any flags which are not already in
		 * mountflags, then skip the remount
		 */
		if (!(mountflags & MS_REMOUNT) &&
		    !(required_flags & ~mountflags) && rqd_flags == 0) {
			DEBUG("mountflags already was %lu, skipping remount",
			      mountflags);
			goto mounted;
		}

		mountflags |= required_flags;
	}
#endif

	if (mount(fsname, target, fstype, mountflags | MS_REMOUNT, data)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s",
		     fsname, target, strerror(errno));
		return 0;
	}

mounted:
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1773
/*
 * Remove 'create=dir', 'create=file' and 'optional' from mntent->mnt_opts,
 * editing the string in place.  These options are interpreted by lxc itself
 * and must not reach mount(2).
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const lxc_only[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(lxc_only) / sizeof(lxc_only[0]); idx++) {
		char *hit = strstr(mntent->mnt_opts, lxc_only[idx]);
		char *comma;

		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma) {
			/* slide the remaining options over the removed one */
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		} else {
			/* no more mntopts, so just chop it here */
			*hit = '\0';
		}
	}
}
1798
/*
 * Mount one fstab entry when the container has no separate rootfs: the
 * target is mntent->mnt_dir as given.
 *
 * Honours the lxc-specific options 'create=dir', 'create=file' (target is
 * created on a best-effort basis; failures are only warned about) and
 * 'optional' (a failing mount is not an error), then strips them before
 * the options are handed to mount_entry().
 *
 * Returns the result of mount_entry(), or -1 if the options cannot be
 * parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata = NULL;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage (e.g. "." for a path without a slash), so
		 * keep the strdup()'d pointer separately and free that one.
		 */
		char *dircopy = strdup(mntent->mnt_dir);

		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
1845
4e4ca161 1846static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1847 const struct lxc_rootfs *rootfs,
1848 const char *lxc_name)
911324ef 1849{
013bd428 1850 char *aux;
59760f5d 1851 char path[MAXPATHLEN];
911324ef
DL
1852 unsigned long mntflags;
1853 char *mntdata;
80a881b2 1854 int r, ret = 0, offset;
67e571de 1855 const char *lxcpath;
34cfffb3
SG
1856 FILE *pathfile = NULL;
1857 char *pathdirname = NULL;
4f1d50d1 1858 bool optional = hasmntopt(mntent, "optional") != NULL;
0ad19a3f 1859
593e8478 1860 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1861 if (!lxcpath) {
1862 ERROR("Out of memory");
1863 return -1;
1864 }
1865
80a881b2 1866 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1867 * use $lxcpath/CN/rootfs as the target prefix */
1868 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1869 if (r < 0 || r >= MAXPATHLEN)
1870 goto skipvarlib;
1871
1872 aux = strstr(mntent->mnt_dir, path);
1873 if (aux) {
1874 offset = strlen(path);
1875 goto skipabs;
1876 }
1877
1878skipvarlib:
013bd428
DL
1879 aux = strstr(mntent->mnt_dir, rootfs->path);
1880 if (!aux) {
1881 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1882 goto out;
1883 }
80a881b2
SH
1884 offset = strlen(rootfs->path);
1885
1886skipabs:
013bd428 1887
9ba8130c 1888 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1889 aux + offset);
1890 if (r < 0 || r >= MAXPATHLEN) {
1891 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1892 ret = -1;
1893 goto out;
1894 }
1895
34cfffb3 1896 if (hasmntopt(mntent, "create=dir")) {
119126b6 1897 if (mkdir_p(path, 0755) < 0) {
34cfffb3
SG
1898 WARN("Failed to create mount target '%s'", path);
1899 ret = -1;
1900 }
1901 }
1902
1903 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
1904 pathdirname = strdup(path);
1905 pathdirname = dirname(pathdirname);
119126b6
SG
1906 if (mkdir_p(pathdirname, 0755) < 0) {
1907 WARN("Failed to create target directory");
1908 }
34cfffb3
SG
1909 pathfile = fopen(path, "wb");
1910 if (!pathfile) {
1911 WARN("Failed to create mount target '%s'", path);
1912 ret = -1;
1913 }
1914 else
1915 fclose(pathfile);
1916 }
4e4ca161 1917 cull_mntent_opt(mntent);
d330fe7b 1918
a17b1e65
SG
1919 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1920 free(mntdata);
1921 return -1;
1922 }
1923
013bd428 1924 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1fc64d22 1925 mntflags, mntdata, optional);
0ad19a3f 1926
a17b1e65
SG
1927 free(mntdata);
1928
013bd428 1929out:
34cfffb3 1930 free(pathdirname);
911324ef
DL
1931 return ret;
1932}
d330fe7b 1933
/*
 * Mount one fstab entry whose target is relative: the target is resolved
 * as "<rootfs>/<mnt_dir>".
 *
 * Honours the lxc-specific options 'create=dir', 'create=file' (best
 * effort) and 'optional', and strips them before calling mount_entry().
 *
 * Returns the result of mount_entry(), or -1 if the path is too long or
 * the options cannot be parsed.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata = NULL;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0)
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the strdup()'d pointer
		 * separately and free that one.
		 */
		char *dircopy = strdup(path);

		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
1988
80a881b2
SH
1989static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1990 const char *lxc_name)
911324ef 1991{
aaf901be
AM
1992 struct mntent mntent;
1993 char buf[4096];
911324ef 1994 int ret = -1;
e76b8764 1995
aaf901be 1996 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 1997
911324ef 1998 if (!rootfs->path) {
aaf901be 1999 if (mount_entry_on_systemfs(&mntent))
e76b8764 2000 goto out;
911324ef 2001 continue;
e76b8764
CDC
2002 }
2003
911324ef 2004 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2005 if (mntent.mnt_dir[0] != '/') {
2006 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2007 rootfs->mount))
2008 goto out;
2009 continue;
2010 }
cd54d859 2011
aaf901be 2012 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2013 goto out;
0ad19a3f 2014 }
cd54d859 2015
0ad19a3f 2016 ret = 0;
cd54d859
DL
2017
2018 INFO("mount points have been setup");
0ad19a3f 2019out:
e7938e9e
MN
2020 return ret;
2021}
2022
/*
 * Mount all entries of the fstab file @fstab.  A NULL @fstab means there
 * is nothing to do.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (!fstab)
		return 0;

	entries = setmntent(fstab, "r");
	if (!entries) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);
	endmntent(entries);

	return rc;
}
2043
80a881b2
SH
2044static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2045 const char *lxc_name)
e7938e9e
MN
2046{
2047 FILE *file;
2048 struct lxc_list *iterator;
2049 char *mount_entry;
2050 int ret;
2051
2052 file = tmpfile();
2053 if (!file) {
2054 ERROR("tmpfile error: %m");
2055 return -1;
2056 }
2057
2058 lxc_list_for_each(iterator, mount) {
2059 mount_entry = iterator->elem;
1d6b1976 2060 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2061 }
2062
2063 rewind(file);
2064
80a881b2 2065 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2066
2067 fclose(file);
2068 return ret;
2069}
2070
bab88e68
CS
2071static int parse_cap(const char *cap)
2072{
2073 char *ptr = NULL;
2074 int i, capid = -1;
2075
7035407c
DE
2076 if (!strcmp(cap, "none"))
2077 return -2;
2078
bab88e68
CS
2079 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2080
2081 if (strcmp(cap, caps_opt[i].name))
2082 continue;
2083
2084 capid = caps_opt[i].value;
2085 break;
2086 }
2087
2088 if (capid < 0) {
2089 /* try to see if it's numeric, so the user may specify
2090 * capabilities that the running kernel knows about but
2091 * we don't */
2092 errno = 0;
2093 capid = strtol(cap, &ptr, 10);
2094 if (!ptr || *ptr != '\0' || errno != 0)
2095 /* not a valid number */
2096 capid = -1;
2097 else if (capid > lxc_caps_last_cap())
2098 /* we have a number but it's not a valid
2099 * capability */
2100 capid = -1;
2101 }
2102
2103 return capid;
2104}
2105
0769b82a
CS
2106int in_caplist(int cap, struct lxc_list *caps)
2107{
2108 struct lxc_list *iterator;
2109 int capid;
2110
2111 lxc_list_for_each(iterator, caps) {
2112 capid = parse_cap(iterator->elem);
2113 if (capid == cap)
2114 return 1;
2115 }
2116
2117 return 0;
2118}
2119
81810dd1
DL
2120static int setup_caps(struct lxc_list *caps)
2121{
2122 struct lxc_list *iterator;
2123 char *drop_entry;
bab88e68 2124 int capid;
81810dd1
DL
2125
2126 lxc_list_for_each(iterator, caps) {
2127
2128 drop_entry = iterator->elem;
2129
bab88e68 2130 capid = parse_cap(drop_entry);
d55bc1ad 2131
81810dd1 2132 if (capid < 0) {
1e11be34
DL
2133 ERROR("unknown capability %s", drop_entry);
2134 return -1;
81810dd1
DL
2135 }
2136
2137 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2138
2139 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2140 SYSERROR("failed to remove %s capability", drop_entry);
2141 return -1;
2142 }
81810dd1
DL
2143
2144 }
2145
1fb86a7c
SH
2146 DEBUG("capabilities have been setup");
2147
2148 return 0;
2149}
2150
2151static int dropcaps_except(struct lxc_list *caps)
2152{
2153 struct lxc_list *iterator;
2154 char *keep_entry;
1fb86a7c
SH
2155 int i, capid;
2156 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2157 INFO("found %d capabilities", numcaps);
1fb86a7c 2158
2caf9a97
SH
2159 if (numcaps <= 0 || numcaps > 200)
2160 return -1;
2161
1fb86a7c
SH
2162 // caplist[i] is 1 if we keep capability i
2163 int *caplist = alloca(numcaps * sizeof(int));
2164 memset(caplist, 0, numcaps * sizeof(int));
2165
2166 lxc_list_for_each(iterator, caps) {
2167
2168 keep_entry = iterator->elem;
2169
bab88e68 2170 capid = parse_cap(keep_entry);
1fb86a7c 2171
7035407c
DE
2172 if (capid == -2)
2173 continue;
2174
1fb86a7c
SH
2175 if (capid < 0) {
2176 ERROR("unknown capability %s", keep_entry);
2177 return -1;
2178 }
2179
8255688a 2180 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2181
2182 caplist[capid] = 1;
2183 }
2184 for (i=0; i<numcaps; i++) {
2185 if (caplist[i])
2186 continue;
2187 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2188 SYSERROR("failed to remove capability %d", i);
2189 return -1;
2190 }
1fb86a7c
SH
2191 }
2192
2193 DEBUG("capabilities have been setup");
81810dd1
DL
2194
2195 return 0;
2196}
2197
0ad19a3f 2198static int setup_hw_addr(char *hwaddr, const char *ifname)
2199{
2200 struct sockaddr sockaddr;
2201 struct ifreq ifr;
2202 int ret, fd;
2203
3cfc0f3a
MN
2204 ret = lxc_convert_mac(hwaddr, &sockaddr);
2205 if (ret) {
2206 ERROR("mac address '%s' conversion failed : %s",
2207 hwaddr, strerror(-ret));
0ad19a3f 2208 return -1;
2209 }
2210
2211 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2212 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2213 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2214
2215 fd = socket(AF_INET, SOCK_DGRAM, 0);
2216 if (fd < 0) {
3ab87b66 2217 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2218 return -1;
2219 }
2220
2221 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2222 close(fd);
2223 if (ret)
3ab87b66 2224 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2225
5da6aa8c 2226 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2227
0ad19a3f 2228 return ret;
2229}
2230
82d5ae15 2231static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2232{
82d5ae15
DL
2233 struct lxc_list *iterator;
2234 struct lxc_inetdev *inetdev;
3cfc0f3a 2235 int err;
0ad19a3f 2236
82d5ae15
DL
2237 lxc_list_for_each(iterator, ip) {
2238
2239 inetdev = iterator->elem;
2240
0093bb8c
DL
2241 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2242 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2243 if (err) {
2244 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2245 ifindex, strerror(-err));
82d5ae15
DL
2246 return -1;
2247 }
2248 }
2249
2250 return 0;
0ad19a3f 2251}
2252
82d5ae15 2253static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2254{
82d5ae15 2255 struct lxc_list *iterator;
7fa9074f 2256 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2257 int err;
0ad19a3f 2258
82d5ae15
DL
2259 lxc_list_for_each(iterator, ip) {
2260
2261 inet6dev = iterator->elem;
2262
b3df193c 2263 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2264 &inet6dev->mcast, &inet6dev->acast,
2265 inet6dev->prefix);
3cfc0f3a
MN
2266 if (err) {
2267 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2268 ifindex, strerror(-err));
82d5ae15 2269 return -1;
3cfc0f3a 2270 }
82d5ae15
DL
2271 }
2272
2273 return 0;
0ad19a3f 2274}
2275
82d5ae15 2276static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2277{
0ad19a3f 2278 char ifname[IFNAMSIZ];
0ad19a3f 2279 char *current_ifname = ifname;
3cfc0f3a 2280 int err;
0ad19a3f 2281
82d5ae15
DL
2282 /* empty network namespace */
2283 if (!netdev->ifindex) {
b0efbac4 2284 if (netdev->flags & IFF_UP) {
d472214b 2285 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2286 if (err) {
2287 ERROR("failed to set the loopback up : %s",
2288 strerror(-err));
82d5ae15
DL
2289 return -1;
2290 }
82d5ae15 2291 }
40790553
SH
2292 if (netdev->type != LXC_NET_VETH)
2293 return 0;
2294 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2295 }
13954cce 2296
b466dc33 2297 /* get the new ifindex in case of physical netdev */
40790553 2298 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2299 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2300 ERROR("failed to get ifindex for %s",
2301 netdev->link);
2302 return -1;
2303 }
40790553 2304 }
b466dc33 2305
82d5ae15
DL
2306 /* retrieve the name of the interface */
2307 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2308 ERROR("no interface corresponding to index '%d'",
82d5ae15 2309 netdev->ifindex);
0ad19a3f 2310 return -1;
2311 }
13954cce 2312
018ef520 2313 /* default: let the system to choose one interface name */
9d083402 2314 if (!netdev->name)
fb6d9b2f
DL
2315 netdev->name = netdev->type == LXC_NET_PHYS ?
2316 netdev->link : "eth%d";
018ef520 2317
82d5ae15 2318 /* rename the interface name */
40790553
SH
2319 if (strcmp(ifname, netdev->name) != 0) {
2320 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2321 if (err) {
2322 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2323 strerror(-err));
2324 return -1;
2325 }
018ef520
DL
2326 }
2327
2328 /* Re-read the name of the interface because its name has changed
2329 * and would be automatically allocated by the system
2330 */
82d5ae15 2331 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2332 ERROR("no interface corresponding to index '%d'",
82d5ae15 2333 netdev->ifindex);
018ef520 2334 return -1;
0ad19a3f 2335 }
2336
82d5ae15
DL
2337 /* set a mac address */
2338 if (netdev->hwaddr) {
2339 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2340 ERROR("failed to setup hw address for '%s'",
82d5ae15 2341 current_ifname);
0ad19a3f 2342 return -1;
2343 }
2344 }
2345
82d5ae15
DL
2346 /* setup ipv4 addresses on the interface */
2347 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2348 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2349 ifname);
2350 return -1;
2351 }
2352
82d5ae15
DL
2353 /* setup ipv6 addresses on the interface */
2354 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2355 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2356 ifname);
2357 return -1;
2358 }
2359
82d5ae15 2360 /* set the network device up */
b0efbac4 2361 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2362 int err;
2363
d472214b 2364 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2365 if (err) {
2366 ERROR("failed to set '%s' up : %s", current_ifname,
2367 strerror(-err));
0ad19a3f 2368 return -1;
2369 }
2370
2371 /* the network is up, make the loopback up too */
d472214b 2372 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2373 if (err) {
2374 ERROR("failed to set the loopback up : %s",
2375 strerror(-err));
0ad19a3f 2376 return -1;
2377 }
2378 }
2379
f8fee0e2
MK
2380 /* We can only set up the default routes after bringing
2381 * up the interface, sine bringing up the interface adds
2382 * the link-local routes and we can't add a default
2383 * route if the gateway is not reachable. */
2384
2385 /* setup ipv4 gateway on the interface */
2386 if (netdev->ipv4_gateway) {
2387 if (!(netdev->flags & IFF_UP)) {
2388 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2389 return -1;
2390 }
2391
2392 if (lxc_list_empty(&netdev->ipv4)) {
2393 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2394 return -1;
2395 }
2396
2397 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2398 if (err) {
fc739df5
SG
2399 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2400 if (err) {
2401 ERROR("failed to add ipv4 dest for '%s': %s",
2402 ifname, strerror(-err));
2403 }
2404
2405 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2406 if (err) {
2407 ERROR("failed to setup ipv4 gateway for '%s': %s",
2408 ifname, strerror(-err));
2409 if (netdev->ipv4_gateway_auto) {
2410 char buf[INET_ADDRSTRLEN];
2411 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2412 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2413 }
2414 return -1;
19a26f82 2415 }
f8fee0e2
MK
2416 }
2417 }
2418
2419 /* setup ipv6 gateway on the interface */
2420 if (netdev->ipv6_gateway) {
2421 if (!(netdev->flags & IFF_UP)) {
2422 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2423 return -1;
2424 }
2425
2426 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2427 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2428 return -1;
2429 }
2430
2431 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2432 if (err) {
fc739df5
SG
2433 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2434 if (err) {
2435 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2436 ifname, strerror(-err));
19a26f82 2437 }
fc739df5
SG
2438
2439 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2440 if (err) {
2441 ERROR("failed to setup ipv6 gateway for '%s': %s",
2442 ifname, strerror(-err));
2443 if (netdev->ipv6_gateway_auto) {
2444 char buf[INET6_ADDRSTRLEN];
2445 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2446 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2447 }
2448 return -1;
2449 }
f8fee0e2
MK
2450 }
2451 }
2452
cd54d859
DL
2453 DEBUG("'%s' has been setup", current_ifname);
2454
0ad19a3f 2455 return 0;
2456}
2457
5f4535a3 2458static int setup_network(struct lxc_list *network)
0ad19a3f 2459{
82d5ae15 2460 struct lxc_list *iterator;
82d5ae15 2461 struct lxc_netdev *netdev;
0ad19a3f 2462
5f4535a3 2463 lxc_list_for_each(iterator, network) {
cd54d859 2464
5f4535a3 2465 netdev = iterator->elem;
82d5ae15
DL
2466
2467 if (setup_netdev(netdev)) {
2468 ERROR("failed to setup netdev");
2469 return -1;
2470 }
2471 }
cd54d859 2472
5f4535a3
DL
2473 if (!lxc_list_empty(network))
2474 INFO("network has been setup");
cd54d859
DL
2475
2476 return 0;
0ad19a3f 2477}
2478
2af6bd1b
SH
2479/* try to move physical nics to the init netns */
2480void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2481{
2482 int i, ret, oldfd;
2483 char path[MAXPATHLEN];
2484
2485 if (netnsfd < 0)
2486 return;
2487
2488 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2489 if (ret < 0 || ret >= MAXPATHLEN) {
2490 WARN("Failed to open monitor netns fd");
2491 return;
2492 }
2493 if ((oldfd = open(path, O_RDONLY)) < 0) {
2494 SYSERROR("Failed to open monitor netns fd");
2495 return;
2496 }
2497 if (setns(netnsfd, 0) != 0) {
2498 SYSERROR("Failed to enter container netns to reset nics");
2499 close(oldfd);
2500 return;
2501 }
2502 for (i=0; i<conf->num_savednics; i++) {
2503 struct saved_nic *s = &conf->saved_nics[i];
8d357196 2504 if (lxc_netdev_move_by_index(s->ifindex, 1, NULL))
2af6bd1b
SH
2505 WARN("Error moving nic index:%d back to host netns",
2506 s->ifindex);
2507 }
2508 if (setns(oldfd, 0) != 0)
2509 SYSERROR("Failed to re-enter monitor's netns");
2510 close(oldfd);
2511}
2512
2513void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2514{
2515 int i;
2516
2af6bd1b
SH
2517 if (conf->num_savednics == 0)
2518 return;
2519
7b35f3d6 2520 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2521 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2522 for (i=0; i<conf->num_savednics; i++) {
2523 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2524 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2525 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2526 free(s->orig_name);
2527 }
2528 conf->num_savednics = 0;
7b35f3d6
SH
2529}
2530
ae9242c8
SH
2531static char *default_rootfs_mount = LXCROOTFSMOUNT;
2532
7b379ab3 2533struct lxc_conf *lxc_conf_init(void)
089cd8b8 2534{
7b379ab3 2535 struct lxc_conf *new;
26ddeedd 2536 int i;
7b379ab3
MN
2537
2538 new = malloc(sizeof(*new));
2539 if (!new) {
2540 ERROR("lxc_conf_init : %m");
2541 return NULL;
2542 }
2543 memset(new, 0, sizeof(*new));
2544
b40a606e 2545 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2546 new->personality = -1;
124fa0a8 2547 new->autodev = 1;
596a818d
DE
2548 new->console.log_path = NULL;
2549 new->console.log_fd = -1;
28a4b0e5 2550 new->console.path = NULL;
63376d7d 2551 new->console.peer = -1;
b5159817
DE
2552 new->console.peerpty.busy = -1;
2553 new->console.peerpty.master = -1;
2554 new->console.peerpty.slave = -1;
63376d7d
DL
2555 new->console.master = -1;
2556 new->console.slave = -1;
2557 new->console.name[0] = '\0';
d2e30e99 2558 new->maincmd_fd = -1;
76a26f55 2559 new->nbd_idx = -1;
54c30e29 2560 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2561 if (!new->rootfs.mount) {
2562 ERROR("lxc_conf_init : %m");
2563 free(new);
2564 return NULL;
2565 }
d89de239 2566 new->kmsg = 0;
7b379ab3
MN
2567 lxc_list_init(&new->cgroup);
2568 lxc_list_init(&new->network);
2569 lxc_list_init(&new->mount_list);
81810dd1 2570 lxc_list_init(&new->caps);
1fb86a7c 2571 lxc_list_init(&new->keepcaps);
f6d3e3e4 2572 lxc_list_init(&new->id_map);
f979ac15 2573 lxc_list_init(&new->includes);
4184c3e1 2574 lxc_list_init(&new->aliens);
7c661726 2575 lxc_list_init(&new->environment);
26ddeedd
SH
2576 for (i=0; i<NUM_LXC_HOOKS; i++)
2577 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2578 lxc_list_init(&new->groups);
fe4de9a6
DE
2579 new->lsm_aa_profile = NULL;
2580 new->lsm_se_context = NULL;
5112cd70 2581 new->tmp_umount_proc = 0;
7b379ab3 2582
9f30a190
MM
2583 for (i = 0; i < LXC_NS_MAX; i++)
2584 new->inherit_ns_fd[i] = -1;
2585
7b379ab3 2586 return new;
089cd8b8
DL
2587}
2588
a589434e 2589static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2590{
8634bc19 2591 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2592 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2593 int err;
13954cce 2594
e892973e
DL
2595 if (netdev->priv.veth_attr.pair)
2596 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2597 else {
9ba8130c
SH
2598 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2599 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2600 ERROR("veth1 name too long");
2601 return -1;
2602 }
a0265685 2603 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2604 if (!veth1) {
2605 ERROR("failed to allocate a temporary name");
2606 return -1;
2607 }
74a2b586
JK
2608 /* store away for deconf */
2609 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2610 }
82d5ae15 2611
0e391e57 2612 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2613 veth2 = lxc_mkifname(veth2buf);
ad40563e 2614 if (!veth2) {
82d5ae15 2615 ERROR("failed to allocate a temporary name");
ad40563e 2616 goto out_delete;
0ad19a3f 2617 }
2618
3cfc0f3a
MN
2619 err = lxc_veth_create(veth1, veth2);
2620 if (err) {
2e2d6a7b 2621 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2622 strerror(-err));
ad40563e 2623 goto out_delete;
0ad19a3f 2624 }
13954cce 2625
49684c0b
CS
2626 /* changing the high byte of the mac address to 0xfe, the bridge interface
2627 * will always keep the host's mac address and not take the mac address
2628 * of a container */
2629 err = setup_private_host_hw_addr(veth1);
2630 if (err) {
2e2d6a7b 2631 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2632 veth1, strerror(-err));
2633 goto out_delete;
2634 }
2635
82d5ae15 2636 if (netdev->mtu) {
d472214b 2637 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2638 if (!err)
d472214b 2639 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a 2640 if (err) {
2e2d6a7b 2641 ERROR("failed to set mtu '%s' for veth pair (%s and %s): %s",
3cfc0f3a 2642 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2643 goto out_delete;
75d09f83
DL
2644 }
2645 }
2646
3cfc0f3a
MN
2647 if (netdev->link) {
2648 err = lxc_bridge_attach(netdev->link, veth1);
2649 if (err) {
2e2d6a7b 2650 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2651 veth1, netdev->link, strerror(-err));
2652 goto out_delete;
2653 }
eb14c10a
DL
2654 }
2655
82d5ae15
DL
2656 netdev->ifindex = if_nametoindex(veth2);
2657 if (!netdev->ifindex) {
36eb9bde 2658 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2659 goto out_delete;
2660 }
2661
d472214b 2662 err = lxc_netdev_up(veth1);
6e35af2e
DL
2663 if (err) {
2664 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2665 goto out_delete;
0ad19a3f 2666 }
2667
e3b4c4c4 2668 if (netdev->upscript) {
751d9dcd
DL
2669 err = run_script(handler->name, "net", netdev->upscript, "up",
2670 "veth", veth1, (char*) NULL);
2671 if (err)
e3b4c4c4 2672 goto out_delete;
e3b4c4c4
ST
2673 }
2674
a589434e 2675 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2676 veth1, veth2, netdev->ifindex);
2677
6ab9ab6d 2678 return 0;
eb14c10a
DL
2679
2680out_delete:
b84f58b9 2681 lxc_netdev_delete_by_name(veth1);
f10fad2f 2682 if (!netdev->priv.veth_attr.pair)
ad40563e 2683 free(veth1);
f10fad2f 2684 free(veth2);
6ab9ab6d 2685 return -1;
13954cce 2686}
d957ae2d 2687
74a2b586
JK
2688static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2689{
2690 char *veth1;
2691 int err;
2692
2693 if (netdev->priv.veth_attr.pair)
2694 veth1 = netdev->priv.veth_attr.pair;
2695 else
2696 veth1 = netdev->priv.veth_attr.veth1;
2697
2698 if (netdev->downscript) {
2699 err = run_script(handler->name, "net", netdev->downscript,
2700 "down", "veth", veth1, (char*) NULL);
2701 if (err)
2702 return -1;
2703 }
2704 return 0;
2705}
2706
a589434e 2707static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2708{
0e391e57 2709 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2710 int err;
d957ae2d
MT
2711
2712 if (!netdev->link) {
2713 ERROR("no link specified for macvlan netdev");
2714 return -1;
2715 }
13954cce 2716
9ba8130c
SH
2717 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2718 if (err >= sizeof(peerbuf))
2719 return -1;
82d5ae15 2720
a0265685 2721 peer = lxc_mkifname(peerbuf);
ad40563e 2722 if (!peer) {
82d5ae15
DL
2723 ERROR("failed to make a temporary name");
2724 return -1;
0ad19a3f 2725 }
2726
3cfc0f3a
MN
2727 err = lxc_macvlan_create(netdev->link, peer,
2728 netdev->priv.macvlan_attr.mode);
2729 if (err) {
2730 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2731 peer, netdev->link, strerror(-err));
ad40563e 2732 goto out;
0ad19a3f 2733 }
2734
82d5ae15
DL
2735 netdev->ifindex = if_nametoindex(peer);
2736 if (!netdev->ifindex) {
36eb9bde 2737 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2738 goto out;
22ebac19 2739 }
2740
e3b4c4c4 2741 if (netdev->upscript) {
751d9dcd
DL
2742 err = run_script(handler->name, "net", netdev->upscript, "up",
2743 "macvlan", netdev->link, (char*) NULL);
2744 if (err)
ad40563e 2745 goto out;
e3b4c4c4
ST
2746 }
2747
a589434e 2748 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2749 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2750
d957ae2d 2751 return 0;
ad40563e
ÇO
2752out:
2753 lxc_netdev_delete_by_name(peer);
2754 free(peer);
2755 return -1;
0ad19a3f 2756}
2757
74a2b586
JK
2758static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2759{
2760 int err;
2761
2762 if (netdev->downscript) {
2763 err = run_script(handler->name, "net", netdev->downscript,
2764 "down", "macvlan", netdev->link,
2765 (char*) NULL);
2766 if (err)
2767 return -1;
2768 }
2769 return 0;
2770}
2771
a589434e
JN
2772/* XXX: merge with instantiate_macvlan */
2773static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2774{
2775 char peer[IFNAMSIZ];
3cfc0f3a 2776 int err;
82f58d03 2777 static uint16_t vlan_cntr = 0;
26c39028
JHS
2778
2779 if (!netdev->link) {
2780 ERROR("no link specified for vlan netdev");
2781 return -1;
2782 }
2783
82f58d03 2784 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2785 if (err >= sizeof(peer)) {
2786 ERROR("peer name too long");
2787 return -1;
2788 }
26c39028 2789
3cfc0f3a
MN
2790 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2791 if (err) {
2792 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2793 peer, netdev->link, strerror(-err));
26c39028
JHS
2794 return -1;
2795 }
2796
2797 netdev->ifindex = if_nametoindex(peer);
2798 if (!netdev->ifindex) {
2799 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2800 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2801 return -1;
2802 }
2803
a589434e 2804 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
2805 netdev->ifindex);
2806
26c39028
JHS
2807 return 0;
2808}
2809
74a2b586
JK
/*
 * Shutdown hook for vlan devices: nothing to do, always returns 0.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void) handler;
	(void) netdev;

	return 0;
}
2814
a589434e 2815static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2816{
6168e99f
DL
2817 if (!netdev->link) {
2818 ERROR("no link specified for the physical interface");
2819 return -1;
2820 }
2821
9d083402 2822 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2823 if (!netdev->ifindex) {
9d083402 2824 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2825 return -1;
2826 }
2827
e3b4c4c4
ST
2828 if (netdev->upscript) {
2829 int err;
751d9dcd
DL
2830 err = run_script(handler->name, "net", netdev->upscript,
2831 "up", "phys", netdev->link, (char*) NULL);
2832 if (err)
e3b4c4c4 2833 return -1;
e3b4c4c4
ST
2834 }
2835
82d5ae15 2836 return 0;
0ad19a3f 2837}
2838
74a2b586
JK
2839static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2840{
2841 int err;
2842
2843 if (netdev->downscript) {
2844 err = run_script(handler->name, "net", netdev->downscript,
2845 "down", "phys", netdev->link, (char*) NULL);
2846 if (err)
2847 return -1;
2848 }
2849 return 0;
2850}
2851
a589434e 2852static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2853{
2854 netdev->ifindex = 0;
2855 return 0;
2856}
2857
a589434e 2858static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2859{
82d5ae15 2860 netdev->ifindex = 0;
e3b4c4c4
ST
2861 if (netdev->upscript) {
2862 int err;
751d9dcd
DL
2863 err = run_script(handler->name, "net", netdev->upscript,
2864 "up", "empty", (char*) NULL);
2865 if (err)
e3b4c4c4 2866 return -1;
e3b4c4c4 2867 }
82d5ae15 2868 return 0;
0ad19a3f 2869}
2870
74a2b586
JK
2871static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2872{
2873 int err;
2874
2875 if (netdev->downscript) {
2876 err = run_script(handler->name, "net", netdev->downscript,
2877 "down", "empty", (char*) NULL);
2878 if (err)
2879 return -1;
2880 }
2881 return 0;
2882}
2883
26b797f3
SH
/*
 * Shutdown hook for the "none" network type: nothing to do, always
 * returns 0.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void) handler;
	(void) netdev;

	return 0;
}
2888
2889int lxc_requests_empty_network(struct lxc_handler *handler)
2890{
2891 struct lxc_list *network = &handler->conf->network;
2892 struct lxc_list *iterator;
2893 struct lxc_netdev *netdev;
2894 bool found_none = false, found_nic = false;
2895
2896 if (lxc_list_empty(network))
2897 return 0;
2898
2899 lxc_list_for_each(iterator, network) {
2900
2901 netdev = iterator->elem;
2902
2903 if (netdev->type == LXC_NET_NONE)
2904 found_none = true;
2905 else
2906 found_nic = true;
2907 }
2908 if (found_none && !found_nic)
2909 return 1;
2910 return 0;
2911}
2912
e3b4c4c4 2913int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2914{
e3b4c4c4 2915 struct lxc_list *network = &handler->conf->network;
82d5ae15 2916 struct lxc_list *iterator;
82d5ae15 2917 struct lxc_netdev *netdev;
cbef6c52
SH
2918 int am_root = (getuid() == 0);
2919
2920 if (!am_root)
2921 return 0;
0ad19a3f 2922
5f4535a3 2923 lxc_list_for_each(iterator, network) {
0ad19a3f 2924
5f4535a3 2925 netdev = iterator->elem;
13954cce 2926
24654103 2927 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2928 ERROR("invalid network configuration type '%d'",
5f4535a3 2929 netdev->type);
82d5ae15
DL
2930 return -1;
2931 }
0ad19a3f 2932
e3b4c4c4 2933 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2934 ERROR("failed to create netdev");
2935 return -1;
2936 }
e3b4c4c4 2937
0ad19a3f 2938 }
2939
2940 return 0;
2941}
2942
74a2b586 2943void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2944{
74a2b586 2945 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2946 struct lxc_list *iterator;
2947 struct lxc_netdev *netdev;
2948
2949 lxc_list_for_each(iterator, network) {
2950 netdev = iterator->elem;
d472214b 2951
74a2b586 2952 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2953 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2954 WARN("failed to rename to the initial name the " \
2955 "netdev '%s'", netdev->link);
d472214b 2956 continue;
d8f8e352 2957 }
d472214b 2958
74a2b586
JK
2959 if (netdev_deconf[netdev->type](handler, netdev)) {
2960 WARN("failed to destroy netdev");
2961 }
2962
d8f8e352
DL
2963 /* Recent kernel remove the virtual interfaces when the network
2964 * namespace is destroyed but in case we did not moved the
2965 * interface to the network namespace, we have to destroy it
2966 */
74a2b586
JK
2967 if (netdev->ifindex != 0 &&
2968 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2969 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2970 }
2971}
2972
45e854dc
SG
2973#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2974
fe1f672f
ÇO
2975/* lxc-user-nic returns "interface_name:interface_name\n" */
2976#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 2977static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
2978{
2979 pid_t child;
a7242d9a
ÇO
2980 int bytes, pipefd[2];
2981 char *token, *saveptr = NULL;
fe1f672f 2982 char buffer[MAX_BUFFER_SIZE];
cbef6c52
SH
2983
2984 if (netdev->type != LXC_NET_VETH) {
2985 ERROR("nic type %d not support for unprivileged use",
2986 netdev->type);
2987 return -1;
2988 }
2989
a7242d9a
ÇO
2990 if(pipe(pipefd) < 0) {
2991 SYSERROR("pipe failed");
2992 return -1;
2993 }
2994
cbef6c52
SH
2995 if ((child = fork()) < 0) {
2996 SYSERROR("fork");
a7242d9a
ÇO
2997 close(pipefd[0]);
2998 close(pipefd[1]);
2999 return -1;
3000 }
3001
3002 if (child == 0) { // child
3003 /* close the read-end of the pipe */
3004 close(pipefd[0]);
3005 /* redirect the stdout to write-end of the pipe */
3006 dup2(pipefd[1], STDOUT_FILENO);
3007 /* close the write-end of the pipe */
fe1f672f 3008 close(pipefd[1]);
a7242d9a
ÇO
3009
3010 // Call lxc-user-nic pid type bridge
3011 char pidstr[20];
3012 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
3013 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3014 pidstr[19] = '\0';
3015 execvp(args[0], args);
3016 SYSERROR("execvp lxc-user-nic");
3017 exit(1);
3018 }
3019
3020 /* close the write-end of the pipe */
3021 close(pipefd[1]);
3022
fe1f672f 3023 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3024 if (bytes < 0) {
3025 SYSERROR("read failed");
3026 }
3027 buffer[bytes - 1] = '\0';
3028
3029 if (wait_for_pid(child) != 0) {
3030 close(pipefd[0]);
cbef6c52
SH
3031 return -1;
3032 }
3033
a7242d9a
ÇO
3034 /* close the read-end of the pipe */
3035 close(pipefd[0]);
cbef6c52 3036
a7242d9a
ÇO
3037 /* fill netdev->name field */
3038 token = strtok_r(buffer, ":", &saveptr);
3039 if (!token)
3040 return -1;
658979c5
SH
3041 netdev->name = malloc(IFNAMSIZ+1);
3042 if (!netdev->name) {
3043 ERROR("Out of memory");
3044 return -1;
3045 }
3046 memset(netdev->name, 0, IFNAMSIZ+1);
3047 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3048
3049 /* fill netdev->veth_attr.pair field */
3050 token = strtok_r(NULL, ":", &saveptr);
3051 if (!token)
3052 return -1;
3053 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3054 if (!netdev->priv.veth_attr.pair) {
3055 ERROR("Out of memory");
3056 return -1;
3057 }
45e854dc 3058
a7242d9a 3059 return 0;
cbef6c52
SH
3060}
3061
5f4535a3 3062int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3063{
82d5ae15 3064 struct lxc_list *iterator;
82d5ae15 3065 struct lxc_netdev *netdev;
cbef6c52 3066 int am_root = (getuid() == 0);
3cfc0f3a 3067 int err;
0ad19a3f 3068
5f4535a3 3069 lxc_list_for_each(iterator, network) {
82d5ae15 3070
5f4535a3 3071 netdev = iterator->elem;
82d5ae15 3072
fbb16259 3073 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3074 if (unpriv_assign_nic(netdev, pid))
3075 return -1;
658979c5
SH
3076 // lxc-user-nic has moved the nic to the new ns.
3077 // unpriv_assign_nic() fills in netdev->name.
3078 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3079 continue;
3080 }
236087a6 3081
fbb16259
SH
3082 /* empty network namespace, nothing to move */
3083 if (!netdev->ifindex)
3084 continue;
3085
8d357196 3086 err = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3cfc0f3a
MN
3087 if (err) {
3088 ERROR("failed to move '%s' to the container : %s",
3089 netdev->link, strerror(-err));
82d5ae15
DL
3090 return -1;
3091 }
3092
c1c75c04 3093 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3094 }
3095
3096 return 0;
3097}
3098
251d0d2a
DE
3099static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3100 size_t buf_size)
f6d3e3e4
SH
3101{
3102 char path[PATH_MAX];
e4ccd113 3103 int ret, closeret;
f6d3e3e4
SH
3104 FILE *f;
3105
3106 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3107 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3108 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3109 return -E2BIG;
3110 }
3111 f = fopen(path, "w");
3112 if (!f) {
3113 perror("open");
3114 return -EINVAL;
3115 }
251d0d2a 3116 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3117 if (ret < 0)
e4ccd113
SH
3118 SYSERROR("writing id mapping");
3119 closeret = fclose(f);
3120 if (closeret)
3121 SYSERROR("writing id mapping");
3122 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3123}
3124
3125int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3126{
3127 struct lxc_list *iterator;
3128 struct id_map *map;
8afb3e61 3129 int ret = 0, use_shadow = 0;
251d0d2a 3130 enum idtype type;
8afb3e61
SG
3131 char *buf = NULL, *pos, *cmdpath = NULL;
3132
22038de5
SH
3133 /*
3134 * If newuidmap exists, that is, if shadow is handing out subuid
3135 * ranges, then insist that root also reserve ranges in subuid. This
3136 * will protected it by preventing another user from being handed the
3137 * range by shadow.
3138 */
9d9c111c 3139 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3140 if (cmdpath) {
3141 use_shadow = 1;
3142 free(cmdpath);
3143 }
3144
0e6e3a41
SG
3145 if (!use_shadow && geteuid()) {
3146 ERROR("Missing newuidmap/newgidmap");
3147 return -1;
3148 }
251d0d2a
DE
3149
3150 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3151 int left, fill;
cf3ef16d
SH
3152 int had_entry = 0;
3153 if (!buf) {
3154 buf = pos = malloc(4096);
4f7521b4
SH
3155 if (!buf)
3156 return -ENOMEM;
cf3ef16d
SH
3157 }
3158 pos = buf;
0e6e3a41 3159 if (use_shadow)
d1838f34 3160 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3161 type == ID_TYPE_UID ? 'u' : 'g',
3162 pid);
4f7521b4 3163
cf3ef16d
SH
3164 lxc_list_for_each(iterator, idmap) {
3165 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3166 map = iterator->elem;
cf3ef16d
SH
3167 if (map->idtype != type)
3168 continue;
3169
3170 had_entry = 1;
3171 left = 4096 - (pos - buf);
d1838f34 3172 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3173 use_shadow ? " " : "",
d1838f34 3174 map->nsid, map->hostid, map->range,
0e6e3a41 3175 use_shadow ? "" : "\n");
cf3ef16d
SH
3176 if (fill <= 0 || fill >= left)
3177 SYSERROR("snprintf failed, too many mappings");
3178 pos += fill;
251d0d2a 3179 }
cf3ef16d 3180 if (!had_entry)
4f7521b4 3181 continue;
cf3ef16d 3182
0e6e3a41 3183 if (!use_shadow) {
cf3ef16d 3184 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3185 } else {
3186 left = 4096 - (pos - buf);
3187 fill = snprintf(pos, left, "\n");
3188 if (fill <= 0 || fill >= left)
3189 SYSERROR("snprintf failed, too many mappings");
3190 pos += fill;
cf3ef16d 3191 ret = system(buf);
d1838f34 3192 }
cf3ef16d 3193
f6d3e3e4
SH
3194 if (ret)
3195 break;
3196 }
251d0d2a 3197
f10fad2f 3198 free(buf);
f6d3e3e4
SH
3199 return ret;
3200}
3201
cf3ef16d 3202/*
7b50c609
TS
3203 * return the host uid/gid to which the container root is mapped in
3204 * *val.
0b3a6504 3205 * Return true if id was found, false otherwise.
cf3ef16d 3206 */
2a9a80cb 3207bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3208 unsigned long *val)
cf3ef16d
SH
3209{
3210 struct lxc_list *it;
3211 struct id_map *map;
3212
3213 lxc_list_for_each(it, &conf->id_map) {
3214 map = it->elem;
7b50c609 3215 if (map->idtype != idtype)
cf3ef16d
SH
3216 continue;
3217 if (map->nsid != 0)
3218 continue;
2a9a80cb
SH
3219 *val = map->hostid;
3220 return true;
cf3ef16d 3221 }
2a9a80cb 3222 return false;
cf3ef16d
SH
3223}
3224
2133f58c 3225int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3226{
3227 struct lxc_list *it;
3228 struct id_map *map;
3229 lxc_list_for_each(it, &conf->id_map) {
3230 map = it->elem;
2133f58c 3231 if (map->idtype != idtype)
cf3ef16d
SH
3232 continue;
3233 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3234 return (id - map->hostid) + map->nsid;
cf3ef16d 3235 }
57d116ab 3236 return -1;
cf3ef16d
SH
3237}
3238
2133f58c 3239int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3240{
3241 struct lxc_list *it;
3242 struct id_map *map;
2133f58c 3243 unsigned int freeid = 0;
cf3ef16d
SH
3244again:
3245 lxc_list_for_each(it, &conf->id_map) {
3246 map = it->elem;
2133f58c 3247 if (map->idtype != idtype)
cf3ef16d
SH
3248 continue;
3249 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3250 freeid = map->nsid + map->range;
3251 goto again;
3252 }
3253 }
3254 return freeid;
3255}
3256
19a26f82
MK
3257int lxc_find_gateway_addresses(struct lxc_handler *handler)
3258{
3259 struct lxc_list *network = &handler->conf->network;
3260 struct lxc_list *iterator;
3261 struct lxc_netdev *netdev;
3262 int link_index;
3263
3264 lxc_list_for_each(iterator, network) {
3265 netdev = iterator->elem;
3266
3267 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3268 continue;
3269
3270 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3271 ERROR("gateway = auto only supported for "
3272 "veth and macvlan");
3273 return -1;
3274 }
3275
3276 if (!netdev->link) {
3277 ERROR("gateway = auto needs a link interface");
3278 return -1;
3279 }
3280
3281 link_index = if_nametoindex(netdev->link);
3282 if (!link_index)
3283 return -EINVAL;
3284
3285 if (netdev->ipv4_gateway_auto) {
3286 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3287 ERROR("failed to automatically find ipv4 gateway "
3288 "address from link interface '%s'", netdev->link);
3289 return -1;
3290 }
3291 }
3292
3293 if (netdev->ipv6_gateway_auto) {
3294 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3295 ERROR("failed to automatically find ipv6 gateway "
3296 "address from link interface '%s'", netdev->link);
3297 return -1;
3298 }
3299 }
3300 }
3301
3302 return 0;
3303}
3304
5e4a62bf 3305int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3306{
5e4a62bf 3307 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3308 int i, ret;
b0a33c1e 3309
5e4a62bf
DL
3310 /* no tty in the configuration */
3311 if (!conf->tty)
b0a33c1e 3312 return 0;
3313
13954cce 3314 tty_info->pty_info =
e4e7d59d 3315 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3316 if (!tty_info->pty_info) {
36eb9bde 3317 SYSERROR("failed to allocate pty_info");
985d15b1 3318 return -1;
b0a33c1e 3319 }
3320
985d15b1 3321 for (i = 0; i < conf->tty; i++) {
13954cce 3322
b0a33c1e 3323 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3324
025ed0f3
SH
3325 process_lock();
3326 ret = openpty(&pty_info->master, &pty_info->slave,
3327 pty_info->name, NULL, NULL);
3328 process_unlock();
3329 if (ret) {
36eb9bde 3330 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3331 tty_info->nbtty = i;
3332 lxc_delete_tty(tty_info);
3333 return -1;
b0a33c1e 3334 }
3335
5332bb84
DL
3336 DEBUG("allocated pty '%s' (%d/%d)",
3337 pty_info->name, pty_info->master, pty_info->slave);
3338
3ec1648d 3339 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3340 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3341 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3342
b0a33c1e 3343 pty_info->busy = 0;
3344 }
3345
985d15b1 3346 tty_info->nbtty = conf->tty;
1ac470c0
DL
3347
3348 INFO("tty's configured");
3349
985d15b1 3350 return 0;
b0a33c1e 3351}
3352
3353void lxc_delete_tty(struct lxc_tty_info *tty_info)
3354{
3355 int i;
3356
3357 for (i = 0; i < tty_info->nbtty; i++) {
3358 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3359
3360 close(pty_info->master);
3361 close(pty_info->slave);
3362 }
3363
3364 free(tty_info->pty_info);
3365 tty_info->nbtty = 0;
3366}
3367
f6d3e3e4 3368/*
7b50c609
TS
3369 * chown_mapped_root: for an unprivileged user with uid/gid X to
3370 * chown a dir to subuid/subgid Y, he needs to run chown as root
3371 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3372 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3373 * root is privileged with respect to hostuid/hostgid X, allowing
3374 * him to do the chown.
f6d3e3e4 3375 */
c4d10a05 3376int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3377{
7b50c609
TS
3378 uid_t rootuid;
3379 gid_t rootgid;
c4d10a05 3380 pid_t pid;
2a9a80cb 3381 unsigned long val;
a7ef8753 3382 char *chownpath = path;
f6d3e3e4 3383
2a9a80cb 3384 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3385 ERROR("No mapping for container root");
3386 return -1;
f6d3e3e4 3387 }
7b50c609
TS
3388 rootuid = (uid_t) val;
3389 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3390 ERROR("No mapping for container root");
3391 return -1;
3392 }
3393 rootgid = (gid_t) val;
2a9a80cb 3394
a7ef8753
SH
3395 /*
3396 * In case of overlay, we want only the writeable layer
3397 * to be chowned
3398 */
1f92162d 3399 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3400 chownpath = strchr(path, ':');
3401 if (!chownpath) {
3402 ERROR("Bad overlay path: %s", path);
3403 return -1;
3404 }
3405 chownpath = strchr(chownpath+1, ':');
3406 if (!chownpath) {
3407 ERROR("Bad overlay path: %s", path);
3408 return -1;
3409 }
3410 chownpath++;
3411 }
3412 path = chownpath;
c4d10a05 3413 if (geteuid() == 0) {
7b50c609 3414 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3415 ERROR("Error chowning %s", path);
3416 return -1;
3417 }
3418 return 0;
3419 }
f3d7e4ca 3420
7b50c609 3421 if (rootuid == geteuid()) {
f3d7e4ca
SH
3422 // nothing to do
3423 INFO("%s: container root is our uid; no need to chown" ,__func__);
3424 return 0;
3425 }
3426
c4d10a05
SH
3427 pid = fork();
3428 if (pid < 0) {
3429 SYSERROR("Failed forking");
f6d3e3e4
SH
3430 return -1;
3431 }
c4d10a05 3432 if (!pid) {
7b50c609
TS
3433 int hostuid = geteuid(), hostgid = getegid(), ret;
3434 struct stat sb;
3435 char map1[100], map2[100], map3[100], map4[100], map5[100];
3436 char ugid[100];
3437 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3438 "-m", map3, "-m", map5,
3439 "--", "chown", ugid, path, NULL };
3440 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3441 "-m", map3, "-m", map4, "-m", map5,
3442 "--", "chown", ugid, path, NULL };
3443
3444 // save the current gid of "path"
3445 if (stat(path, &sb) < 0) {
3446 ERROR("Error stat %s", path);
3447 return -1;
3448 }
f6d3e3e4 3449
9a7c2aba
SH
3450 /*
3451 * A file has to be group-owned by a gid mapped into the
3452 * container, or the container won't be privileged over it.
3453 */
3454 if (sb.st_uid == geteuid() &&
3455 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3456 chown(path, -1, hostgid) < 0) {
3457 ERROR("Failed chgrping %s", path);
7b50c609
TS
3458 return -1;
3459 }
3460
3461 // "u:0:rootuid:1"
3462 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3463 if (ret < 0 || ret >= 100) {
3464 ERROR("Error uid printing map string");
f6d3e3e4
SH
3465 return -1;
3466 }
c4d10a05 3467
98e5ba51
SH
3468 // "u:hostuid:hostuid:1"
3469 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3470 if (ret < 0 || ret >= 100) {
3471 ERROR("Error uid printing map string");
3472 return -1;
3473 }
3474
7b50c609
TS
3475 // "g:0:rootgid:1"
3476 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3477 if (ret < 0 || ret >= 100) {
7b50c609 3478 ERROR("Error gid printing map string");
c4d10a05
SH
3479 return -1;
3480 }
3481
7b50c609 3482 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3483 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3484 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3485 if (ret < 0 || ret >= 100) {
3486 ERROR("Error gid printing map string");
3487 return -1;
3488 }
3489
3490 // "g:hostgid:hostgid:1"
3491 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3492 if (ret < 0 || ret >= 100) {
3493 ERROR("Error gid printing map string");
3494 return -1;
3495 }
3496
3497 // "0:pathgid" (chown)
b4c1e35d 3498 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3499 if (ret < 0 || ret >= 100) {
3500 ERROR("Error owner printing format string for chown");
3501 return -1;
3502 }
3503
3504 if (hostgid == sb.st_gid)
3505 ret = execvp("lxc-usernsexec", args1);
3506 else
3507 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3508 SYSERROR("Failed executing usernsexec");
3509 exit(1);
f6d3e3e4 3510 }
c4d10a05 3511 return wait_for_pid(pid);
f6d3e3e4
SH
3512}
3513
c4d10a05 3514int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3515{
c4d10a05 3516 int i;
f6d3e3e4 3517
c4d10a05 3518 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3519 return 0;
c4d10a05
SH
3520
3521 for (i = 0; i < c->tty_info.nbtty; i++) {
3522 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3523
3524 if (chown_mapped_root(pty_info->name, c) < 0) {
3525 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3526 return -1;
3527 }
3528 }
3529
29b10e4f 3530 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3531 ERROR("Failed to chown %s", c->console.name);
3532 return -1;
3533 }
3534
f6d3e3e4
SH
3535 return 0;
3536}
3537
5112cd70
SH
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * The check reads the <rootfs>/proc/self symlink: if it cannot be read
 * proc is considered not mounted; if it points to anything other than
 * "1" the existing mount is lazily detached and proc is mounted again.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	memset(link, 0, sizeof(link));
	/*
	 * readlink() does not NUL-terminate. Never let it write the last
	 * byte so that link is always a valid string for the log message
	 * and the comparison below.
	 */
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	/* strictly shorter than the path formatted above, so it fits */
	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;

	/* we expect to be pid 1 in the proc instance we want */
	if (strcmp(link, "1") != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}

	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3579
3580int tmp_proc_mount(struct lxc_conf *lxc_conf)
3581{
3582 int mounted;
3583
3584 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3585 if (mount("proc", "/proc", "proc", 0, NULL)) {
3586 SYSERROR("Failed mounting /proc, proceeding");
3587 mounted = 0;
3588 } else
3589 mounted = 1;
3590 } else
3591 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3592 if (mounted == -1) {
3593 SYSERROR("failed to mount /proc in the container.");
3594 return -1;
3595 } else if (mounted == 1) {
3596 lxc_conf->tmp_umount_proc = 1;
3597 }
3598 return 0;
3599}
3600
3601void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3602{
3603 if (lxc_conf->tmp_umount_proc == 1) {
3604 umount("/proc");
3605 lxc_conf->tmp_umount_proc = 0;
3606 }
3607}
3608
/*
 * Walk /proc/self/mountinfo and change any shared entries to slave.
 *
 * For every line, the mount target and a later field are located with
 * get_field(); when that later field contains "shared" the target is
 * remounted with MS_SLAVE. Failures are logged and do not stop the
 * walk or container startup.
 */
void remount_all_slave(void)
{
	FILE *mountinfo;
	char *buf = NULL;
	size_t buflen = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&buf, &buflen, mountinfo) != -1) {
		char *mntpoint, *propagation;

		mntpoint = get_field(buf, 4);
		if (!mntpoint)
			continue;

		propagation = get_field(mntpoint, 2);
		if (!propagation)
			continue;

		null_endofword(propagation);
		if (!strstr(propagation, "shared"))
			continue;

		/* terminate the target only once we know we need it */
		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL)) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(buf);
}
3642
2322903b
SH
3643void lxc_execute_bind_init(struct lxc_conf *conf)
3644{
3645 int ret;
9d9c111c
SH
3646 char path[PATH_MAX], destpath[PATH_MAX], *p;
3647
3648 /* If init exists in the container, don't bind mount a static one */
3649 p = choose_init(conf->rootfs.mount);
3650 if (p) {
3651 free(p);
3652 return;
3653 }
2322903b
SH
3654
3655 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3656 if (ret < 0 || ret >= PATH_MAX) {
3657 WARN("Path name too long searching for lxc.init.static");
3658 return;
3659 }
3660
3661 if (!file_exists(path)) {
3662 INFO("%s does not exist on host", path);
3663 return;
3664 }
3665
3666 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3667 if (ret < 0 || ret >= PATH_MAX) {
3668 WARN("Path name too long for container's lxc.init.static");
3669 return;
3670 }
3671
3672 if (!file_exists(destpath)) {
3673 FILE * pathfile = fopen(destpath, "wb");
3674 if (!pathfile) {
3675 SYSERROR("Failed to create mount target '%s'", destpath);
3676 return;
3677 }
3678 fclose(pathfile);
3679 }
3680
3681 ret = mount(path, destpath, "none", MS_BIND, NULL);
3682 if (ret < 0)
3683 SYSERROR("Failed to bind lxc.init.static into container");
3684 INFO("lxc.init.static bound into container at %s", path);
3685}
3686
35120d9c
SH
3687/*
3688 * This does the work of remounting / if it is shared, calling the
3689 * container pre-mount hooks, and mounting the rootfs.
3690 */
3691int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3692{
35120d9c
SH
3693 if (conf->rootfs_setup) {
3694 /*
3695 * rootfs was set up in another namespace. bind-mount it
3696 * to give us a mount in our own ns so we can pivot_root to it
3697 */
3698 const char *path = conf->rootfs.mount;
3699 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3700 ERROR("Failed to bind-mount container / onto itself");
145832ba 3701 return -1;
35120d9c 3702 }
145832ba 3703 return 0;
35120d9c 3704 }
d4ef7c50 3705
e995d7a2
SH
3706 remount_all_slave();
3707
35120d9c
SH
3708 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3709 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3710 return -1;
3711 }
3712
3713 if (setup_rootfs(conf)) {
3714 ERROR("failed to setup rootfs for '%s'", name);
3715 return -1;
3716 }
3717
3718 conf->rootfs_setup = true;
3719 return 0;
3720}
3721
1c1c7051
SH
3722static bool verify_start_hooks(struct lxc_conf *conf)
3723{
3724 struct lxc_list *it;
3725 char path[MAXPATHLEN];
3726 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3727 char *hookname = it->elem;
3728 struct stat st;
3729 int ret;
3730
3731 ret = snprintf(path, MAXPATHLEN, "%s%s",
3732 conf->rootfs.mount, hookname);
3733 if (ret < 0 || ret >= MAXPATHLEN)
3734 return false;
3735 ret = stat(path, &st);
3736 if (ret) {
3737 SYSERROR("Start hook %s not found in container rootfs",
3738 hookname);
3739 return false;
3740 }
6a0c909a 3741 return true;
1c1c7051
SH
3742 }
3743
3744 return true;
3745}
3746
35120d9c
SH
3747int lxc_setup(struct lxc_handler *handler)
3748{
3749 const char *name = handler->name;
3750 struct lxc_conf *lxc_conf = handler->conf;
3751 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3752
3753 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3754 ERROR("Error setting up rootfs mount after spawn");
3755 return -1;
3756 }
3757
6c544cb3
MM
3758 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3759 if (setup_utsname(lxc_conf->utsname)) {
3760 ERROR("failed to setup the utsname for '%s'", name);
3761 return -1;
3762 }
0ad19a3f 3763 }
3764
5f4535a3 3765 if (setup_network(&lxc_conf->network)) {
36eb9bde 3766 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3767 return -1;
0ad19a3f 3768 }
3769
bc6928ff
MW
3770 if (lxc_conf->autodev > 0) {
3771 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3772 ERROR("failed to mount /dev in the container");
c6883f38
SH
3773 return -1;
3774 }
3775 }
3776
368bbc02
CS
3777 /* do automatic mounts (mainly /proc and /sys), but exclude
3778 * those that need to wait until other stuff has finished
3779 */
4fb3cba5 3780 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3781 ERROR("failed to setup the automatic mounts for '%s'", name);
3782 return -1;
3783 }
3784
80a881b2 3785 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3786 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3787 return -1;
576f946d 3788 }
3789
c1dc38c2 3790 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3791 ERROR("failed to setup the mount entries for '%s'", name);
3792 return -1;
3793 }
3794
1c1c7051
SH
3795 /* Make sure any start hooks are in the rootfs */
3796 if (!verify_start_hooks(lxc_conf))
3797 return -1;
3798
2322903b
SH
3799 if (lxc_conf->is_execute)
3800 lxc_execute_bind_init(lxc_conf);
3801
368bbc02
CS
3802 /* now mount only cgroup, if wanted;
3803 * before, /sys could not have been mounted
3804 * (is either mounted automatically or via fstab entries)
3805 */
4fb3cba5 3806 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3807 ERROR("failed to setup the automatic mounts for '%s'", name);
3808 return -1;
3809 }
3810
283678ed 3811 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3812 ERROR("failed to run mount hooks for container '%s'.", name);
3813 return -1;
3814 }
3815
bc6928ff 3816 if (lxc_conf->autodev > 0) {
283678ed 3817 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3818 ERROR("failed to run autodev hooks for container '%s'.", name);
3819 return -1;
3820 }
9cb4d183 3821 if (fill_autodev(lxc_conf->rootfs.mount)) {
91c3830e
SH
3822 ERROR("failed to populate /dev in the container");
3823 return -1;
3824 }
3825 }
368bbc02 3826
37903589 3827 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3828 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3829 return -1;
6e590161 3830 }
3831
7e0e1d94
AV
3832 if (lxc_conf->kmsg) {
3833 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3834 ERROR("failed to setup kmsg for '%s'", name);
3835 }
1bd051a6 3836
393903d1 3837 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
36eb9bde 3838 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3839 return -1;
b0a33c1e 3840 }
3841
393903d1
SH
3842 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3843 SYSERROR("failed to set environment variable for container ptys");
3844
69aa6655
DE
3845 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3846 ERROR("failed to setup /dev symlinks for '%s'", name);
3847 return -1;
3848 }
3849
5112cd70
SH
3850 /* mount /proc if it's not already there */
3851 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3852 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3853 return -1;
e075f5d9 3854 }
e075f5d9 3855
ac778708 3856 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3857 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3858 return -1;
ed502555 3859 }
3860
571e6ec8 3861 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3862 ERROR("failed to setup the new pts instance");
95b5ffaf 3863 return -1;
3c26f34e 3864 }
3865
cccc74b5
DL
3866 if (setup_personality(lxc_conf->personality)) {
3867 ERROR("failed to setup personality");
3868 return -1;
3869 }
3870
97a8f74f
SG
3871 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3872 if (!lxc_list_empty(&lxc_conf->caps)) {
3873 ERROR("Simultaneously requested dropping and keeping caps");
f6d3e3e4
SH
3874 return -1;
3875 }
97a8f74f
SG
3876 if (dropcaps_except(&lxc_conf->keepcaps)) {
3877 ERROR("failed to keep requested caps");
3878 return -1;
3879 }
3880 } else if (setup_caps(&lxc_conf->caps)) {
3881 ERROR("failed to drop capabilities");
3882 return -1;
81810dd1
DL
3883 }
3884
cd54d859
DL
3885 NOTICE("'%s' is setup.", name);
3886
0ad19a3f 3887 return 0;
3888}
26ddeedd 3889
283678ed
SH
3890int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3891 const char *lxcpath, char *argv[])
26ddeedd
SH
3892{
3893 int which = -1;
3894 struct lxc_list *it;
3895
3896 if (strcmp(hook, "pre-start") == 0)
3897 which = LXCHOOK_PRESTART;
5ea6163a
SH
3898 else if (strcmp(hook, "pre-mount") == 0)
3899 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3900 else if (strcmp(hook, "mount") == 0)
3901 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3902 else if (strcmp(hook, "autodev") == 0)
3903 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3904 else if (strcmp(hook, "start") == 0)
3905 which = LXCHOOK_START;
3906 else if (strcmp(hook, "post-stop") == 0)
3907 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3908 else if (strcmp(hook, "clone") == 0)
3909 which = LXCHOOK_CLONE;
26ddeedd
SH
3910 else
3911 return -1;
3912 lxc_list_for_each(it, &conf->hooks[which]) {
3913 int ret;
3914 char *hookname = it->elem;
283678ed 3915 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3916 if (ret)
3917 return ret;
3918 }
3919 return 0;
3920}
72d0e1cb 3921
427b3a21 3922static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3923{
3924 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3925 struct lxc_list *it2,*next;
72d0e1cb
SG
3926
3927 lxc_list_del(it);
3928
f10fad2f
ME
3929 free(netdev->link);
3930 free(netdev->name);
3931 if (netdev->type == LXC_NET_VETH)
c9bb9a85 3932 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
3933 free(netdev->upscript);
3934 free(netdev->hwaddr);
3935 free(netdev->mtu);
3936 free(netdev->ipv4_gateway);
3937 free(netdev->ipv6_gateway);
9ebb03ad 3938 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3939 lxc_list_del(it2);
3940 free(it2->elem);
3941 free(it2);
3942 }
9ebb03ad 3943 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3944 lxc_list_del(it2);
3945 free(it2->elem);
3946 free(it2);
3947 }
d95db067 3948 free(netdev);
72d0e1cb
SG
3949 free(it);
3950}
3951
3952/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3953int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3954{
3955 char *p1;
3956 int ret, idx, i;
3957 struct lxc_list *it;
3958 struct lxc_netdev *netdev;
3959
46cd2845 3960 p1 = strchr(key, '.');
72d0e1cb
SG
3961 if (!p1 || *(p1+1) == '\0')
3962 p1 = NULL;
3963
3964 ret = sscanf(key, "%d", &idx);
3965 if (ret != 1) return -1;
3966 if (idx < 0)
3967 return -1;
3968
3969 i = 0;
3970 lxc_list_for_each(it, &c->network) {
3971 if (i == idx)
3972 break;
3973 i++;
3974 }
3975 if (i < idx) // we don't have that many nics defined
3976 return -1;
3977
3978 if (!it || !it->elem)
3979 return -1;
3980
3981 netdev = it->elem;
3982
3983 if (!p1) {
3984 lxc_remove_nic(it);
52d21d40 3985 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3986 struct lxc_list *it2,*next;
3987 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3988 lxc_list_del(it2);
3989 free(it2->elem);
3990 free(it2);
3991 }
52d21d40 3992 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
3993 struct lxc_list *it2,*next;
3994 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3995 lxc_list_del(it2);
3996 free(it2->elem);
3997 free(it2);
3998 }
72d0e1cb
SG
3999 }
4000 else return -1;
4001
4002 return 0;
4003}
4004
4005int lxc_clear_config_network(struct lxc_conf *c)
4006{
9ebb03ad
DE
4007 struct lxc_list *it,*next;
4008 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4009 lxc_remove_nic(it);
4010 }
4011 return 0;
4012}
4013
4014int lxc_clear_config_caps(struct lxc_conf *c)
4015{
9ebb03ad 4016 struct lxc_list *it,*next;
72d0e1cb 4017
9ebb03ad 4018 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4019 lxc_list_del(it);
4020 free(it->elem);
4021 free(it);
4022 }
4023 return 0;
4024}
4025
74a3920a 4026static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4027 struct lxc_list *it, *next;
4028
4355ab5f 4029 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4030 lxc_list_del(it);
4031 free(it->elem);
4032 free(it);
4033 }
4034 return 0;
4035}
4036
4355ab5f
SH
4037int lxc_clear_idmaps(struct lxc_conf *c)
4038{
4039 return lxc_free_idmap(&c->id_map);
4040}
4041
1fb86a7c
SH
4042int lxc_clear_config_keepcaps(struct lxc_conf *c)
4043{
4044 struct lxc_list *it,*next;
4045
4046 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4047 lxc_list_del(it);
4048 free(it->elem);
4049 free(it);
4050 }
4051 return 0;
4052}
4053
12a50cc6 4054int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4055{
9ebb03ad 4056 struct lxc_list *it,*next;
72d0e1cb 4057 bool all = false;
12a50cc6 4058 const char *k = key + 11;
72d0e1cb
SG
4059
4060 if (strcmp(key, "lxc.cgroup") == 0)
4061 all = true;
4062
9ebb03ad 4063 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4064 struct lxc_cgroup *cg = it->elem;
4065 if (!all && strcmp(cg->subsystem, k) != 0)
4066 continue;
4067 lxc_list_del(it);
4068 free(cg->subsystem);
4069 free(cg->value);
4070 free(cg);
4071 free(it);
4072 }
4073 return 0;
4074}
4075
ee1e7aa0
SG
4076int lxc_clear_groups(struct lxc_conf *c)
4077{
4078 struct lxc_list *it,*next;
4079
4080 lxc_list_for_each_safe(it, &c->groups, next) {
4081 lxc_list_del(it);
4082 free(it->elem);
4083 free(it);
4084 }
4085 return 0;
4086}
4087
ab799c0b
SG
4088int lxc_clear_environment(struct lxc_conf *c)
4089{
4090 struct lxc_list *it,*next;
4091
4092 lxc_list_for_each_safe(it, &c->environment, next) {
4093 lxc_list_del(it);
4094 free(it->elem);
4095 free(it);
4096 }
4097 return 0;
4098}
4099
4100
72d0e1cb
SG
4101int lxc_clear_mount_entries(struct lxc_conf *c)
4102{
9ebb03ad 4103 struct lxc_list *it,*next;
72d0e1cb 4104
9ebb03ad 4105 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4106 lxc_list_del(it);
4107 free(it->elem);
4108 free(it);
4109 }
4110 return 0;
4111}
4112
b099e9e9
SH
4113int lxc_clear_automounts(struct lxc_conf *c)
4114{
4115 c->auto_mounts = 0;
4116 return 0;
4117}
4118
12a50cc6 4119int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4120{
9ebb03ad 4121 struct lxc_list *it,*next;
17ed13a3 4122 bool all = false, done = false;
12a50cc6 4123 const char *k = key + 9;
72d0e1cb
SG
4124 int i;
4125
17ed13a3
SH
4126 if (strcmp(key, "lxc.hook") == 0)
4127 all = true;
4128
72d0e1cb 4129 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4130 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4131 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4132 lxc_list_del(it);
4133 free(it->elem);
4134 free(it);
4135 }
4136 done = true;
72d0e1cb
SG
4137 }
4138 }
17ed13a3
SH
4139
4140 if (!done) {
4141 ERROR("Invalid hook key: %s", key);
4142 return -1;
4143 }
72d0e1cb
SG
4144 return 0;
4145}
8eb5694b 4146
74a3920a 4147static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4148{
4149 int i;
4150
0cf45501 4151 if (!conf->saved_nics)
7b35f3d6
SH
4152 return;
4153 for (i=0; i < conf->num_savednics; i++)
4154 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4155 free(conf->saved_nics);
4156}
4157
4184c3e1
SH
4158static inline void lxc_clear_aliens(struct lxc_conf *conf)
4159{
4160 struct lxc_list *it,*next;
4161
4162 lxc_list_for_each_safe(it, &conf->aliens, next) {
4163 lxc_list_del(it);
4164 free(it->elem);
4165 free(it);
4166 }
4167}
4168
f979ac15
SH
4169static inline void lxc_clear_includes(struct lxc_conf *conf)
4170{
4171 struct lxc_list *it,*next;
4172
4173 lxc_list_for_each_safe(it, &conf->includes, next) {
4174 lxc_list_del(it);
4175 free(it->elem);
4176 free(it);
4177 }
4178}
4179
8eb5694b
SH
4180void lxc_conf_free(struct lxc_conf *conf)
4181{
4182 if (!conf)
4183 return;
f10fad2f
ME
4184 free(conf->console.log_path);
4185 free(conf->console.path);
4186 free(conf->rootfs.mount);
4187 free(conf->rootfs.options);
4188 free(conf->rootfs.path);
4189 free(conf->rootfs.pivot);
4190 free(conf->logfile);
4191 free(conf->utsname);
4192 free(conf->ttydir);
4193 free(conf->fstab);
4194 free(conf->rcfile);
4195 free(conf->init_cmd);
6b0d5538 4196 free(conf->unexpanded_config);
393903d1 4197 free(conf->pty_names);
8eb5694b 4198 lxc_clear_config_network(conf);
f10fad2f
ME
4199 free(conf->lsm_aa_profile);
4200 free(conf->lsm_se_context);
769872f9 4201 lxc_seccomp_free(conf);
8eb5694b 4202 lxc_clear_config_caps(conf);
1fb86a7c 4203 lxc_clear_config_keepcaps(conf);
8eb5694b 4204 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4205 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4206 lxc_clear_mount_entries(conf);
7b35f3d6 4207 lxc_clear_saved_nics(conf);
27c27d73 4208 lxc_clear_idmaps(conf);
ee1e7aa0 4209 lxc_clear_groups(conf);
f979ac15 4210 lxc_clear_includes(conf);
761d81ca 4211 lxc_clear_aliens(conf);
ab799c0b 4212 lxc_clear_environment(conf);
8eb5694b
SH
4213 free(conf);
4214}
4355ab5f
SH
4215
/*
 * Argument bundle handed to run_userns_fn() in the cloned child.
 */
struct userns_fn_data {
	/* callback to run once the parent has signalled the child */
	int (*fn)(void *);
	/* opaque argument passed to fn */
	void *arg;
	/* pipe: the child reads one byte from p[0]; p[1] is the write end */
	int p[2];
};
4221
4222static int run_userns_fn(void *data)
4223{
4224 struct userns_fn_data *d = data;
4225 char c;
4226 // we're not sharing with the parent any more, if it was a thread
4227
4228 close(d->p[1]);
4229 if (read(d->p[0], &c, 1) != 1)
4230 return -1;
4231 close(d->p[0]);
4232 return d->fn(d->arg);
4233}
4234
4235/*
8b227008
TS
4236 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4237 * if they are not already there.
4355ab5f 4238 */
8b227008
TS
4239static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4240 uid_t uid, gid_t gid)
4355ab5f 4241{
8b227008
TS
4242 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4243 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4244 struct lxc_list *new = NULL, *tmp, *it, *next;
4245 struct id_map *entry;
4246
3ec1648d
SH
4247 new = malloc(sizeof(*new));
4248 if (!new) {
4249 ERROR("Out of memory building id map");
4250 return NULL;
4251 }
4252 lxc_list_init(new);
4253
8b227008
TS
4254 if (hostuid_mapped < 0) {
4255 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4256 if (hostuid_mapped < 0)
3ec1648d
SH
4257 goto err;
4258 tmp = malloc(sizeof(*tmp));
4259 if (!tmp)
4260 goto err;
4355ab5f
SH
4261 entry = malloc(sizeof(*entry));
4262 if (!entry) {
3ec1648d
SH
4263 free(tmp);
4264 goto err;
4355ab5f 4265 }
3ec1648d 4266 tmp->elem = entry;
4355ab5f 4267 entry->idtype = ID_TYPE_UID;
8b227008
TS
4268 entry->nsid = hostuid_mapped;
4269 entry->hostid = (unsigned long) uid;
4270 entry->range = 1;
4271 lxc_list_add_tail(new, tmp);
4272 }
4273 if (hostgid_mapped < 0) {
4274 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4275 if (hostgid_mapped < 0)
4276 goto err;
4277 tmp = malloc(sizeof(*tmp));
4278 if (!tmp)
4279 goto err;
4280 entry = malloc(sizeof(*entry));
4281 if (!entry) {
4282 free(tmp);
4283 goto err;
4284 }
4285 tmp->elem = entry;
4286 entry->idtype = ID_TYPE_GID;
4287 entry->nsid = hostgid_mapped;
4288 entry->hostid = (unsigned long) gid;
4355ab5f 4289 entry->range = 1;
3ec1648d 4290 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4291 }
4292 lxc_list_for_each_safe(it, &conf->id_map, next) {
4293 tmp = malloc(sizeof(*tmp));
4294 if (!tmp)
4295 goto err;
4296 entry = malloc(sizeof(*entry));
4297 if (!entry) {
4298 free(tmp);
4299 goto err;
4300 }
4301 memset(entry, 0, sizeof(*entry));
4302 memcpy(entry, it->elem, sizeof(*entry));
4303 tmp->elem = entry;
3ec1648d 4304 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4305 }
4306
4307 return new;
4308
4309err:
8b227008 4310 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4311 if (new)
4312 lxc_free_idmap(new);
c30ac545 4313 free(new);
4355ab5f
SH
4314 return NULL;
4315}
4316
4317/*
4318 * Run a function in a new user namespace.
8b227008 4319 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4320 */
4321int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4322{
4323 int ret, pid;
4324 struct userns_fn_data d;
4325 char c = '1';
4326 int p[2];
4327 struct lxc_list *idmap;
4328
4355ab5f 4329 ret = pipe(p);
4355ab5f
SH
4330 if (ret < 0) {
4331 SYSERROR("opening pipe");
4332 return -1;
4333 }
4334 d.fn = fn;
4335 d.arg = data;
4336 d.p[0] = p[0];
4337 d.p[1] = p[1];
4338 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4339 if (pid < 0)
4340 goto err;
4355ab5f 4341 close(p[0]);
4355ab5f
SH
4342 p[0] = -1;
4343
8b227008
TS
4344 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4345 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4346 goto err;
4347 }
4348
4349 ret = lxc_map_ids(idmap, pid);
4350 lxc_free_idmap(idmap);
88dd66fc 4351 free(idmap);
565e571c 4352 if (ret) {
4355ab5f
SH
4353 ERROR("Error setting up child mappings");
4354 goto err;
4355 }
4356
4357 // kick the child
4358 if (write(p[1], &c, 1) != 1) {
4359 SYSERROR("writing to pipe to child");
4360 goto err;
4361 }
4362
3139aead
SG
4363 ret = wait_for_pid(pid);
4364
4365 close(p[1]);
4366 return ret;
4367
4355ab5f 4368err:
4355ab5f
SH
4369 if (p[0] != -1)
4370 close(p[0]);
4371 close(p[1]);
4355ab5f
SH
4372 return -1;
4373}
97e9cfa0 4374
/*
 * Look up the login name of the effective uid.
 *
 * Returns a strdup()ed copy of the name (to be free()d by the caller), or
 * NULL if there is no passwd entry or the copy could not be allocated.
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4386
/*
 * Look up the group name of the effective gid.
 *
 * Returns a strdup()ed copy of the name (to be free()d by the caller), or
 * NULL if there is no group entry or the copy could not be allocated.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4398
a96a8e8c 4399/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4400void suggest_default_idmap(void)
4401{
4402 FILE *f;
4403 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4404 char *line = NULL;
4405 char *uname, *gname;
4406 size_t len = 0;
4407
4408 if (!(uname = getuname()))
4409 return;
4410
4411 if (!(gname = getgname())) {
4412 free(uname);
4413 return;
4414 }
4415
4416 f = fopen(subuidfile, "r");
4417 if (!f) {
4418 ERROR("Your system is not configured with subuids");
4419 free(gname);
4420 free(uname);
4421 return;
4422 }
4423 while (getline(&line, &len, f) != -1) {
4424 char *p = strchr(line, ':'), *p2;
4425 if (*line == '#')
4426 continue;
4427 if (!p)
4428 continue;
4429 *p = '\0';
4430 p++;
4431 if (strcmp(line, uname))
4432 continue;
4433 p2 = strchr(p, ':');
4434 if (!p2)
4435 continue;
4436 *p2 = '\0';
4437 p2++;
4438 if (!*p2)
4439 continue;
4440 uid = atoi(p);
4441 urange = atoi(p2);
4442 }
4443 fclose(f);
4444
4445 f = fopen(subuidfile, "r");
4446 if (!f) {
4447 ERROR("Your system is not configured with subgids");
4448 free(gname);
4449 free(uname);
4450 return;
4451 }
4452 while (getline(&line, &len, f) != -1) {
4453 char *p = strchr(line, ':'), *p2;
4454 if (*line == '#')
4455 continue;
4456 if (!p)
4457 continue;
4458 *p = '\0';
4459 p++;
4460 if (strcmp(line, uname))
4461 continue;
4462 p2 = strchr(p, ':');
4463 if (!p2)
4464 continue;
4465 *p2 = '\0';
4466 p2++;
4467 if (!*p2)
4468 continue;
4469 gid = atoi(p);
4470 grange = atoi(p2);
4471 }
4472 fclose(f);
4473
f10fad2f 4474 free(line);
97e9cfa0
SH
4475
4476 if (!urange || !grange) {
4477 ERROR("You do not have subuids or subgids allocated");
4478 ERROR("Unprivileged containers require subuids and subgids");
4479 return;
4480 }
4481
4482 ERROR("You must either run as root, or define uid mappings");
4483 ERROR("To pass uid mappings to lxc-create, you could create");
4484 ERROR("~/.config/lxc/default.conf:");
4485 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4486 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4487 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4488
4489 free(gname);
4490 free(uname);
4491}