]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
Merge pull request #1424 from brauner/2017-02-08/thomasDOTjaroschATintra2netDOTcom_pt...
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
1d52bdf7
CB
23
24#define _GNU_SOURCE
d06245b8
NC
25#include "config.h"
26
8f3e280e
CB
27#include <dirent.h>
28#include <errno.h>
29#include <fcntl.h>
30#include <grp.h>
31#include <inttypes.h>
32#include <libgen.h>
33#include <pwd.h>
34#include <stdarg.h>
0ad19a3f 35#include <stdio.h>
0ad19a3f 36#include <stdlib.h>
0ad19a3f 37#include <string.h>
8f3e280e 38#include <time.h>
0ad19a3f 39#include <unistd.h>
8f3e280e
CB
40#include <arpa/inet.h>
41#include <linux/loop.h>
8f3e280e
CB
42#include <net/if.h>
43#include <netinet/in.h>
44#include <sys/mman.h>
45#include <sys/mount.h>
46#include <sys/param.h>
47#include <sys/prctl.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
2d76d1d7 50#include <sys/syscall.h>
97e9cfa0 51#include <sys/types.h>
8f3e280e
CB
52#include <sys/utsname.h>
53#include <sys/wait.h>
1d52bdf7 54
af6824fc
ST
55/* makedev() */
56#ifdef MAJOR_IN_MKDEV
57# include <sys/mkdev.h>
58#endif
59#ifdef MAJOR_IN_SYSMACROS
60# include <sys/sysmacros.h>
61#endif
62
614305f3 63#ifdef HAVE_STATVFS
2938f7c8 64#include <sys/statvfs.h>
614305f3 65#endif
e827ff7e
SG
66
67#if HAVE_PTY_H
b0a33c1e 68#include <pty.h>
e827ff7e
SG
69#else
70#include <../include/openpty.h>
71#endif
0ad19a3f 72
5ef5c9a3
CB
73#ifdef HAVE_LINUX_MEMFD_H
74#include <linux/memfd.h>
75#endif
76
e8bd4e43 77#include "af_unix.h"
8f3e280e
CB
78#include "bdev.h"
79#include "caps.h" /* for lxc_caps_last_cap() */
80#include "cgroup.h"
1b09f2c0 81#include "conf.h"
8f3e280e 82#include "error.h"
1b09f2c0 83#include "log.h"
d8e48992 84#include "lxcaufs.h"
025ed0f3 85#include "lxclock.h"
8f3e280e
CB
86#include "lxcoverlay.h"
87#include "lxcseccomp.h"
4355ab5f 88#include "namespace.h"
8f3e280e
CB
89#include "network.h"
90#include "parse.h"
91#include "utils.h"
fe4de9a6 92#include "lsm/lsm.h"
d0a36f2c 93
495d2046
SG
94#if HAVE_SYS_CAPABILITY_H
95#include <sys/capability.h>
96#endif
97
6ff05e18
SG
98#if HAVE_SYS_PERSONALITY_H
99#include <sys/personality.h>
100#endif
101
edaf8b1b
SG
102#if IS_BIONIC
103#include <../include/lxcmntent.h>
104#else
105#include <mntent.h>
106#endif
107
36eb9bde 108lxc_log_define(lxc_conf, lxc);
e5bda9ee 109
495d2046 110#if HAVE_SYS_CAPABILITY_H
b09094da
MN
111#ifndef CAP_SETFCAP
112#define CAP_SETFCAP 31
113#endif
114
115#ifndef CAP_MAC_OVERRIDE
116#define CAP_MAC_OVERRIDE 32
117#endif
118
119#ifndef CAP_MAC_ADMIN
120#define CAP_MAC_ADMIN 33
121#endif
495d2046 122#endif
b09094da
MN
123
124#ifndef PR_CAPBSET_DROP
125#define PR_CAPBSET_DROP 24
126#endif
127
9818cae4
SG
128#ifndef LO_FLAGS_AUTOCLEAR
129#define LO_FLAGS_AUTOCLEAR 4
130#endif
131
0769b82a
CS
132/* needed for cgroup automount checks, regardless of whether we
133 * have included linux/capability.h or not */
134#ifndef CAP_SYS_ADMIN
135#define CAP_SYS_ADMIN 21
136#endif
137
2d76d1d7
SG
/*
 * pivot_root(): use the C library's version when the build detected one,
 * otherwise provide a local fallback that issues the raw system call (or
 * fails with ENOSYS when the syscall number is unknown).
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
152
/*
 * sethostname(): local fallback for C libraries that do not ship one.
 * Issues the raw system call, or fails with ENOSYS when the syscall
 * number is unknown at build time.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
165
72f919c4
SG
166/* Define __S_ISTYPE if missing from the C library */
167#ifndef __S_ISTYPE
168#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
169#endif
170
ecec0126
SG
171#ifndef MS_PRIVATE
172#define MS_PRIVATE (1<<18)
173#endif
174
5ef5c9a3
CB
175/* memfd_create() */
176#ifndef MFD_CLOEXEC
177#define MFD_CLOEXEC 0x0001U
178#endif
179
180#ifndef MFD_ALLOW_SEALING
181#define MFD_ALLOW_SEALING 0x0002U
182#endif
183
#ifdef HAVE_MEMFD_CREATE
extern int memfd_create(const char *name, unsigned int flags);
#else

/*
 * Per-architecture syscall numbers for memfd_create(), used only when the
 * system headers do not already provide __NR_memfd_create.
 */
#ifndef __NR_memfd_create
	#if defined __i386__
		#define __NR_memfd_create 356
	#elif defined __x86_64__
		#define __NR_memfd_create 319
	#elif defined __arm__
		#define __NR_memfd_create 385
	#elif defined __aarch64__
		#define __NR_memfd_create 279
	#elif defined __s390__
		#define __NR_memfd_create 350
	#elif defined __powerpc__
		#define __NR_memfd_create 360
	#elif defined __sparc__
		#define __NR_memfd_create 348
	#elif defined __blackfin__
		#define __NR_memfd_create 390
	#elif defined __ia64__
		#define __NR_memfd_create 1340
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32
			#define __NR_memfd_create 4354
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32
			#define __NR_memfd_create 6318
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64
			#define __NR_memfd_create 5314
		#endif
	#endif
#endif

/*
 * Fallback memfd_create() for C libraries without a wrapper: issue the raw
 * system call, or fail with ENOSYS when no syscall number is known for this
 * architecture.
 */
static int memfd_create(const char *name, unsigned int flags) {
#ifndef __NR_memfd_create
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_memfd_create, name, flags);
#endif
}
#endif
227
72d0e1cb 228char *lxchook_names[NUM_LXC_HOOKS] = {
52492063 229 "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" };
72d0e1cb 230
a589434e 231typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 232
998ac676
RT
/* One entry of the mount option name table. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" or "nosuid" */
	int clear;	/* NOTE(review): presumably non-zero when the keyword clears @flag instead of setting it - confirm against the parser */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
238
81810dd1
DL
/* One entry of the capability name table: keyword and its CAP_* number. */
struct caps_opt {
	char *name;	/* capability keyword, e.g. "sys_admin" */
	int value;	/* matching CAP_* constant */
};
243
858377e4
SH
244/*
245 * The lxc_conf of the container currently being worked on in an
246 * API call
247 * This is used in the error calls
248 */
249#ifdef HAVE_TLS
250__thread struct lxc_conf *current_config;
251#else
252struct lxc_conf *current_config;
253#endif
254
0769b82a
CS
255/* Declare this here, since we don't want to reshuffle the whole file. */
256static int in_caplist(int cap, struct lxc_list *caps);
257
a589434e
JN
258static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
259static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
260static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
261static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
262static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
263static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
264
265static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
266 [LXC_NET_VETH] = instantiate_veth,
267 [LXC_NET_MACVLAN] = instantiate_macvlan,
268 [LXC_NET_VLAN] = instantiate_vlan,
269 [LXC_NET_PHYS] = instantiate_phys,
270 [LXC_NET_EMPTY] = instantiate_empty,
271 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 272};
273
74a2b586
JK
274static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
275static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
276static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
277static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
278static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 279static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 280
a589434e 281static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
282 [LXC_NET_VETH] = shutdown_veth,
283 [LXC_NET_MACVLAN] = shutdown_macvlan,
284 [LXC_NET_VLAN] = shutdown_vlan,
285 [LXC_NET_PHYS] = shutdown_phys,
286 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 287 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
288};
289
998ac676 290static struct mount_opt mount_opt[] = {
88d413d5
SW
291 { "defaults", 0, 0 },
292 { "ro", 0, MS_RDONLY },
293 { "rw", 1, MS_RDONLY },
294 { "suid", 1, MS_NOSUID },
295 { "nosuid", 0, MS_NOSUID },
296 { "dev", 1, MS_NODEV },
297 { "nodev", 0, MS_NODEV },
298 { "exec", 1, MS_NOEXEC },
299 { "noexec", 0, MS_NOEXEC },
300 { "sync", 0, MS_SYNCHRONOUS },
301 { "async", 1, MS_SYNCHRONOUS },
302 { "dirsync", 0, MS_DIRSYNC },
303 { "remount", 0, MS_REMOUNT },
304 { "mand", 0, MS_MANDLOCK },
305 { "nomand", 1, MS_MANDLOCK },
306 { "atime", 1, MS_NOATIME },
307 { "noatime", 0, MS_NOATIME },
308 { "diratime", 1, MS_NODIRATIME },
309 { "nodiratime", 0, MS_NODIRATIME },
310 { "bind", 0, MS_BIND },
311 { "rbind", 0, MS_BIND|MS_REC },
312 { "relatime", 0, MS_RELATIME },
313 { "norelatime", 1, MS_RELATIME },
314 { "strictatime", 0, MS_STRICTATIME },
315 { "nostrictatime", 1, MS_STRICTATIME },
316 { NULL, 0, 0 },
998ac676
RT
317};
318
495d2046 319#if HAVE_SYS_CAPABILITY_H
81810dd1 320static struct caps_opt caps_opt[] = {
a6afdde9 321 { "chown", CAP_CHOWN },
1e11be34
DL
322 { "dac_override", CAP_DAC_OVERRIDE },
323 { "dac_read_search", CAP_DAC_READ_SEARCH },
324 { "fowner", CAP_FOWNER },
325 { "fsetid", CAP_FSETID },
81810dd1
DL
326 { "kill", CAP_KILL },
327 { "setgid", CAP_SETGID },
328 { "setuid", CAP_SETUID },
329 { "setpcap", CAP_SETPCAP },
330 { "linux_immutable", CAP_LINUX_IMMUTABLE },
331 { "net_bind_service", CAP_NET_BIND_SERVICE },
332 { "net_broadcast", CAP_NET_BROADCAST },
333 { "net_admin", CAP_NET_ADMIN },
334 { "net_raw", CAP_NET_RAW },
335 { "ipc_lock", CAP_IPC_LOCK },
336 { "ipc_owner", CAP_IPC_OWNER },
337 { "sys_module", CAP_SYS_MODULE },
338 { "sys_rawio", CAP_SYS_RAWIO },
339 { "sys_chroot", CAP_SYS_CHROOT },
340 { "sys_ptrace", CAP_SYS_PTRACE },
341 { "sys_pacct", CAP_SYS_PACCT },
342 { "sys_admin", CAP_SYS_ADMIN },
343 { "sys_boot", CAP_SYS_BOOT },
344 { "sys_nice", CAP_SYS_NICE },
345 { "sys_resource", CAP_SYS_RESOURCE },
346 { "sys_time", CAP_SYS_TIME },
347 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
348 { "mknod", CAP_MKNOD },
349 { "lease", CAP_LEASE },
57b837e2
CB
350#ifdef CAP_AUDIT_READ
351 { "audit_read", CAP_AUDIT_READ },
352#endif
9527e566 353#ifdef CAP_AUDIT_WRITE
81810dd1 354 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
355#endif
356#ifdef CAP_AUDIT_CONTROL
81810dd1 357 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 358#endif
81810dd1
DL
359 { "setfcap", CAP_SETFCAP },
360 { "mac_override", CAP_MAC_OVERRIDE },
361 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
362#ifdef CAP_SYSLOG
363 { "syslog", CAP_SYSLOG },
364#endif
365#ifdef CAP_WAKE_ALARM
366 { "wake_alarm", CAP_WAKE_ALARM },
367#endif
2b54359b
CB
368#ifdef CAP_BLOCK_SUSPEND
369 { "block_suspend", CAP_BLOCK_SUSPEND },
370#endif
81810dd1 371};
495d2046
SG
372#else
373static struct caps_opt caps_opt[] = {};
374#endif
81810dd1 375
91c3830e
SH
376static int run_buffer(char *buffer)
377{
ebec9176 378 struct lxc_popen_FILE *f;
91c3830e 379 char *output;
8e7da691 380 int ret;
91c3830e 381
ebec9176 382 f = lxc_popen(buffer);
91c3830e 383 if (!f) {
062b72c6 384 SYSERROR("Failed to popen() %s.", buffer);
91c3830e
SH
385 return -1;
386 }
387
388 output = malloc(LXC_LOG_BUFFER_SIZE);
389 if (!output) {
062b72c6 390 ERROR("Failed to allocate memory for %s.", buffer);
ebec9176 391 lxc_pclose(f);
91c3830e
SH
392 return -1;
393 }
394
062b72c6
CB
395 while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
396 DEBUG("Script %s with output: %s.", buffer, output);
91c3830e
SH
397
398 free(output);
399
ebec9176 400 ret = lxc_pclose(f);
8e7da691 401 if (ret == -1) {
062b72c6 402 SYSERROR("Script exited with error.");
91c3830e 403 return -1;
8e7da691 404 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
062b72c6 405 ERROR("Script exited with status %d.", WEXITSTATUS(ret));
8e7da691
DE
406 return -1;
407 } else if (WIFSIGNALED(ret)) {
062b72c6 408 ERROR("Script terminated by signal %d.", WTERMSIG(ret));
8e7da691 409 return -1;
91c3830e
SH
410 }
411
412 return 0;
413}
414
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it via run_buffer().
 *
 * @argsin may be NULL; otherwise it is a NULL-terminated array of extra
 * arguments appended in order. @lxcpath is accepted but not used here.
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * assembled.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook,
			   const char *lxcpath, char **argsin)
{
	char *cmd;
	size_t total;
	int used, idx;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* Fixed words plus their three separators ... */
	total = strlen(script) + strlen(name) + strlen(section) + 3;
	/* ... the hook name and its terminator ... */
	total += strlen(hook) + 1;
	/* ... and one separator per extra argument. */
	for (idx = 0; argsin && argsin[idx]; idx++)
		total += strlen(argsin[idx]) + 1;

	if (total > INT_MAX)
		return -1;

	cmd = alloca(total);
	if (!cmd) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	used = snprintf(cmd, total, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= total) {
		ERROR("Script name too long.");
		return -1;
	}

	for (idx = 0; argsin && argsin[idx]; idx++) {
		int room = total - used;
		int written = snprintf(cmd + used, room, " %s", argsin[idx]);

		if (written < 0 || written >= room) {
			ERROR("Script args too long.");
			return -1;
		}
		used += written;
	}

	return run_buffer(cmd);
}
465
062b72c6
CB
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it via run_buffer().
 *
 * The variadic arguments are extra `char *` words appended in order; the
 * list MUST be terminated by a NULL pointer.
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * assembled.
 */
static int run_script(const char *name, const char *section, const char *script,
		      ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* First pass over the arguments: each one needs a separator too. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two separators and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	buffer = alloca(size);
	if (!buffer) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long.");
		return -1;
	}

	/* Second pass: append the arguments. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long.");
			/* every va_start() needs a matching va_end(), also on error */
			va_end(ap);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	return run_buffer(buffer);
}
517
a17b1e65
SG
/*
 * Recursively bind-mount the directory @rootfs onto @target.
 *
 * @options is parsed with parse_mntopts(); the resulting flags are OR-ed
 * with MS_BIND | MS_REC and the remaining data string is passed to mount().
 *
 * Returns the result of mount(), or -1 when the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Start from known values so that free() below is safe even if
	 * parse_mntopts() fails before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
535
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed, marked LO_FLAGS_AUTOCLEAR and applied to the device
 * with LOOP_SET_STATUS64. If that step fails after the backing file was
 * already attached, the device is detached again so it is not left bound.
 *
 * Returns 0 on success, -1 on error. @fd stays open either way.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/*
		 * The autoclear flag was not applied, so nothing would
		 * release the device for us: undo LOOP_SET_FD by hand.
		 */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
567
a17b1e65
SG
/*
 * Mount the regular file @rootfs on @target through a loop device.
 *
 * /dev is scanned for entries named "loop*". The first one for which
 * LOOP_GET_STATUS64 fails with ENXIO is treated as free, attached to
 * @rootfs via setup_lodev() and mounted with mount_unknown_fs(). Only that
 * one device is tried.
 *
 * Returns 0 on success, -1 if /dev cannot be read, no free device is found,
 * or attaching/mounting fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct loop_info64 loinfo;
	struct dirent *entry;
	char devpath[MAXPATHLEN];
	DIR *devdir;
	int ret = -1;

	devdir = opendir("/dev");
	if (devdir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((entry = readdir(devdir)) != NULL) {
		int lofd, len;

		if (strcmp(entry->d_name, ".") == 0 ||
		    strcmp(entry->d_name, "..") == 0 ||
		    strncmp(entry->d_name, "loop", 4) != 0)
			continue;

		len = snprintf(devpath, MAXPATHLEN, "/dev/%s", entry->d_name);
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		lofd = open(devpath, O_RDWR);
		if (lofd < 0)
			continue;

		/* A status query that succeeds means the device is taken. */
		if (ioctl(lofd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(lofd);
			continue;
		}

		/* Anything but ENXIO is not the "free device" answer. */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     entry->d_name);
			close(lofd);
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		ret = setup_lodev(rootfs, lofd, &loinfo);
		if (ret == 0)
			ret = mount_unknown_fs(devpath, target, options);
		close(lofd);

		break;
	}

	if (closedir(devdir) != 0)
		WARN("failed to close directory");

	return ret;
}
632
a17b1e65
SG
/*
 * Mount the block device @rootfs on @target; the work is delegated to
 * mount_unknown_fs() and its result is returned unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int ret;

	ret = mount_unknown_fs(rootfs, target, options);

	return ret;
}
638
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, the path
 *           cannot be resolved, or it is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || rootfs[0] == '\0')
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return -1;

	/* The open descriptor keeps the file alive; the name can go. */
	(void)unlink(absrootfspin);
	return fd;
}
681
e2a7e8dc
SH
/*
 * When @flags requests a remount, look up the mount flags currently in
 * effect on @s (or on @d when @s is NULL) and make sure nosuid, nodev,
 * read-only and noexec are carried over into the returned flag set.
 *
 * @flags is returned unchanged when it is not a remount, when neither path
 * is given, when statvfs() fails, or when built without statvfs support.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifndef HAVE_STATVFS
	return flags;
#else
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs sb;
	size_t k;

	if ((flags & MS_REMOUNT) == 0)
		return flags;

	if (path == NULL)
		return flags;

	if (statvfs(path, &sb) < 0)
		return flags;

	for (k = 0; k < sizeof(sticky) / sizeof(sticky[0]); k++) {
		if (sb.f_flag & sticky[k])
			flags |= sticky[k];
	}

	return flags;
#endif
}
718
4fb3cba5 719static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 720{
368bbc02 721 int r;
80e80c40 722 int i;
b06b8511
CS
723 static struct {
724 int match_mask;
725 int match_flag;
726 const char *source;
727 const char *destination;
728 const char *fstype;
729 unsigned long flags;
730 const char *options;
731 } default_mounts[] = {
732 /* Read-only bind-mounting... In older kernels, doing that required
733 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
734 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
735 * kernel 2.6.26 onwards. However, this apparently does not work on
736 * kernel 3.8. Unfortunately, on that very same kernel, doing the
737 * same trick as above doesn't seem to work either, there one needs
738 * to ALSO specify MS_BIND for the remount, otherwise the entire
739 * fs is remounted read-only or the mount fails because it's busy...
740 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
741 * 2.6.32...
368bbc02 742 */
f24a52d5 743 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
744 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
745 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
746 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
747 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 748 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
749 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
750 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
751 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
752 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
753 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
754 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
755 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
756 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
757 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
758 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
759 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
760 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 761 };
368bbc02 762
b06b8511
CS
763 for (i = 0; default_mounts[i].match_mask; i++) {
764 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
765 char *source = NULL;
766 char *destination = NULL;
767 int saved_errno;
e2a7e8dc 768 unsigned long mflags;
b06b8511
CS
769
770 if (default_mounts[i].source) {
771 /* will act like strdup if %r is not present */
8ede5f4c 772 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
773 if (!source) {
774 SYSERROR("memory allocation error");
775 return -1;
776 }
777 }
cc4fd506
SH
778 if (!default_mounts[i].destination) {
779 ERROR("BUG: auto mounts destination %d was NULL", i);
b2f44b4d 780 free(source);
cc4fd506
SH
781 return -1;
782 }
783 /* will act like strdup if %r is not present */
784 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
785 if (!destination) {
786 saved_errno = errno;
787 SYSERROR("memory allocation error");
788 free(source);
789 errno = saved_errno;
790 return -1;
b06b8511 791 }
e2a7e8dc
SH
792 mflags = add_required_remount_flags(source, destination,
793 default_mounts[i].flags);
592fd47a 794 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 795 saved_errno = errno;
b88ff9a0
SG
796 if (r < 0 && errno == ENOENT) {
797 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
798 r = 0;
799 }
800 else if (r < 0)
e2a7e8dc 801 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 802
b06b8511
CS
803 free(source);
804 free(destination);
805 if (r < 0) {
b06b8511
CS
806 errno = saved_errno;
807 return -1;
808 }
368bbc02 809 }
368bbc02
CS
810 }
811
b06b8511 812 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
813 int cg_flags;
814
815 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
816 /* If the type of cgroup mount was not specified, it depends on the
817 * container's capabilities as to what makes sense: if we have
818 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
819 * anyway, so we may as well default to read-write; then the admin
820 * will not be given a false sense of security. (And if they really
821 * want mixed r/o r/w, then they can explicitly specify :mixed.)
822 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
823 * :mixed, because then the container can't remount it read-write. */
824 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
825 int has_sys_admin = 0;
826 if (!lxc_list_empty(&conf->keepcaps)) {
827 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
828 } else {
829 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
830 }
831 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
832 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
833 } else {
834 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
835 }
836 }
837
8ede5f4c 838 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 839 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 840 return -1;
368bbc02
CS
841 }
842 }
843
368bbc02 844 return 0;
368bbc02
CS
845}
846
a17b1e65 847static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 848{
b09ef133 849 char absrootfs[MAXPATHLEN];
78ae2fcc 850 struct stat s;
a6afdde9 851 int i;
78ae2fcc 852
a17b1e65 853 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 854
855 struct rootfs_type {
856 int type;
857 rootfs_cb cb;
858 } rtfs_type[] = {
2656d231
DL
859 { S_IFDIR, mount_rootfs_dir },
860 { S_IFBLK, mount_rootfs_block },
861 { S_IFREG, mount_rootfs_file },
78ae2fcc 862 };
0ad19a3f 863
4c8ab83b 864 if (!realpath(rootfs, absrootfs)) {
36eb9bde 865 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 866 return -1;
867 }
b09ef133 868
b09ef133 869 if (access(absrootfs, F_OK)) {
36eb9bde 870 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 871 return -1;
872 }
873
78ae2fcc 874 if (stat(absrootfs, &s)) {
36eb9bde 875 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 876 return -1;
877 }
878
78ae2fcc 879 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 880
78ae2fcc 881 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
882 continue;
9b0f0477 883
a17b1e65 884 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 885 }
9b0f0477 886
36eb9bde 887 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 888 return -1;
0ad19a3f 889}
890
/*
 * Set the host name to @utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and counts as
 * success. Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
905
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name, relative to /dev */
};

/* The standard /dev/fd and /dev/std{in,out,err} links into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
917
918static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
919{
920 char path[MAXPATHLEN];
921 int ret,i;
09227be2 922 struct stat s;
69aa6655
DE
923
924
925 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
926 const struct dev_symlinks *d = &dev_symlinks[i];
ec50007f 927 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
928 if (ret < 0 || ret >= MAXPATHLEN)
929 return -1;
09227be2
MW
930
931 /*
932 * Stat the path first. If we don't get an error
933 * accept it as is and don't try to create it
934 */
935 if (!stat(path, &s)) {
936 continue;
937 }
938
69aa6655 939 ret = symlink(d->oldpath, path);
09227be2 940
69aa6655 941 if (ret && errno != EEXIST) {
09227be2
MW
942 if ( errno == EROFS ) {
943 WARN("Warning: Read Only file system while creating %s", path);
944 } else {
945 SYSERROR("Error creating %s", path);
946 return -1;
947 }
69aa6655
DE
948 }
949 }
950 return 0;
951}
952
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * On the first call (*pp == NULL) a new string "container_ttys=<name>" is
 * allocated; later calls grow it by " <name>". Returns false, leaving *pp
 * as it was, when memory cannot be allocated.
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t cur_len;
	char *list;

	if (*pp == NULL) {
		list = malloc(name_len + prefix_len + 1);
		if (list == NULL)
			return false;

		memcpy(list, prefix, prefix_len);
		/* copies the terminating NUL as well */
		memcpy(list + prefix_len, name, name_len + 1);
		*pp = list;
		return true;
	}

	cur_len = strlen(*pp);
	/* room for the separator and the terminating NUL */
	list = realloc(*pp, cur_len + name_len + 2);
	if (list == NULL)
		return false;

	list[cur_len] = ' ';
	memcpy(list + cur_len + 1, name, name_len + 1);
	*pp = list;
	return true;
}
975
976static int setup_tty(struct lxc_conf *conf)
977{
393903d1
SH
978 const struct lxc_tty_info *tty_info = &conf->tty_info;
979 char *ttydir = conf->ttydir;
7c6ef2a2
SH
980 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
981 int i, ret;
b0a33c1e 982
e8bd4e43 983 if (!conf->rootfs.path)
bc9bd0e3
DL
984 return 0;
985
b0a33c1e 986 for (i = 0; i < tty_info->nbtty; i++) {
987
988 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
989
e8bd4e43 990 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
991 if (ret >= sizeof(path)) {
992 ERROR("pathname too long for ttys");
993 return -1;
994 }
995 if (ttydir) {
996 /* create dev/lxc/tty%d" */
e8bd4e43 997 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
998 if (ret >= sizeof(lxcpath)) {
999 ERROR("pathname too long for ttys");
1000 return -1;
1001 }
1002 ret = creat(lxcpath, 0660);
1003 if (ret==-1 && errno != EEXIST) {
959aee9c 1004 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
1005 return -1;
1006 }
4d44e274
SH
1007 if (ret >= 0)
1008 close(ret);
7c6ef2a2
SH
1009 ret = unlink(path);
1010 if (ret && errno != ENOENT) {
959aee9c 1011 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1012 return -1;
1013 }
b0a33c1e 1014
7c6ef2a2
SH
1015 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1016 WARN("failed to mount '%s'->'%s'",
1017 pty_info->name, path);
1018 continue;
1019 }
13954cce 1020
9ba8130c
SH
1021 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1022 if (ret >= sizeof(lxcpath)) {
1023 ERROR("tty pathname too long");
1024 return -1;
1025 }
7c6ef2a2
SH
1026 ret = symlink(lxcpath, path);
1027 if (ret) {
959aee9c 1028 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1029 return -1;
1030 }
1031 } else {
c6883f38
SH
1032 /* If we populated /dev, then we need to create /dev/ttyN */
1033 if (access(path, F_OK)) {
1034 ret = creat(path, 0660);
1035 if (ret==-1) {
959aee9c 1036 SYSERROR("error creating %s", path);
c6883f38 1037 /* this isn't fatal, continue */
025ed0f3 1038 } else {
c6883f38 1039 close(ret);
025ed0f3 1040 }
c6883f38 1041 }
7c6ef2a2 1042 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 1043 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
1044 continue;
1045 }
393903d1 1046 }
e8bd4e43 1047 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
1048 ERROR("Error setting up container_ttys string");
1049 return -1;
b0a33c1e 1050 }
1051 }
1052
cd54d859
DL
1053 INFO("%d tty(s) has been setup", tty_info->nbtty);
1054
b0a33c1e 1055 return 0;
1056}
1057
bf601689 1058
59bb8698 1059static int setup_rootfs_pivot_root(const char *rootfs)
bf601689 1060{
2d489f9e 1061 int oldroot = -1, newroot = -1;
bf601689 1062
2d489f9e
SH
1063 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1064 if (oldroot < 0) {
1065 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1066 return -1;
1067 }
2d489f9e
SH
1068 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1069 if (newroot < 0) {
1070 SYSERROR("Error opening new-/ for fchdir");
1071 goto fail;
c08556c6 1072 }
bf601689 1073
cc6f6dd7 1074 /* change into new root fs */
2d489f9e 1075 if (fchdir(newroot)) {
cc6f6dd7 1076 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1077 goto fail;
cc6f6dd7
DL
1078 }
1079
cc6f6dd7 1080 /* pivot_root into our new root fs */
2d489f9e 1081 if (pivot_root(".", ".")) {
cc6f6dd7 1082 SYSERROR("pivot_root syscall failed");
2d489f9e 1083 goto fail;
bf601689 1084 }
cc6f6dd7 1085
2d489f9e
SH
1086 /*
1087 * at this point the old-root is mounted on top of our new-root
1088 * To unmounted it we must not be chdir'd into it, so escape back
1089 * to old-root
1090 */
1091 if (fchdir(oldroot) < 0) {
1092 SYSERROR("Error entering oldroot");
1093 goto fail;
1094 }
7981ea46 1095 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1096 SYSERROR("Error detaching old root");
1097 goto fail;
cc6f6dd7
DL
1098 }
1099
2d489f9e
SH
1100 if (fchdir(newroot) < 0) {
1101 SYSERROR("Error re-entering newroot");
1102 goto fail;
1103 }
cc6f6dd7 1104
2d489f9e
SH
1105 close(oldroot);
1106 close(newroot);
bf601689 1107
2d489f9e 1108 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1109
bf601689 1110 return 0;
2d489f9e
SH
1111
1112fail:
1113 if (oldroot != -1)
1114 close(oldroot);
1115 if (newroot != -1)
1116 close(newroot);
1117 return -1;
bf601689
MH
1118}
1119
bc6928ff 1120/*
87da4ec3
SH
1121 * Just create a path for /dev under $lxcpath/$name and in rootfs
1122 * If we hit an error, log it but don't fail yet.
91c3830e 1123 */
14221cbb 1124static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1125{
1126 int ret;
87da4ec3
SH
1127 size_t clen;
1128 char *path;
91c3830e 1129
14221cbb 1130 INFO("Mounting container /dev");
bc6928ff 1131
14221cbb 1132 /* $(rootfs->mount) + "/dev/pts" + '\0' */
ec50007f 1133 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1134 path = alloca(clen);
bc6928ff 1135
ec50007f 1136 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
87da4ec3 1137 if (ret < 0 || ret >= clen)
91c3830e 1138 return -1;
bc6928ff 1139
87da4ec3 1140 if (!dir_exists(path)) {
14221cbb 1141 WARN("No /dev in container.");
87da4ec3
SH
1142 WARN("Proceeding without autodev setup");
1143 return 0;
bc6928ff 1144 }
87da4ec3 1145
1ec0e8e3 1146 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
ec50007f 1147 rootfs->path ? rootfs->mount : NULL);
1ec0e8e3 1148 if (ret != 0) {
87da4ec3 1149 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1ec0e8e3 1150 return -1;
91c3830e 1151 }
87da4ec3
SH
1152
1153 INFO("Mounted tmpfs onto %s", path);
1154
ec50007f 1155 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
87da4ec3 1156 if (ret < 0 || ret >= clen)
91c3830e 1157 return -1;
87da4ec3 1158
bc6928ff
MW
1159 /*
1160 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1161 * If not, then create it and exit if that fails...
1162 */
87da4ec3 1163 if (!dir_exists(path)) {
bc6928ff
MW
1164 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1165 if (ret) {
1166 SYSERROR("Failed to create /dev/pts in container");
1167 return -1;
1168 }
91c3830e
SH
1169 }
1170
14221cbb 1171 INFO("Mounted container /dev");
91c3830e
SH
1172 return 0;
1173}
1174
c6883f38 1175struct lxc_devs {
74a3920a 1176 const char *name;
c6883f38
SH
1177 mode_t mode;
1178 int maj;
1179 int min;
1180};
1181
74a3920a 1182static const struct lxc_devs lxc_devs[] = {
c6883f38
SH
1183 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
1184 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
1185 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
1186 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1187 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1188 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
1189 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
1190};
1191
0728ebf4 1192static int fill_autodev(const struct lxc_rootfs *rootfs, bool mount_console)
c6883f38
SH
1193{
1194 int ret;
c6883f38
SH
1195 char path[MAXPATHLEN];
1196 int i;
3a32201c 1197 mode_t cmask;
c6883f38 1198
14221cbb 1199 INFO("Creating initial consoles under container /dev");
91c3830e 1200
ec50007f 1201 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
91c3830e
SH
1202 if (ret < 0 || ret >= MAXPATHLEN) {
1203 ERROR("Error calculating container /dev location");
c6883f38 1204 return -1;
f7bee6c6 1205 }
91c3830e 1206
9769034f 1207 if (!dir_exists(path)) // ignore, just don't try to fill in
9cb4d183
SH
1208 return 0;
1209
14221cbb 1210 INFO("Populating container /dev");
3a32201c 1211 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1212 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1213 const struct lxc_devs *d = &lxc_devs[i];
0728ebf4
TA
1214
1215 if (!strcmp(d->name, "console") && !mount_console)
1216 continue;
1217
ec50007f 1218 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
c6883f38
SH
1219 if (ret < 0 || ret >= MAXPATHLEN)
1220 return -1;
1221 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1222 if (ret && errno != EEXIST) {
9cb4d183
SH
1223 char hostpath[MAXPATHLEN];
1224 FILE *pathfile;
1225
1226 // Unprivileged containers cannot create devices, so
1227 // bind mount the device from the host
1228 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1229 if (ret < 0 || ret >= MAXPATHLEN)
1230 return -1;
1231 pathfile = fopen(path, "wb");
1232 if (!pathfile) {
1233 SYSERROR("Failed to create device mount target '%s'", path);
1234 return -1;
1235 }
1236 fclose(pathfile);
592fd47a 1237 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
ec50007f 1238 rootfs->path ? rootfs->mount : NULL) != 0) {
9cb4d183
SH
1239 SYSERROR("Failed bind mounting device %s from host into container",
1240 d->name);
1241 return -1;
1242 }
c6883f38
SH
1243 }
1244 }
3a32201c 1245 umask(cmask);
c6883f38 1246
14221cbb 1247 INFO("Populated container /dev");
c6883f38
SH
1248 return 0;
1249}
1250
cc28d0b0 1251static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1252{
cc28d0b0
SH
1253 const struct lxc_rootfs *rootfs = &conf->rootfs;
1254
a0f379bf
DW
1255 if (!rootfs->path) {
1256 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1257 SYSERROR("Failed to make / rslave");
1258 return -1;
1259 }
c69bd12f 1260 return 0;
a0f379bf 1261 }
0ad19a3f 1262
12297168 1263 if (access(rootfs->mount, F_OK)) {
b1789442 1264 SYSERROR("failed to access to '%s', check it is present",
12297168 1265 rootfs->mount);
b1789442
DL
1266 return -1;
1267 }
1268
9be53773 1269 // First try mounting rootfs using a bdev
76a26f55 1270 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1271 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1272 bdev_put(bdev);
9be53773
SH
1273 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1274 return 0;
1275 }
59d66af2
SH
1276 if (bdev)
1277 bdev_put(bdev);
a17b1e65 1278 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1279 ERROR("failed to mount rootfs");
c3f0a28c 1280 return -1;
1281 }
0ad19a3f 1282
12297168 1283 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1284
ac778708
DL
1285 return 0;
1286}
1287
91e93c71
AV
1288int prepare_ramfs_root(char *root)
1289{
eab15c1e 1290 char buf[LXC_LINELEN], *p;
91e93c71
AV
1291 char nroot[PATH_MAX];
1292 FILE *f;
1293 int i;
1294 char *p2;
1295
1296 if (realpath(root, nroot) == NULL)
1297 return -1;
1298
1299 if (chdir("/") == -1)
1300 return -1;
1301
1302 /*
1303 * We could use here MS_MOVE, but in userns this mount is
1304 * locked and can't be moved.
1305 */
1306 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1307 SYSERROR("Failed to move %s into /", root);
1308 return -1;
1309 }
1310
88322f77 1311 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1312 SYSERROR("Failed to make . rprivate");
1313 return -1;
1314 }
1315
1316 /*
1317 * The following code cleans up inhereted mounts which are not
1318 * required for CT.
1319 *
1320 * The mountinfo file shows not all mounts, if a few points have been
1321 * unmounted between read operations from the mountinfo. So we need to
1322 * read mountinfo a few times.
1323 *
1324 * This loop can be skipped if a container uses unserns, because all
1325 * inherited mounts are locked and we should live with all this trash.
1326 */
1327 while (1) {
1328 int progress = 0;
1329
1330 f = fopen("./proc/self/mountinfo", "r");
1331 if (!f) {
1332 SYSERROR("Unable to open /proc/self/mountinfo");
1333 return -1;
1334 }
eab15c1e 1335 while (fgets(buf, LXC_LINELEN, f)) {
91e93c71
AV
1336 for (p = buf, i=0; p && i < 4; i++)
1337 p = strchr(p+1, ' ');
1338 if (!p)
1339 continue;
1340 p2 = strchr(p+1, ' ');
1341 if (!p2)
1342 continue;
1343
1344 *p2 = '\0';
1345 *p = '.';
1346
1347 if (strcmp(p + 1, "/") == 0)
1348 continue;
1349 if (strcmp(p + 1, "/proc") == 0)
1350 continue;
1351
1352 if (umount2(p, MNT_DETACH) == 0)
1353 progress++;
1354 }
1355 fclose(f);
1356 if (!progress)
1357 break;
1358 }
1359
8bea9fae
PR
1360 /* This also can be skipped if a container uses unserns */
1361 umount2("./proc", MNT_DETACH);
91e93c71
AV
1362
1363 /* It is weird, but chdir("..") moves us in a new root */
1364 if (chdir("..") == -1) {
1365 SYSERROR("Unable to change working directory");
1366 return -1;
1367 }
1368
1369 if (chroot(".") == -1) {
1370 SYSERROR("Unable to chroot");
1371 return -1;
1372 }
1373
1374 return 0;
1375}
1376
74a3920a 1377static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1378{
ac778708
DL
1379 if (!rootfs->path)
1380 return 0;
1381
91e93c71
AV
1382 if (detect_ramfs_rootfs()) {
1383 if (prepare_ramfs_root(rootfs->mount))
1384 return -1;
59bb8698 1385 } else if (setup_rootfs_pivot_root(rootfs->mount)) {
cc6f6dd7 1386 ERROR("failed to setup pivot root");
25368b52 1387 return -1;
c69bd12f
DL
1388 }
1389
25368b52 1390 return 0;
0ad19a3f 1391}
1392
d852c78c 1393static int setup_pts(int pts)
3c26f34e 1394{
77890c6d
SW
1395 char target[PATH_MAX];
1396
d852c78c
DL
1397 if (!pts)
1398 return 0;
3c26f34e 1399
1400 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1401 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1402 return -1;
1403 }
1404
7e40254a
JTLB
1405 if (mkdir("/dev/pts", 0755)) {
1406 if ( errno != EEXIST ) {
1407 SYSERROR("failed to create '/dev/pts'");
1408 return -1;
1409 }
1410 }
1411
a6afdde9 1412 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1413 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1414 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1415 return -1;
1416 }
1417
3c26f34e 1418 if (access("/dev/ptmx", F_OK)) {
1419 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1420 goto out;
36eb9bde 1421 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1422 return -1;
1423 }
1424
77890c6d
SW
1425 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1426 goto out;
1427
3c26f34e 1428 /* fallback here, /dev/pts/ptmx exists just mount bind */
1429 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1430 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1431 return -1;
1432 }
cd54d859
DL
1433
1434 INFO("created new pts instance");
d852c78c 1435
3c26f34e 1436out:
1437 return 0;
1438}
1439
cccc74b5
DL
/*
 * Apply the execution domain @persona via personality().
 *
 * A value of -1 means "leave unchanged". When the build has no
 * <sys/personality.h> (HAVE_SYS_PERSONALITY_H unset) this is a no-op.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1456
7c6ef2a2 1457static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1458 const struct lxc_console *console)
6e590161 1459{
63376d7d 1460 char path[MAXPATHLEN];
0728ebf4 1461 int ret, fd;
52e35957 1462
7c6ef2a2
SH
1463 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1464 if (ret >= sizeof(path)) {
959aee9c 1465 ERROR("console path too long");
7c6ef2a2
SH
1466 return -1;
1467 }
52e35957 1468
0728ebf4
TA
1469 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1470 if (fd < 0) {
1471 if (errno != EEXIST) {
1472 SYSERROR("failed to create console");
1473 return -1;
1474 }
1475 } else {
1476 close(fd);
52e35957
DL
1477 }
1478
b5159817
DE
1479 if (console->master < 0) {
1480 INFO("no console");
f78a1f32
DL
1481 return 0;
1482 }
ed502555 1483
0728ebf4 1484 if (chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH)) {
63376d7d 1485 SYSERROR("failed to set mode '0%o' to '%s'",
0728ebf4 1486 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
63376d7d
DL
1487 return -1;
1488 }
13954cce 1489
592fd47a 1490 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1491 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1492 return -1;
1493 }
1494
63376d7d 1495 INFO("console has been setup");
7c6ef2a2
SH
1496 return 0;
1497}
1498
1499static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1500 const struct lxc_console *console,
1501 char *ttydir)
1502{
1503 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1504 int ret;
1505
1506 /* create rootfs/dev/<ttydir> directory */
1507 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1508 ttydir);
1509 if (ret >= sizeof(path))
1510 return -1;
1511 ret = mkdir(path, 0755);
1512 if (ret && errno != EEXIST) {
959aee9c 1513 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1514 return -1;
1515 }
959aee9c 1516 INFO("created %s", path);
7c6ef2a2
SH
1517
1518 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1519 rootfs->mount, ttydir);
1520 if (ret >= sizeof(lxcpath)) {
959aee9c 1521 ERROR("console path too long");
7c6ef2a2
SH
1522 return -1;
1523 }
1524
1525 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1526 ret = unlink(path);
1527 if (ret && errno != ENOENT) {
959aee9c 1528 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1529 return -1;
1530 }
1531
1532 ret = creat(lxcpath, 0660);
1533 if (ret==-1 && errno != EEXIST) {
959aee9c 1534 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1535 return -1;
1536 }
4d44e274
SH
1537 if (ret >= 0)
1538 close(ret);
7c6ef2a2 1539
b5159817
DE
1540 if (console->master < 0) {
1541 INFO("no console");
7c6ef2a2
SH
1542 return 0;
1543 }
1544
592fd47a 1545 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1546 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1547 return -1;
1548 }
1549
1550 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1551 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1552 if (ret >= sizeof(lxcpath)) {
1553 ERROR("lxc/console path too long");
1554 return -1;
1555 }
7c6ef2a2
SH
1556 ret = symlink(lxcpath, path);
1557 if (ret) {
1558 SYSERROR("failed to create symlink for console");
1559 return -1;
1560 }
1561
1562 INFO("console has been setup on %s", lxcpath);
cd54d859 1563
6e590161 1564 return 0;
1565}
1566
7c6ef2a2
SH
1567static int setup_console(const struct lxc_rootfs *rootfs,
1568 const struct lxc_console *console,
1569 char *ttydir)
1570{
1571 /* We don't have a rootfs, /dev/console will be shared */
1572 if (!rootfs->path)
1573 return 0;
1574 if (!ttydir)
1575 return setup_dev_console(rootfs, console);
1576
1577 return setup_ttydir_console(rootfs, console, ttydir);
1578}
1579
1bd051a6
SH
1580static int setup_kmsg(const struct lxc_rootfs *rootfs,
1581 const struct lxc_console *console)
1582{
1583 char kpath[MAXPATHLEN];
1584 int ret;
1585
222fea5a
DE
1586 if (!rootfs->path)
1587 return 0;
1bd051a6
SH
1588 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1589 if (ret < 0 || ret >= sizeof(kpath))
1590 return -1;
1591
1592 ret = unlink(kpath);
1593 if (ret && errno != ENOENT) {
959aee9c 1594 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1595 return -1;
1596 }
1597
1598 ret = symlink("console", kpath);
1599 if (ret) {
1600 SYSERROR("failed to create symlink for kmsg");
1601 return -1;
1602 }
1603
1604 return 0;
1605}
1606
998ac676
RT
1607static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1608{
1609 struct mount_opt *mo;
1610
1611 /* If opt is found in mount_opt, set or clear flags.
1612 * Otherwise append it to data. */
1613
1614 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1615 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1616 if (mo->clear)
1617 *flags &= ~mo->flag;
1618 else
1619 *flags |= mo->flag;
1620 return;
1621 }
1622 }
1623
1624 if (strlen(*data))
1625 strcat(*data, ",");
1626 strcat(*data, opt);
1627}
1628
/*
 * Split the comma-separated option string @mntopts into mount flags and
 * a data string.
 *
 * *@mntflags receives the flags of all recognised options. *@mntdata
 * receives a newly allocated comma-separated string of the remaining
 * options, or NULL when there are none; the caller frees it.
 * A NULL @mntopts yields flags 0 and data NULL.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *token, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	token = strtok_r(copy, ",", &state);
	while (token != NULL) {
		parse_mntopt(token, mntflags, &extra);
		token = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1667
6fd5e769
SH
/*
 * Terminate @word in place at its first space or tab, if it has one.
 */
static void null_endofword(char *word)
{
	/* strcspn() stops at the first blank or at the existing NUL */
	word[strcspn(word, " \t")] = '\0';
}
1674
/*
 * Skip @nfields blank-separated fields in @src.
 *
 * Every single space or tab ends a field, so consecutive blanks count
 * as empty fields. Returns a pointer to the character after the
 * @nfields-th blank, or to the terminating NUL if the string ends first.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;

	while (nfields-- > 0) {
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;
		cursor++;
	}

	return cursor;
}
1692
911324ef
DL
/*
 * Perform one mount and, for bind or remount requests, a follow-up
 * remount so that the requested flags take effect.
 *
 * The first mount is done through safe_mount() with MS_REMOUNT masked
 * out. If MS_REMOUNT or MS_BIND was requested, the mount is then redone
 * with MS_REMOUNT added. With HAVE_STATVFS, flags reported by statvfs()
 * for @fsname (nosuid, nodev unless @dev, rdonly, noexec) are added to
 * the remount, and a pure bind mount that needs no extra flags skips the
 * remount altogether.
 *
 * When @optional is set, mount failures are logged and 0 is returned.
 * Returns 0 on success, -1 on error.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, int dev, const char *rootfs)
{
	unsigned long rdonly_flag = 0;
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	/* only bind mounts and explicit remounts need a second pass */
	if (!(mountflags & MS_REMOUNT) && !(mountflags & MS_BIND))
		goto done;

	DEBUG("remounting %s on %s to respect bind or remount options",
	      fsname ? fsname : "(none)", target ? target : "(none)");

	if (mountflags & MS_RDONLY)
		rdonly_flag |= MS_RDONLY;

#ifdef HAVE_STATVFS
	if (statvfs(fsname, &sb) == 0) {
		unsigned long required_flags = rdonly_flag;

		if (sb.f_flag & MS_NOSUID)
			required_flags |= MS_NOSUID;
		if ((sb.f_flag & MS_NODEV) && !dev)
			required_flags |= MS_NODEV;
		if (sb.f_flag & MS_RDONLY)
			required_flags |= MS_RDONLY;
		if (sb.f_flag & MS_NOEXEC)
			required_flags |= MS_NOEXEC;
		DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

		/*
		 * If this was a bind mount request, and required_flags
		 * does not have any flags which are not already in
		 * mountflags, then skip the remount
		 */
		if (!(mountflags & MS_REMOUNT) &&
		    !(required_flags & ~mountflags) && rdonly_flag == 0) {
			DEBUG("mountflags already was %lu, skipping remount",
			      mountflags);
			goto done;
		}

		mountflags |= required_flags;
	}
#endif

	if (mount(fsname, target, fstype, mountflags | MS_REMOUNT, data) < 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s",
		     fsname, target, strerror(errno));
		return 0;
	}

done:
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1769
4e4ca161
SH
/*
 * Remove the LXC-only options 'optional', 'create=dir' and 'create=file'
 * from mntent->mnt_opts, editing the string in place.
 *
 * Only complete comma-delimited options are removed, so an option that
 * merely contains one of these words is left alone. Every occurrence is
 * removed and no dangling comma is left behind. All other options keep
 * their order and spelling. A NULL mnt_opts is ignored.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	char *rd, *wr;
	int wrote_any = 0;

	if (!mntent->mnt_opts)
		return;

	/* compact kept options towards the front; wr never passes rd */
	rd = wr = mntent->mnt_opts;
	for (;;) {
		size_t len = strcspn(rd, ",");
		int is_last = (rd[len] == '\0');
		int drop = 0;
		size_t i;

		for (i = 0; i < sizeof(culled) / sizeof(culled[0]); i++) {
			if (len == strlen(culled[i]) &&
			    strncmp(rd, culled[i], len) == 0) {
				drop = 1;
				break;
			}
		}

		if (!drop) {
			if (wrote_any)
				*wr++ = ',';
			memmove(wr, rd, len);
			wr += len;
			wrote_any = 1;
		}

		if (is_last)
			break;
		rd += len + 1;
	}
	*wr = '\0';
}
1794
/*
 * Create the mount target @path if the fstab entry asks for it.
 *
 * For "overlay" and "aufs" entries the respective *_mkdir() helper is
 * called first. "create=dir" creates @path as a directory (including
 * parents). "create=file" creates the parent directory and an empty
 * file at @path, but only when @path does not exist yet.
 *
 * Returns 0 on success, -1 if a helper fails or the target could not be
 * created.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int ret = 0;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (ovl_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (aufs_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the heap pointer separately and
		 * free only that one.
		 */
		char *pathcopy = strdup(path);

		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
1835
ec50007f
CB
1836/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
1837 * without a rootfs. */
db4aba38 1838static inline int mount_entry_on_generic(struct mntent *mntent,
0a2dddd4
CB
1839 const char* path, const struct lxc_rootfs *rootfs,
1840 const char *lxc_name, const char *lxc_path)
4d5b72a1
NC
1841{
1842 unsigned long mntflags;
1843 char *mntdata;
1844 int ret;
1845 bool optional = hasmntopt(mntent, "optional") != NULL;
ae7a770e 1846 bool dev = hasmntopt(mntent, "dev") != NULL;
4d5b72a1 1847
ec50007f
CB
1848 char *rootfs_path = NULL;
1849 if (rootfs && rootfs->path)
1850 rootfs_path = rootfs->mount;
1851
0a2dddd4 1852 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, lxc_path);
34cfffb3 1853
608e3567
SH
1854 if (ret < 0)
1855 return optional ? 0 : -1;
1856
4e4ca161
SH
1857 cull_mntent_opt(mntent);
1858
a17b1e65
SG
1859 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1860 free(mntdata);
1861 return -1;
1862 }
1863
6e46cc0d 1864 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
ae7a770e 1865 mntdata, optional, dev, rootfs_path);
68c152ef 1866
911324ef 1867 free(mntdata);
911324ef
DL
1868 return ret;
1869}
1870
db4aba38
NC
/*
 * Mount an fstab entry for a container without its own rootfs.
 *
 * For containers created without a rootfs all mounts are treated as
 * absolute paths starting at / on the host, so a leading '/' is added
 * to relative mount directories.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char target[MAXPATHLEN];
	const char *lead = (mntent->mnt_dir[0] == '/') ? "" : "/";
	int len;

	len = snprintf(target, sizeof(target), "%s%s", lead, mntent->mnt_dir);
	if (len < 0 || (size_t)len >= sizeof(target)) {
		ERROR("path name too long");
		return -1;
	}

	return mount_entry_on_generic(mntent, target, NULL, NULL, NULL);
}
1890
4e4ca161 1891static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 1892 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
1893 const char *lxc_name,
1894 const char *lxc_path)
911324ef 1895{
013bd428 1896 char *aux;
59760f5d 1897 char path[MAXPATHLEN];
80a881b2 1898 int r, ret = 0, offset;
67e571de 1899 const char *lxcpath;
0ad19a3f 1900
593e8478 1901 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1902 if (!lxcpath) {
1903 ERROR("Out of memory");
1904 return -1;
1905 }
1906
80a881b2 1907 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1908 * use $lxcpath/CN/rootfs as the target prefix */
1909 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1910 if (r < 0 || r >= MAXPATHLEN)
1911 goto skipvarlib;
1912
1913 aux = strstr(mntent->mnt_dir, path);
1914 if (aux) {
1915 offset = strlen(path);
1916 goto skipabs;
1917 }
1918
1919skipvarlib:
013bd428
DL
1920 aux = strstr(mntent->mnt_dir, rootfs->path);
1921 if (!aux) {
1922 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 1923 return ret;
013bd428 1924 }
80a881b2
SH
1925 offset = strlen(rootfs->path);
1926
1927skipabs:
013bd428 1928
9ba8130c 1929 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1930 aux + offset);
1931 if (r < 0 || r >= MAXPATHLEN) {
1932 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
1933 return -1;
1934 }
1935
0a2dddd4 1936 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 1937}
d330fe7b 1938
4e4ca161 1939static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
1940 const struct lxc_rootfs *rootfs,
1941 const char *lxc_name,
1942 const char *lxc_path)
911324ef
DL
1943{
1944 char path[MAXPATHLEN];
911324ef 1945 int ret;
d330fe7b 1946
34cfffb3 1947 /* relative to root mount point */
6e46cc0d 1948 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
1433c9f9 1949 if (ret < 0 || ret >= sizeof(path)) {
9ba8130c
SH
1950 ERROR("path name too long");
1951 return -1;
1952 }
911324ef 1953
0a2dddd4 1954 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
1955}
1956
80a881b2 1957static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
0a2dddd4 1958 const char *lxc_name, const char *lxc_path)
911324ef 1959{
aaf901be
AM
1960 struct mntent mntent;
1961 char buf[4096];
911324ef 1962 int ret = -1;
e76b8764 1963
aaf901be 1964 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 1965
911324ef 1966 if (!rootfs->path) {
aaf901be 1967 if (mount_entry_on_systemfs(&mntent))
e76b8764 1968 goto out;
911324ef 1969 continue;
e76b8764
CDC
1970 }
1971
911324ef 1972 /* We have a separate root, mounts are relative to it */
aaf901be 1973 if (mntent.mnt_dir[0] != '/') {
0a2dddd4 1974 if (mount_entry_on_relative_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef
DL
1975 goto out;
1976 continue;
1977 }
cd54d859 1978
0a2dddd4 1979 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef 1980 goto out;
0ad19a3f 1981 }
cd54d859 1982
0ad19a3f 1983 ret = 0;
cd54d859
DL
1984
1985 INFO("mount points have been setup");
0ad19a3f 1986out:
e7938e9e
MN
1987 return ret;
1988}
1989
/*
 * Mount all entries of the fstab file @fstab.
 *
 * A NULL @fstab means there is nothing to do. Returns 0 on success,
 * -1 if the file cannot be opened or an entry fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *entries;
	int rc;

	if (!fstab)
		return 0;

	entries = setmntent(fstab, "r");
	if (!entries) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
	endmntent(entries);

	return rc;
}
2010
5ef5c9a3 2011FILE *make_anonymous_mount_file(struct lxc_list *mount)
e7938e9e 2012{
5ef5c9a3 2013 int ret;
e7938e9e 2014 char *mount_entry;
5ef5c9a3
CB
2015 struct lxc_list *iterator;
2016 FILE *file;
2017 int fd = -1;
2018
2019 fd = memfd_create("lxc_mount_file", MFD_CLOEXEC);
2020 if (fd < 0) {
2021 if (errno != ENOSYS)
2022 return NULL;
2023 file = tmpfile();
2024 } else {
2025 file = fdopen(fd, "r+");
2026 }
e7938e9e 2027
e7938e9e 2028 if (!file) {
fad6ef95 2029 int saved_errno = errno;
5ef5c9a3
CB
2030 if (fd != -1)
2031 close(fd);
fad6ef95 2032 ERROR("Could not create mount entry file: %s.", strerror(saved_errno));
9fc7f8c0 2033 return NULL;
e7938e9e
MN
2034 }
2035
2036 lxc_list_for_each(iterator, mount) {
2037 mount_entry = iterator->elem;
5ef5c9a3
CB
2038 ret = fprintf(file, "%s\n", mount_entry);
2039 if (ret < strlen(mount_entry))
2040 WARN("Could not write mount entry to anonymous mount file.");
2041 }
2042
2043 if (fseek(file, 0, SEEK_SET) < 0) {
2044 fclose(file);
2045 return NULL;
e7938e9e
MN
2046 }
2047
9fc7f8c0
TA
2048 return file;
2049}
2050
5ef5c9a3
CB
/*
 * Mount the entries held in the list @mount.
 *
 * The list is first written to an anonymous file so it can be processed
 * by the same code as an fstab file. Returns 0 on success, -1 on error.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	int rc;
	FILE *entries = make_anonymous_mount_file(mount);

	if (!entries)
		return -1;

	rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
	fclose(entries);

	return rc;
}
2067
bab88e68
CS
2068static int parse_cap(const char *cap)
2069{
2070 char *ptr = NULL;
84760c11 2071 size_t i;
2072 int capid = -1;
bab88e68 2073
7035407c
DE
2074 if (!strcmp(cap, "none"))
2075 return -2;
2076
bab88e68
CS
2077 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2078
2079 if (strcmp(cap, caps_opt[i].name))
2080 continue;
2081
2082 capid = caps_opt[i].value;
2083 break;
2084 }
2085
2086 if (capid < 0) {
2087 /* try to see if it's numeric, so the user may specify
2088 * capabilities that the running kernel knows about but
2089 * we don't */
2090 errno = 0;
2091 capid = strtol(cap, &ptr, 10);
2092 if (!ptr || *ptr != '\0' || errno != 0)
2093 /* not a valid number */
2094 capid = -1;
2095 else if (capid > lxc_caps_last_cap())
2096 /* we have a number but it's not a valid
2097 * capability */
2098 capid = -1;
2099 }
2100
2101 return capid;
2102}
2103
0769b82a
CS
2104int in_caplist(int cap, struct lxc_list *caps)
2105{
2106 struct lxc_list *iterator;
2107 int capid;
2108
2109 lxc_list_for_each(iterator, caps) {
2110 capid = parse_cap(iterator->elem);
2111 if (capid == cap)
2112 return 1;
2113 }
2114
2115 return 0;
2116}
2117
81810dd1
DL
2118static int setup_caps(struct lxc_list *caps)
2119{
2120 struct lxc_list *iterator;
2121 char *drop_entry;
bab88e68 2122 int capid;
81810dd1
DL
2123
2124 lxc_list_for_each(iterator, caps) {
2125
2126 drop_entry = iterator->elem;
2127
bab88e68 2128 capid = parse_cap(drop_entry);
d55bc1ad 2129
81810dd1 2130 if (capid < 0) {
1e11be34
DL
2131 ERROR("unknown capability %s", drop_entry);
2132 return -1;
81810dd1
DL
2133 }
2134
2135 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2136
2137 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2138 SYSERROR("failed to remove %s capability", drop_entry);
2139 return -1;
2140 }
81810dd1
DL
2141
2142 }
2143
1fb86a7c
SH
2144 DEBUG("capabilities have been setup");
2145
2146 return 0;
2147}
2148
2149static int dropcaps_except(struct lxc_list *caps)
2150{
2151 struct lxc_list *iterator;
2152 char *keep_entry;
1fb86a7c
SH
2153 int i, capid;
2154 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2155 INFO("found %d capabilities", numcaps);
1fb86a7c 2156
2caf9a97
SH
2157 if (numcaps <= 0 || numcaps > 200)
2158 return -1;
2159
1fb86a7c
SH
2160 // caplist[i] is 1 if we keep capability i
2161 int *caplist = alloca(numcaps * sizeof(int));
2162 memset(caplist, 0, numcaps * sizeof(int));
2163
2164 lxc_list_for_each(iterator, caps) {
2165
2166 keep_entry = iterator->elem;
2167
bab88e68 2168 capid = parse_cap(keep_entry);
1fb86a7c 2169
7035407c
DE
2170 if (capid == -2)
2171 continue;
2172
1fb86a7c
SH
2173 if (capid < 0) {
2174 ERROR("unknown capability %s", keep_entry);
2175 return -1;
2176 }
2177
8255688a 2178 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2179
2180 caplist[capid] = 1;
2181 }
2182 for (i=0; i<numcaps; i++) {
2183 if (caplist[i])
2184 continue;
2185 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2186 SYSERROR("failed to remove capability %d", i);
2187 return -1;
2188 }
1fb86a7c
SH
2189 }
2190
2191 DEBUG("capabilities have been setup");
81810dd1
DL
2192
2193 return 0;
2194}
2195
0ad19a3f 2196static int setup_hw_addr(char *hwaddr, const char *ifname)
2197{
2198 struct sockaddr sockaddr;
2199 struct ifreq ifr;
fad6ef95 2200 int ret, fd, saved_errno;
0ad19a3f 2201
3cfc0f3a
MN
2202 ret = lxc_convert_mac(hwaddr, &sockaddr);
2203 if (ret) {
2204 ERROR("mac address '%s' conversion failed : %s",
2205 hwaddr, strerror(-ret));
0ad19a3f 2206 return -1;
2207 }
2208
2209 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2210 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2211 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2212
2213 fd = socket(AF_INET, SOCK_DGRAM, 0);
2214 if (fd < 0) {
3ab87b66 2215 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2216 return -1;
2217 }
2218
2219 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
fad6ef95 2220 saved_errno = errno;
0ad19a3f 2221 close(fd);
2222 if (ret)
fad6ef95 2223 ERROR("ioctl failure : %s", strerror(saved_errno));
0ad19a3f 2224
5da6aa8c 2225 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2226
0ad19a3f 2227 return ret;
2228}
2229
82d5ae15 2230static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2231{
82d5ae15
DL
2232 struct lxc_list *iterator;
2233 struct lxc_inetdev *inetdev;
3cfc0f3a 2234 int err;
0ad19a3f 2235
82d5ae15
DL
2236 lxc_list_for_each(iterator, ip) {
2237
2238 inetdev = iterator->elem;
2239
0093bb8c
DL
2240 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2241 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2242 if (err) {
2243 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2244 ifindex, strerror(-err));
82d5ae15
DL
2245 return -1;
2246 }
2247 }
2248
2249 return 0;
0ad19a3f 2250}
2251
82d5ae15 2252static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2253{
82d5ae15 2254 struct lxc_list *iterator;
7fa9074f 2255 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2256 int err;
0ad19a3f 2257
82d5ae15
DL
2258 lxc_list_for_each(iterator, ip) {
2259
2260 inet6dev = iterator->elem;
2261
b3df193c 2262 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2263 &inet6dev->mcast, &inet6dev->acast,
2264 inet6dev->prefix);
3cfc0f3a
MN
2265 if (err) {
2266 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2267 ifindex, strerror(-err));
82d5ae15 2268 return -1;
3cfc0f3a 2269 }
82d5ae15
DL
2270 }
2271
2272 return 0;
0ad19a3f 2273}
2274
82d5ae15 2275static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2276{
0ad19a3f 2277 char ifname[IFNAMSIZ];
0ad19a3f 2278 char *current_ifname = ifname;
3cfc0f3a 2279 int err;
0ad19a3f 2280
82d5ae15
DL
2281 /* empty network namespace */
2282 if (!netdev->ifindex) {
b0efbac4 2283 if (netdev->flags & IFF_UP) {
d472214b 2284 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2285 if (err) {
2286 ERROR("failed to set the loopback up : %s",
2287 strerror(-err));
82d5ae15
DL
2288 return -1;
2289 }
82d5ae15 2290 }
40790553
SH
2291 if (netdev->type != LXC_NET_VETH)
2292 return 0;
2293 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2294 }
13954cce 2295
b466dc33 2296 /* get the new ifindex in case of physical netdev */
40790553 2297 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2298 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2299 ERROR("failed to get ifindex for %s",
2300 netdev->link);
2301 return -1;
2302 }
40790553 2303 }
b466dc33 2304
82d5ae15
DL
2305 /* retrieve the name of the interface */
2306 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2307 ERROR("no interface corresponding to index '%d'",
82d5ae15 2308 netdev->ifindex);
0ad19a3f 2309 return -1;
2310 }
13954cce 2311
018ef520 2312 /* default: let the system to choose one interface name */
9d083402 2313 if (!netdev->name)
fb6d9b2f
DL
2314 netdev->name = netdev->type == LXC_NET_PHYS ?
2315 netdev->link : "eth%d";
018ef520 2316
82d5ae15 2317 /* rename the interface name */
40790553
SH
2318 if (strcmp(ifname, netdev->name) != 0) {
2319 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2320 if (err) {
2321 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2322 strerror(-err));
2323 return -1;
2324 }
018ef520
DL
2325 }
2326
2327 /* Re-read the name of the interface because its name has changed
2328 * and would be automatically allocated by the system
2329 */
82d5ae15 2330 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2331 ERROR("no interface corresponding to index '%d'",
82d5ae15 2332 netdev->ifindex);
018ef520 2333 return -1;
0ad19a3f 2334 }
2335
82d5ae15
DL
2336 /* set a mac address */
2337 if (netdev->hwaddr) {
2338 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2339 ERROR("failed to setup hw address for '%s'",
82d5ae15 2340 current_ifname);
0ad19a3f 2341 return -1;
2342 }
2343 }
2344
82d5ae15
DL
2345 /* setup ipv4 addresses on the interface */
2346 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2347 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2348 ifname);
2349 return -1;
2350 }
2351
82d5ae15
DL
2352 /* setup ipv6 addresses on the interface */
2353 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2354 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2355 ifname);
2356 return -1;
2357 }
2358
82d5ae15 2359 /* set the network device up */
b0efbac4 2360 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2361 int err;
2362
d472214b 2363 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2364 if (err) {
2365 ERROR("failed to set '%s' up : %s", current_ifname,
2366 strerror(-err));
0ad19a3f 2367 return -1;
2368 }
2369
2370 /* the network is up, make the loopback up too */
d472214b 2371 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2372 if (err) {
2373 ERROR("failed to set the loopback up : %s",
2374 strerror(-err));
0ad19a3f 2375 return -1;
2376 }
2377 }
2378
f8fee0e2
MK
2379 /* We can only set up the default routes after bringing
2380 * up the interface, sine bringing up the interface adds
2381 * the link-local routes and we can't add a default
2382 * route if the gateway is not reachable. */
2383
2384 /* setup ipv4 gateway on the interface */
2385 if (netdev->ipv4_gateway) {
2386 if (!(netdev->flags & IFF_UP)) {
2387 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2388 return -1;
2389 }
2390
2391 if (lxc_list_empty(&netdev->ipv4)) {
2392 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2393 return -1;
2394 }
2395
2396 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2397 if (err) {
fc739df5
SG
2398 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2399 if (err) {
2400 ERROR("failed to add ipv4 dest for '%s': %s",
2401 ifname, strerror(-err));
2402 }
2403
2404 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2405 if (err) {
2406 ERROR("failed to setup ipv4 gateway for '%s': %s",
2407 ifname, strerror(-err));
2408 if (netdev->ipv4_gateway_auto) {
2409 char buf[INET_ADDRSTRLEN];
2410 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2411 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2412 }
2413 return -1;
19a26f82 2414 }
f8fee0e2
MK
2415 }
2416 }
2417
2418 /* setup ipv6 gateway on the interface */
2419 if (netdev->ipv6_gateway) {
2420 if (!(netdev->flags & IFF_UP)) {
2421 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2422 return -1;
2423 }
2424
2425 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2426 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2427 return -1;
2428 }
2429
2430 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2431 if (err) {
fc739df5
SG
2432 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2433 if (err) {
2434 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2435 ifname, strerror(-err));
19a26f82 2436 }
fc739df5
SG
2437
2438 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2439 if (err) {
2440 ERROR("failed to setup ipv6 gateway for '%s': %s",
2441 ifname, strerror(-err));
2442 if (netdev->ipv6_gateway_auto) {
2443 char buf[INET6_ADDRSTRLEN];
2444 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2445 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2446 }
2447 return -1;
2448 }
f8fee0e2
MK
2449 }
2450 }
2451
cd54d859
DL
2452 DEBUG("'%s' has been setup", current_ifname);
2453
0ad19a3f 2454 return 0;
2455}
2456
5f4535a3 2457static int setup_network(struct lxc_list *network)
0ad19a3f 2458{
82d5ae15 2459 struct lxc_list *iterator;
82d5ae15 2460 struct lxc_netdev *netdev;
0ad19a3f 2461
5f4535a3 2462 lxc_list_for_each(iterator, network) {
cd54d859 2463
5f4535a3 2464 netdev = iterator->elem;
82d5ae15
DL
2465
2466 if (setup_netdev(netdev)) {
2467 ERROR("failed to setup netdev");
2468 return -1;
2469 }
2470 }
cd54d859 2471
5f4535a3
DL
2472 if (!lxc_list_empty(network))
2473 INFO("network has been setup");
cd54d859
DL
2474
2475 return 0;
0ad19a3f 2476}
2477
2af6bd1b 2478/* try to move physical nics to the init netns */
5610055a 2479void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2af6bd1b 2480{
64d2fcb5 2481 int i, oldfd;
4ec31c52 2482 char ifname[IFNAMSIZ];
2af6bd1b 2483
5610055a 2484 if (netnsfd < 0 || conf->num_savednics == 0)
2af6bd1b
SH
2485 return;
2486
64d2fcb5 2487 INFO("Running to reset %d nic names.", conf->num_savednics);
5610055a 2488
64d2fcb5
CB
2489 oldfd = lxc_preserve_ns(getpid(), "net");
2490 if (oldfd < 0) {
2491 SYSERROR("Failed to open monitor netns fd.");
2af6bd1b
SH
2492 return;
2493 }
64d2fcb5 2494
2af6bd1b
SH
2495 if (setns(netnsfd, 0) != 0) {
2496 SYSERROR("Failed to enter container netns to reset nics");
2497 close(oldfd);
2498 return;
2499 }
2500 for (i=0; i<conf->num_savednics; i++) {
2501 struct saved_nic *s = &conf->saved_nics[i];
f2e206ff 2502 /* retrieve the name of the interface */
2503 if (!if_indextoname(s->ifindex, ifname)) {
2504 WARN("no interface corresponding to index '%d'", s->ifindex);
2505 continue;
2506 }
5610055a 2507 if (lxc_netdev_move_by_name(ifname, 1, s->orig_name))
f2e206ff 2508 WARN("Error moving nic name:%s back to host netns", ifname);
5610055a 2509 free(s->orig_name);
2af6bd1b 2510 }
5610055a
WB
2511 conf->num_savednics = 0;
2512
2af6bd1b
SH
2513 if (setns(oldfd, 0) != 0)
2514 SYSERROR("Failed to re-enter monitor's netns");
2515 close(oldfd);
2516}
2517
ae9242c8
SH
2518static char *default_rootfs_mount = LXCROOTFSMOUNT;
2519
7b379ab3 2520struct lxc_conf *lxc_conf_init(void)
089cd8b8 2521{
7b379ab3 2522 struct lxc_conf *new;
26ddeedd 2523 int i;
7b379ab3
MN
2524
2525 new = malloc(sizeof(*new));
2526 if (!new) {
2527 ERROR("lxc_conf_init : %m");
2528 return NULL;
2529 }
2530 memset(new, 0, sizeof(*new));
2531
b40a606e 2532 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2533 new->personality = -1;
124fa0a8 2534 new->autodev = 1;
596a818d
DE
2535 new->console.log_path = NULL;
2536 new->console.log_fd = -1;
28a4b0e5 2537 new->console.path = NULL;
63376d7d 2538 new->console.peer = -1;
b5159817
DE
2539 new->console.peerpty.busy = -1;
2540 new->console.peerpty.master = -1;
2541 new->console.peerpty.slave = -1;
63376d7d
DL
2542 new->console.master = -1;
2543 new->console.slave = -1;
2544 new->console.name[0] = '\0';
d2e30e99 2545 new->maincmd_fd = -1;
76a26f55 2546 new->nbd_idx = -1;
54c30e29 2547 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2548 if (!new->rootfs.mount) {
2549 ERROR("lxc_conf_init : %m");
2550 free(new);
2551 return NULL;
2552 }
d89de239 2553 new->kmsg = 0;
858377e4 2554 new->logfd = -1;
7b379ab3
MN
2555 lxc_list_init(&new->cgroup);
2556 lxc_list_init(&new->network);
2557 lxc_list_init(&new->mount_list);
81810dd1 2558 lxc_list_init(&new->caps);
1fb86a7c 2559 lxc_list_init(&new->keepcaps);
f6d3e3e4 2560 lxc_list_init(&new->id_map);
f979ac15 2561 lxc_list_init(&new->includes);
4184c3e1 2562 lxc_list_init(&new->aliens);
7c661726 2563 lxc_list_init(&new->environment);
26ddeedd
SH
2564 for (i=0; i<NUM_LXC_HOOKS; i++)
2565 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2566 lxc_list_init(&new->groups);
fe4de9a6
DE
2567 new->lsm_aa_profile = NULL;
2568 new->lsm_se_context = NULL;
5112cd70 2569 new->tmp_umount_proc = 0;
7b379ab3 2570
9f30a190
MM
2571 for (i = 0; i < LXC_NS_MAX; i++)
2572 new->inherit_ns_fd[i] = -1;
2573
72bb04e4
PT
2574 /* if running in a new user namespace, init and COMMAND
2575 * default to running as UID/GID 0 when using lxc-execute */
2576 new->init_uid = 0;
2577 new->init_gid = 0;
2578
7b379ab3 2579 return new;
089cd8b8
DL
2580}
2581
a589434e 2582static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2583{
8634bc19 2584 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2585 char veth2buf[IFNAMSIZ], *veth2;
b7b2fde4
CB
2586 int bridge_index, err;
2587 unsigned int mtu = 0;
13954cce 2588
8bee8851 2589 if (netdev->priv.veth_attr.pair) {
e892973e 2590 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2591 if (handler->conf->reboot)
2592 lxc_netdev_delete_by_name(veth1);
2593 } else {
9ba8130c
SH
2594 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2595 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2596 ERROR("veth1 name too long");
2597 return -1;
2598 }
a0265685 2599 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2600 if (!veth1) {
2601 ERROR("failed to allocate a temporary name");
2602 return -1;
2603 }
74a2b586
JK
2604 /* store away for deconf */
2605 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2606 }
82d5ae15 2607
0e391e57 2608 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2609 veth2 = lxc_mkifname(veth2buf);
ad40563e 2610 if (!veth2) {
82d5ae15 2611 ERROR("failed to allocate a temporary name");
ad40563e 2612 goto out_delete;
0ad19a3f 2613 }
2614
3cfc0f3a
MN
2615 err = lxc_veth_create(veth1, veth2);
2616 if (err) {
2e2d6a7b 2617 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2618 strerror(-err));
ad40563e 2619 goto out_delete;
0ad19a3f 2620 }
13954cce 2621
49684c0b
CS
2622 /* changing the high byte of the mac address to 0xfe, the bridge interface
2623 * will always keep the host's mac address and not take the mac address
2624 * of a container */
2625 err = setup_private_host_hw_addr(veth1);
2626 if (err) {
2e2d6a7b 2627 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2628 veth1, strerror(-err));
2629 goto out_delete;
2630 }
2631
af651aa9
SN
2632 netdev->ifindex = if_nametoindex(veth2);
2633 if (!netdev->ifindex) {
2634 ERROR("failed to retrieve the index for %s", veth2);
2635 goto out_delete;
2636 }
2637
82d5ae15 2638 if (netdev->mtu) {
b7b2fde4
CB
2639 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
2640 WARN("Failed to parse mtu from.");
2641 else
2642 INFO("Retrieved mtu %d", mtu);
e54864d3 2643 } else if (netdev->link) {
e9280f65 2644 bridge_index = if_nametoindex(netdev->link);
729e8bf6
CB
2645 if (bridge_index) {
2646 mtu = netdev_get_mtu(bridge_index);
2647 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
2648 } else {
2649 mtu = netdev_get_mtu(netdev->ifindex);
2650 INFO("Retrieved mtu %d from %s", mtu, veth2);
2651 }
e54864d3
NC
2652 }
2653
2654 if (mtu) {
2655 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2656 if (!err)
e54864d3 2657 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2658 if (err) {
e54864d3
NC
2659 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2660 mtu, veth1, veth2, strerror(-err));
eb14c10a 2661 goto out_delete;
75d09f83
DL
2662 }
2663 }
2664
3cfc0f3a 2665 if (netdev->link) {
c43cbc04 2666 err = lxc_bridge_attach(handler->lxcpath, handler->name, netdev->link, veth1);
3cfc0f3a 2667 if (err) {
2e2d6a7b 2668 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2669 veth1, netdev->link, strerror(-err));
2670 goto out_delete;
2671 }
738d0deb 2672 INFO("Attached '%s': to the bridge '%s': ", veth1, netdev->link);
eb14c10a
DL
2673 }
2674
d472214b 2675 err = lxc_netdev_up(veth1);
6e35af2e
DL
2676 if (err) {
2677 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2678 goto out_delete;
0ad19a3f 2679 }
2680
e3b4c4c4 2681 if (netdev->upscript) {
751d9dcd
DL
2682 err = run_script(handler->name, "net", netdev->upscript, "up",
2683 "veth", veth1, (char*) NULL);
2684 if (err)
e3b4c4c4 2685 goto out_delete;
e3b4c4c4
ST
2686 }
2687
a589434e 2688 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2689 veth1, veth2, netdev->ifindex);
2690
6ab9ab6d 2691 return 0;
eb14c10a
DL
2692
2693out_delete:
b84f58b9 2694 lxc_netdev_delete_by_name(veth1);
f10fad2f 2695 if (!netdev->priv.veth_attr.pair)
ad40563e 2696 free(veth1);
f10fad2f 2697 free(veth2);
6ab9ab6d 2698 return -1;
13954cce 2699}
d957ae2d 2700
74a2b586
JK
2701static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2702{
2703 char *veth1;
2704 int err;
2705
2706 if (netdev->priv.veth_attr.pair)
2707 veth1 = netdev->priv.veth_attr.pair;
2708 else
2709 veth1 = netdev->priv.veth_attr.veth1;
2710
2711 if (netdev->downscript) {
2712 err = run_script(handler->name, "net", netdev->downscript,
2713 "down", "veth", veth1, (char*) NULL);
2714 if (err)
2715 return -1;
2716 }
2717 return 0;
2718}
2719
a589434e 2720static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2721{
0e391e57 2722 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2723 int err;
d957ae2d
MT
2724
2725 if (!netdev->link) {
2726 ERROR("no link specified for macvlan netdev");
2727 return -1;
2728 }
13954cce 2729
9ba8130c
SH
2730 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2731 if (err >= sizeof(peerbuf))
2732 return -1;
82d5ae15 2733
a0265685 2734 peer = lxc_mkifname(peerbuf);
ad40563e 2735 if (!peer) {
82d5ae15
DL
2736 ERROR("failed to make a temporary name");
2737 return -1;
0ad19a3f 2738 }
2739
3cfc0f3a
MN
2740 err = lxc_macvlan_create(netdev->link, peer,
2741 netdev->priv.macvlan_attr.mode);
2742 if (err) {
2743 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2744 peer, netdev->link, strerror(-err));
ad40563e 2745 goto out;
0ad19a3f 2746 }
2747
82d5ae15
DL
2748 netdev->ifindex = if_nametoindex(peer);
2749 if (!netdev->ifindex) {
36eb9bde 2750 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2751 goto out;
22ebac19 2752 }
2753
e3b4c4c4 2754 if (netdev->upscript) {
751d9dcd
DL
2755 err = run_script(handler->name, "net", netdev->upscript, "up",
2756 "macvlan", netdev->link, (char*) NULL);
2757 if (err)
ad40563e 2758 goto out;
e3b4c4c4
ST
2759 }
2760
a589434e 2761 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2762 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2763
d957ae2d 2764 return 0;
ad40563e
ÇO
2765out:
2766 lxc_netdev_delete_by_name(peer);
2767 free(peer);
2768 return -1;
0ad19a3f 2769}
2770
74a2b586
JK
2771static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2772{
2773 int err;
2774
2775 if (netdev->downscript) {
2776 err = run_script(handler->name, "net", netdev->downscript,
2777 "down", "macvlan", netdev->link,
2778 (char*) NULL);
2779 if (err)
2780 return -1;
2781 }
2782 return 0;
2783}
2784
a589434e
JN
2785/* XXX: merge with instantiate_macvlan */
2786static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2787{
2788 char peer[IFNAMSIZ];
3cfc0f3a 2789 int err;
82f58d03 2790 static uint16_t vlan_cntr = 0;
b7b2fde4 2791 unsigned int mtu = 0;
26c39028
JHS
2792
2793 if (!netdev->link) {
2794 ERROR("no link specified for vlan netdev");
2795 return -1;
2796 }
2797
82f58d03 2798 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2799 if (err >= sizeof(peer)) {
2800 ERROR("peer name too long");
2801 return -1;
2802 }
26c39028 2803
3cfc0f3a
MN
2804 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2805 if (err) {
2806 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2807 peer, netdev->link, strerror(-err));
26c39028
JHS
2808 return -1;
2809 }
2810
2811 netdev->ifindex = if_nametoindex(peer);
2812 if (!netdev->ifindex) {
2813 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2814 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2815 return -1;
2816 }
2817
a589434e 2818 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e 2819 netdev->ifindex);
b4fb7de1 2820 if (netdev->mtu) {
b7b2fde4
CB
2821 if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
2822 ERROR("Failed to retrieve mtu from: '%d'/'%s'.",
2823 netdev->ifindex, netdev->name);
2824 return -1;
2825 }
2826 err = lxc_netdev_set_mtu(peer, mtu);
b4fb7de1
VL
2827 if (err) {
2828 ERROR("failed to set mtu '%s' for %s : %s",
2829 netdev->mtu, peer, strerror(-err));
2830 lxc_netdev_delete_by_name(peer);
2831 return -1;
2832 }
2833 }
e892973e 2834
26c39028
JHS
2835 return 0;
2836}
2837
/* Deconfiguration hook for vlan devices: nothing to do, always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2842
a589434e 2843static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2844{
6168e99f
DL
2845 if (!netdev->link) {
2846 ERROR("no link specified for the physical interface");
2847 return -1;
2848 }
2849
9d083402 2850 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2851 if (!netdev->ifindex) {
9d083402 2852 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2853 return -1;
2854 }
2855
e3b4c4c4
ST
2856 if (netdev->upscript) {
2857 int err;
751d9dcd
DL
2858 err = run_script(handler->name, "net", netdev->upscript,
2859 "up", "phys", netdev->link, (char*) NULL);
2860 if (err)
e3b4c4c4 2861 return -1;
e3b4c4c4
ST
2862 }
2863
82d5ae15 2864 return 0;
0ad19a3f 2865}
2866
74a2b586
JK
2867static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2868{
2869 int err;
2870
2871 if (netdev->downscript) {
2872 err = run_script(handler->name, "net", netdev->downscript,
2873 "down", "phys", netdev->link, (char*) NULL);
2874 if (err)
2875 return -1;
2876 }
2877 return 0;
2878}
2879
a589434e 2880static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2881{
2882 netdev->ifindex = 0;
2883 return 0;
2884}
2885
a589434e 2886static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2887{
82d5ae15 2888 netdev->ifindex = 0;
e3b4c4c4
ST
2889 if (netdev->upscript) {
2890 int err;
751d9dcd
DL
2891 err = run_script(handler->name, "net", netdev->upscript,
2892 "up", "empty", (char*) NULL);
2893 if (err)
e3b4c4c4 2894 return -1;
e3b4c4c4 2895 }
82d5ae15 2896 return 0;
0ad19a3f 2897}
2898
74a2b586
JK
2899static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2900{
2901 int err;
2902
2903 if (netdev->downscript) {
2904 err = run_script(handler->name, "net", netdev->downscript,
2905 "down", "empty", (char*) NULL);
2906 if (err)
2907 return -1;
2908 }
2909 return 0;
2910}
2911
/* Deconfiguration hook for the "none" type: nothing to do, always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2916
2917int lxc_requests_empty_network(struct lxc_handler *handler)
2918{
2919 struct lxc_list *network = &handler->conf->network;
2920 struct lxc_list *iterator;
2921 struct lxc_netdev *netdev;
2922 bool found_none = false, found_nic = false;
2923
2924 if (lxc_list_empty(network))
2925 return 0;
2926
2927 lxc_list_for_each(iterator, network) {
2928
2929 netdev = iterator->elem;
2930
2931 if (netdev->type == LXC_NET_NONE)
2932 found_none = true;
2933 else
2934 found_nic = true;
2935 }
2936 if (found_none && !found_nic)
2937 return 1;
2938 return 0;
2939}
2940
e3b4c4c4 2941int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2942{
e3b4c4c4 2943 struct lxc_list *network = &handler->conf->network;
82d5ae15 2944 struct lxc_list *iterator;
82d5ae15 2945 struct lxc_netdev *netdev;
cbef6c52
SH
2946 int am_root = (getuid() == 0);
2947
2948 if (!am_root)
2949 return 0;
0ad19a3f 2950
5f4535a3 2951 lxc_list_for_each(iterator, network) {
0ad19a3f 2952
5f4535a3 2953 netdev = iterator->elem;
13954cce 2954
24654103 2955 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2956 ERROR("invalid network configuration type '%d'",
5f4535a3 2957 netdev->type);
82d5ae15
DL
2958 return -1;
2959 }
0ad19a3f 2960
e3b4c4c4 2961 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2962 ERROR("failed to create netdev");
2963 return -1;
2964 }
e3b4c4c4 2965
0ad19a3f 2966 }
2967
2968 return 0;
2969}
2970
358daf49 2971bool lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2972{
e97946ae 2973 int ret;
74a2b586 2974 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2975 struct lxc_list *iterator;
2976 struct lxc_netdev *netdev;
358daf49 2977 bool deleted_all = true;
7fef7a06
DL
2978
2979 lxc_list_for_each(iterator, network) {
2980 netdev = iterator->elem;
d472214b 2981
74a2b586 2982 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352 2983 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
358daf49
CB
2984 WARN("Failed to rename interface with index %d "
2985 "to its initial name \"%s\".",
2986 netdev->ifindex, netdev->link);
d472214b 2987 continue;
d8f8e352 2988 }
d472214b 2989
74a2b586 2990 if (netdev_deconf[netdev->type](handler, netdev)) {
e97946ae 2991 WARN("Failed to destroy netdev");
74a2b586
JK
2992 }
2993
d8f8e352
DL
2994 /* Recent kernel remove the virtual interfaces when the network
2995 * namespace is destroyed but in case we did not moved the
2996 * interface to the network namespace, we have to destroy it
2997 */
e97946ae
CB
2998 if (netdev->ifindex != 0) {
2999 ret = lxc_netdev_delete_by_index(netdev->ifindex);
358daf49
CB
3000 if (-ret == ENODEV) {
3001 INFO("Interface \"%s\" with index %d already "
3002 "deleted or existing in different network "
3003 "namespace.",
3004 netdev->name ? netdev->name : "(null)",
3005 netdev->ifindex);
3006 } else if (ret < 0) {
3007 deleted_all = false;
3008 WARN("Failed to remove interface \"%s\" with "
3009 "index %d: %s.",
3010 netdev->name ? netdev->name : "(null)",
3011 netdev->ifindex, strerror(-ret));
3012 } else {
3013 INFO("Removed interface \"%s\" with index %d.",
3014 netdev->name ? netdev->name : "(null)",
3015 netdev->ifindex);
3016 }
e97946ae
CB
3017 }
3018
3019 /* Explicitly delete host veth device to prevent lingering
3020 * devices. We had issues in LXD around this.
3021 */
3022 if (netdev->type == LXC_NET_VETH) {
358daf49
CB
3023 char *hostveth;
3024 if (netdev->priv.veth_attr.pair) {
e97946ae 3025 hostveth = netdev->priv.veth_attr.pair;
358daf49
CB
3026 ret = lxc_netdev_delete_by_name(hostveth);
3027 if (ret < 0) {
3028 WARN("Failed to remove interface \"%s\" from host: %s.", hostveth, strerror(-ret));
3029 } else {
3030 INFO("Removed interface \"%s\" from host.", hostveth);
3031 free(netdev->priv.veth_attr.pair);
3032 netdev->priv.veth_attr.pair = NULL;
3033 }
3034 } else if (strlen(netdev->priv.veth_attr.veth1) > 0) {
e97946ae 3035 hostveth = netdev->priv.veth_attr.veth1;
e97946ae 3036 ret = lxc_netdev_delete_by_name(hostveth);
358daf49
CB
3037 if (ret < 0) {
3038 WARN("Failed to remove \"%s\" from host: %s.", hostveth, strerror(-ret));
3039 } else {
3040 INFO("Removed interface \"%s\" from host.", hostveth);
3041 memset((void *)&netdev->priv.veth_attr.veth1, 0, sizeof(netdev->priv.veth_attr.veth1));
3042 }
e97946ae
CB
3043 }
3044 }
7fef7a06 3045 }
358daf49
CB
3046
3047 return deleted_all;
7fef7a06
DL
3048}
3049
45e854dc
SG
3050#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3051
fe1f672f 3052/* lxc-user-nic returns "interface_name:interface_name\n" */
eab15c1e 3053#define MAX_BUFFER_SIZE IFNAMSIZ * 2 + 2
c43cbc04
SH
3054static int unpriv_assign_nic(const char *lxcpath, char *lxcname,
3055 struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3056{
3057 pid_t child;
a7242d9a
ÇO
3058 int bytes, pipefd[2];
3059 char *token, *saveptr = NULL;
fe1f672f 3060 char buffer[MAX_BUFFER_SIZE];
091045f8 3061 char netdev_link[IFNAMSIZ + 1];
cbef6c52
SH
3062
3063 if (netdev->type != LXC_NET_VETH) {
3064 ERROR("nic type %d not support for unprivileged use",
091045f8 3065 netdev->type);
cbef6c52
SH
3066 return -1;
3067 }
3068
091045f8 3069 if (pipe(pipefd) < 0) {
a7242d9a
ÇO
3070 SYSERROR("pipe failed");
3071 return -1;
3072 }
3073
091045f8
CB
3074 child = fork();
3075 if (child < 0) {
cbef6c52 3076 SYSERROR("fork");
a7242d9a
ÇO
3077 close(pipefd[0]);
3078 close(pipefd[1]);
3079 return -1;
3080 }
3081
3082 if (child == 0) { // child
091045f8
CB
3083 /* Call lxc-user-nic pid type bridge. */
3084 int ret;
3085 char pidstr[LXC_NUMSTRLEN64];
3086
3087 close(pipefd[0]); /* Close the read-end of the pipe. */
3088
3089 /* Redirect stdout to write-end of the pipe. */
3090 ret = dup2(pipefd[1], STDOUT_FILENO);
3091 close(pipefd[1]); /* Close the write-end of the pipe. */
3092 if (ret < 0) {
3093 SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor.");
3094 exit(EXIT_FAILURE);
3095 }
a7242d9a 3096
091045f8 3097 if (netdev->link)
cff7b5eb 3098 strncpy(netdev_link, netdev->link, IFNAMSIZ);
091045f8 3099 else
cff7b5eb 3100 strncpy(netdev_link, "none", IFNAMSIZ);
091045f8
CB
3101
3102 ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
3103 if (ret < 0 || ret >= LXC_NUMSTRLEN64)
3104 exit(EXIT_FAILURE);
3105 pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
3106
3107 INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath,
3108 lxcname, pidstr, netdev_link, netdev->name);
c43cbc04 3109 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname,
091045f8
CB
3110 pidstr, "veth", netdev_link, netdev->name, NULL);
3111
3112 SYSERROR("Failed to exec lxc-user-nic.");
3113 exit(EXIT_FAILURE);
a7242d9a
ÇO
3114 }
3115
3116 /* close the write-end of the pipe */
3117 close(pipefd[1]);
3118
fe1f672f 3119 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
091045f8
CB
3120 if (bytes < 0)
3121 SYSERROR("Failed to read from pipe file descriptor.");
a7242d9a
ÇO
3122 buffer[bytes - 1] = '\0';
3123
3124 if (wait_for_pid(child) != 0) {
3125 close(pipefd[0]);
cbef6c52
SH
3126 return -1;
3127 }
3128
a7242d9a
ÇO
3129 /* close the read-end of the pipe */
3130 close(pipefd[0]);
cbef6c52 3131
a7242d9a
ÇO
3132 /* fill netdev->name field */
3133 token = strtok_r(buffer, ":", &saveptr);
3134 if (!token)
3135 return -1;
091045f8
CB
3136
3137 netdev->name = malloc(IFNAMSIZ + 1);
658979c5 3138 if (!netdev->name) {
091045f8 3139 SYSERROR("Failed to allocate memory.");
658979c5
SH
3140 return -1;
3141 }
091045f8 3142 memset(netdev->name, 0, IFNAMSIZ + 1);
658979c5 3143 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3144
3145 /* fill netdev->veth_attr.pair field */
3146 token = strtok_r(NULL, ":", &saveptr);
3147 if (!token)
3148 return -1;
091045f8 3149
a7242d9a 3150 netdev->priv.veth_attr.pair = strdup(token);
658979c5 3151 if (!netdev->priv.veth_attr.pair) {
091045f8 3152 ERROR("Failed to allocate memory.");
658979c5
SH
3153 return -1;
3154 }
45e854dc 3155
a7242d9a 3156 return 0;
cbef6c52
SH
3157}
3158
c43cbc04
SH
3159int lxc_assign_network(const char *lxcpath, char *lxcname,
3160 struct lxc_list *network, pid_t pid)
0ad19a3f 3161{
82d5ae15 3162 struct lxc_list *iterator;
82d5ae15 3163 struct lxc_netdev *netdev;
f2e206ff 3164 char ifname[IFNAMSIZ];
cbef6c52 3165 int am_root = (getuid() == 0);
3cfc0f3a 3166 int err;
0ad19a3f 3167
5f4535a3 3168 lxc_list_for_each(iterator, network) {
82d5ae15 3169
5f4535a3 3170 netdev = iterator->elem;
82d5ae15 3171
fbb16259 3172 if (netdev->type == LXC_NET_VETH && !am_root) {
c43cbc04 3173 if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid))
cbef6c52 3174 return -1;
658979c5
SH
3175 // lxc-user-nic has moved the nic to the new ns.
3176 // unpriv_assign_nic() fills in netdev->name.
3177 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3178 continue;
3179 }
236087a6 3180
fbb16259
SH
3181 /* empty network namespace, nothing to move */
3182 if (!netdev->ifindex)
3183 continue;
3184
f2e206ff 3185 /* retrieve the name of the interface */
3186 if (!if_indextoname(netdev->ifindex, ifname)) {
3187 ERROR("no interface corresponding to index '%d'", netdev->ifindex);
3188 return -1;
3189 }
3190
3191 err = lxc_netdev_move_by_name(ifname, pid, NULL);
3cfc0f3a
MN
3192 if (err) {
3193 ERROR("failed to move '%s' to the container : %s",
3194 netdev->link, strerror(-err));
82d5ae15
DL
3195 return -1;
3196 }
3197
198cbbaa 3198 DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid);
0ad19a3f 3199 }
3200
3201 return 0;
3202}
3203
251d0d2a
DE
3204static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3205 size_t buf_size)
f6d3e3e4
SH
3206{
3207 char path[PATH_MAX];
e4ccd113 3208 int ret, closeret;
f6d3e3e4
SH
3209 FILE *f;
3210
3211 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3212 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3213 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3214 return -E2BIG;
3215 }
3216 f = fopen(path, "w");
3217 if (!f) {
3218 perror("open");
3219 return -EINVAL;
3220 }
251d0d2a 3221 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3222 if (ret < 0)
e4ccd113
SH
3223 SYSERROR("writing id mapping");
3224 closeret = fclose(f);
3225 if (closeret)
3226 SYSERROR("writing id mapping");
3227 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3228}
3229
3230int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3231{
3232 struct lxc_list *iterator;
3233 struct id_map *map;
8afb3e61 3234 int ret = 0, use_shadow = 0;
251d0d2a 3235 enum idtype type;
8afb3e61
SG
3236 char *buf = NULL, *pos, *cmdpath = NULL;
3237
22038de5
SH
3238 /*
3239 * If newuidmap exists, that is, if shadow is handing out subuid
3240 * ranges, then insist that root also reserve ranges in subuid. This
3241 * will protected it by preventing another user from being handed the
3242 * range by shadow.
3243 */
9d9c111c 3244 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3245 if (cmdpath) {
3246 use_shadow = 1;
3247 free(cmdpath);
3248 }
3249
0e6e3a41
SG
3250 if (!use_shadow && geteuid()) {
3251 ERROR("Missing newuidmap/newgidmap");
3252 return -1;
3253 }
251d0d2a
DE
3254
3255 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3256 int left, fill;
cf3ef16d
SH
3257 int had_entry = 0;
3258 if (!buf) {
3259 buf = pos = malloc(4096);
4f7521b4
SH
3260 if (!buf)
3261 return -ENOMEM;
cf3ef16d
SH
3262 }
3263 pos = buf;
0e6e3a41 3264 if (use_shadow)
d1838f34 3265 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3266 type == ID_TYPE_UID ? 'u' : 'g',
3267 pid);
4f7521b4 3268
cf3ef16d
SH
3269 lxc_list_for_each(iterator, idmap) {
3270 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3271 map = iterator->elem;
cf3ef16d
SH
3272 if (map->idtype != type)
3273 continue;
3274
3275 had_entry = 1;
3276 left = 4096 - (pos - buf);
d1838f34 3277 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3278 use_shadow ? " " : "",
d1838f34 3279 map->nsid, map->hostid, map->range,
0e6e3a41 3280 use_shadow ? "" : "\n");
cf3ef16d
SH
3281 if (fill <= 0 || fill >= left)
3282 SYSERROR("snprintf failed, too many mappings");
3283 pos += fill;
251d0d2a 3284 }
cf3ef16d 3285 if (!had_entry)
4f7521b4 3286 continue;
cf3ef16d 3287
0e6e3a41 3288 if (!use_shadow) {
cf3ef16d 3289 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3290 } else {
3291 left = 4096 - (pos - buf);
3292 fill = snprintf(pos, left, "\n");
3293 if (fill <= 0 || fill >= left)
3294 SYSERROR("snprintf failed, too many mappings");
3295 pos += fill;
cf3ef16d 3296 ret = system(buf);
d1838f34 3297 }
cf3ef16d 3298
f6d3e3e4
SH
3299 if (ret)
3300 break;
3301 }
251d0d2a 3302
f10fad2f 3303 free(buf);
f6d3e3e4
SH
3304 return ret;
3305}
3306
cf3ef16d 3307/*
7b50c609
TS
3308 * return the host uid/gid to which the container root is mapped in
3309 * *val.
0b3a6504 3310 * Return true if id was found, false otherwise.
cf3ef16d 3311 */
2a9a80cb 3312bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3313 unsigned long *val)
cf3ef16d
SH
3314{
3315 struct lxc_list *it;
3316 struct id_map *map;
3317
3318 lxc_list_for_each(it, &conf->id_map) {
3319 map = it->elem;
7b50c609 3320 if (map->idtype != idtype)
cf3ef16d
SH
3321 continue;
3322 if (map->nsid != 0)
3323 continue;
2a9a80cb
SH
3324 *val = map->hostid;
3325 return true;
cf3ef16d 3326 }
2a9a80cb 3327 return false;
cf3ef16d
SH
3328}
3329
2133f58c 3330int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3331{
3332 struct lxc_list *it;
3333 struct id_map *map;
3334 lxc_list_for_each(it, &conf->id_map) {
3335 map = it->elem;
2133f58c 3336 if (map->idtype != idtype)
cf3ef16d
SH
3337 continue;
3338 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3339 return (id - map->hostid) + map->nsid;
cf3ef16d 3340 }
57d116ab 3341 return -1;
cf3ef16d
SH
3342}
3343
2133f58c 3344int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3345{
3346 struct lxc_list *it;
3347 struct id_map *map;
2133f58c 3348 unsigned int freeid = 0;
cf3ef16d
SH
3349again:
3350 lxc_list_for_each(it, &conf->id_map) {
3351 map = it->elem;
2133f58c 3352 if (map->idtype != idtype)
cf3ef16d
SH
3353 continue;
3354 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3355 freeid = map->nsid + map->range;
3356 goto again;
3357 }
3358 }
3359 return freeid;
3360}
3361
19a26f82
MK
3362int lxc_find_gateway_addresses(struct lxc_handler *handler)
3363{
3364 struct lxc_list *network = &handler->conf->network;
3365 struct lxc_list *iterator;
3366 struct lxc_netdev *netdev;
3367 int link_index;
3368
3369 lxc_list_for_each(iterator, network) {
3370 netdev = iterator->elem;
3371
3372 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3373 continue;
3374
3375 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3376 ERROR("gateway = auto only supported for "
3377 "veth and macvlan");
3378 return -1;
3379 }
3380
3381 if (!netdev->link) {
3382 ERROR("gateway = auto needs a link interface");
3383 return -1;
3384 }
3385
3386 link_index = if_nametoindex(netdev->link);
3387 if (!link_index)
3388 return -EINVAL;
3389
3390 if (netdev->ipv4_gateway_auto) {
3391 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3392 ERROR("failed to automatically find ipv4 gateway "
3393 "address from link interface '%s'", netdev->link);
3394 return -1;
3395 }
3396 }
3397
3398 if (netdev->ipv6_gateway_auto) {
3399 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3400 ERROR("failed to automatically find ipv6 gateway "
3401 "address from link interface '%s'", netdev->link);
3402 return -1;
3403 }
3404 }
3405 }
3406
3407 return 0;
3408}
3409
5e4a62bf 3410int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3411{
5e4a62bf 3412 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3413 int i, ret;
b0a33c1e 3414
5e4a62bf
DL
3415 /* no tty in the configuration */
3416 if (!conf->tty)
b0a33c1e 3417 return 0;
3418
13954cce 3419 tty_info->pty_info =
e4e7d59d 3420 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3421 if (!tty_info->pty_info) {
36eb9bde 3422 SYSERROR("failed to allocate pty_info");
985d15b1 3423 return -1;
b0a33c1e 3424 }
3425
985d15b1 3426 for (i = 0; i < conf->tty; i++) {
13954cce 3427
b0a33c1e 3428 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3429
025ed0f3
SH
3430 process_lock();
3431 ret = openpty(&pty_info->master, &pty_info->slave,
3432 pty_info->name, NULL, NULL);
3433 process_unlock();
3434 if (ret) {
36eb9bde 3435 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3436 tty_info->nbtty = i;
3437 lxc_delete_tty(tty_info);
3438 return -1;
b0a33c1e 3439 }
3440
5332bb84
DL
3441 DEBUG("allocated pty '%s' (%d/%d)",
3442 pty_info->name, pty_info->master, pty_info->slave);
3443
3ec1648d 3444 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3445 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3446 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3447
b0a33c1e 3448 pty_info->busy = 0;
3449 }
3450
985d15b1 3451 tty_info->nbtty = conf->tty;
1ac470c0
DL
3452
3453 INFO("tty's configured");
3454
985d15b1 3455 return 0;
b0a33c1e 3456}
3457
3458void lxc_delete_tty(struct lxc_tty_info *tty_info)
3459{
3460 int i;
3461
3462 for (i = 0; i < tty_info->nbtty; i++) {
3463 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3464
3465 close(pty_info->master);
3466 close(pty_info->slave);
3467 }
3468
3469 free(tty_info->pty_info);
e00c0242 3470 tty_info->pty_info = NULL;
b0a33c1e 3471 tty_info->nbtty = 0;
3472}
3473
f6d3e3e4 3474/*
7b50c609
TS
3475 * chown_mapped_root: for an unprivileged user with uid/gid X to
3476 * chown a dir to subuid/subgid Y, he needs to run chown as root
3477 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3478 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3479 * root is privileged with respect to hostuid/hostgid X, allowing
3480 * him to do the chown.
f6d3e3e4 3481 */
c4d10a05 3482int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3483{
7b50c609
TS
3484 uid_t rootuid;
3485 gid_t rootgid;
c4d10a05 3486 pid_t pid;
2a9a80cb 3487 unsigned long val;
a7ef8753 3488 char *chownpath = path;
f6d3e3e4 3489
2a9a80cb 3490 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3491 ERROR("No mapping for container root");
3492 return -1;
f6d3e3e4 3493 }
7b50c609
TS
3494 rootuid = (uid_t) val;
3495 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3496 ERROR("No mapping for container root");
3497 return -1;
3498 }
3499 rootgid = (gid_t) val;
2a9a80cb 3500
a7ef8753
SH
3501 /*
3502 * In case of overlay, we want only the writeable layer
3503 * to be chowned
3504 */
1f92162d 3505 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3506 chownpath = strchr(path, ':');
3507 if (!chownpath) {
3508 ERROR("Bad overlay path: %s", path);
3509 return -1;
3510 }
3511 chownpath = strchr(chownpath+1, ':');
3512 if (!chownpath) {
3513 ERROR("Bad overlay path: %s", path);
3514 return -1;
3515 }
3516 chownpath++;
3517 }
3518 path = chownpath;
c4d10a05 3519 if (geteuid() == 0) {
7b50c609 3520 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3521 ERROR("Error chowning %s", path);
3522 return -1;
3523 }
3524 return 0;
3525 }
f3d7e4ca 3526
7b50c609 3527 if (rootuid == geteuid()) {
f3d7e4ca
SH
3528 // nothing to do
3529 INFO("%s: container root is our uid; no need to chown" ,__func__);
3530 return 0;
3531 }
3532
c4d10a05
SH
3533 pid = fork();
3534 if (pid < 0) {
3535 SYSERROR("Failed forking");
f6d3e3e4
SH
3536 return -1;
3537 }
c4d10a05 3538 if (!pid) {
7b50c609
TS
3539 int hostuid = geteuid(), hostgid = getegid(), ret;
3540 struct stat sb;
3541 char map1[100], map2[100], map3[100], map4[100], map5[100];
3542 char ugid[100];
3543 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3544 "-m", map3, "-m", map5,
3545 "--", "chown", ugid, path, NULL };
3546 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3547 "-m", map3, "-m", map4, "-m", map5,
3548 "--", "chown", ugid, path, NULL };
3549
3550 // save the current gid of "path"
3551 if (stat(path, &sb) < 0) {
3552 ERROR("Error stat %s", path);
3553 return -1;
3554 }
f6d3e3e4 3555
9a7c2aba
SH
3556 /*
3557 * A file has to be group-owned by a gid mapped into the
3558 * container, or the container won't be privileged over it.
3559 */
3560 if (sb.st_uid == geteuid() &&
3561 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3562 chown(path, -1, hostgid) < 0) {
3563 ERROR("Failed chgrping %s", path);
7b50c609
TS
3564 return -1;
3565 }
3566
3567 // "u:0:rootuid:1"
3568 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3569 if (ret < 0 || ret >= 100) {
3570 ERROR("Error uid printing map string");
f6d3e3e4
SH
3571 return -1;
3572 }
c4d10a05 3573
98e5ba51
SH
3574 // "u:hostuid:hostuid:1"
3575 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3576 if (ret < 0 || ret >= 100) {
3577 ERROR("Error uid printing map string");
3578 return -1;
3579 }
3580
7b50c609
TS
3581 // "g:0:rootgid:1"
3582 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3583 if (ret < 0 || ret >= 100) {
7b50c609 3584 ERROR("Error gid printing map string");
c4d10a05
SH
3585 return -1;
3586 }
3587
7b50c609 3588 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3589 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3590 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3591 if (ret < 0 || ret >= 100) {
3592 ERROR("Error gid printing map string");
3593 return -1;
3594 }
3595
3596 // "g:hostgid:hostgid:1"
3597 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3598 if (ret < 0 || ret >= 100) {
3599 ERROR("Error gid printing map string");
3600 return -1;
3601 }
3602
3603 // "0:pathgid" (chown)
b4c1e35d 3604 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3605 if (ret < 0 || ret >= 100) {
3606 ERROR("Error owner printing format string for chown");
3607 return -1;
3608 }
3609
3610 if (hostgid == sb.st_gid)
3611 ret = execvp("lxc-usernsexec", args1);
3612 else
3613 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3614 SYSERROR("Failed executing usernsexec");
3615 exit(1);
f6d3e3e4 3616 }
c4d10a05 3617 return wait_for_pid(pid);
f6d3e3e4
SH
3618}
3619
c4d10a05 3620int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3621{
c4d10a05 3622 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3623 return 0;
c4d10a05 3624
29b10e4f 3625 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3626 ERROR("Failed to chown %s", c->console.name);
3627 return -1;
3628 }
3629
f6d3e3e4
SH
3630 return 0;
3631}
3632
f267d666 3633/* NOTE: not to be called from inside the container namespace! */
5112cd70
SH
3634int tmp_proc_mount(struct lxc_conf *lxc_conf)
3635{
3636 int mounted;
3637
01958b1f 3638 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3639 if (mounted == -1) {
3640 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3641 /* continue only if there is no rootfs */
3642 if (lxc_conf->rootfs.path)
3643 return -1;
5112cd70
SH
3644 } else if (mounted == 1) {
3645 lxc_conf->tmp_umount_proc = 1;
3646 }
3647 return 0;
3648}
3649
3650void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3651{
3652 if (lxc_conf->tmp_umount_proc == 1) {
3653 umount("/proc");
3654 lxc_conf->tmp_umount_proc = 0;
3655 }
3656}
3657
/*
 * Walk /proc/self/mountinfo and change every entry whose options field
 * contains "shared" to MS_SLAVE.
 *
 * Failures are logged and do not abort container startup.
 */
void remount_all_slave(void)
{
	char *line = NULL;
	size_t cap = 0;
	FILE *mountinfo;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&line, &cap, mountinfo) != -1) {
		char *mntpoint, *flags;

		/* field 4 of the line; presumably the mount point */
		mntpoint = get_field(line, 4);
		if (!mntpoint)
			continue;

		/* two fields further on */
		flags = get_field(mntpoint, 2);
		if (!flags)
			continue;

		null_endofword(flags);
		if (strstr(flags, "shared") == NULL)
			continue;

		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL)) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(line);
}
3691
2322903b
SH
3692void lxc_execute_bind_init(struct lxc_conf *conf)
3693{
3694 int ret;
9d9c111c
SH
3695 char path[PATH_MAX], destpath[PATH_MAX], *p;
3696
3697 /* If init exists in the container, don't bind mount a static one */
3698 p = choose_init(conf->rootfs.mount);
3699 if (p) {
3700 free(p);
3701 return;
3702 }
2322903b
SH
3703
3704 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3705 if (ret < 0 || ret >= PATH_MAX) {
3706 WARN("Path name too long searching for lxc.init.static");
3707 return;
3708 }
3709
3710 if (!file_exists(path)) {
3711 INFO("%s does not exist on host", path);
3712 return;
3713 }
3714
3715 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3716 if (ret < 0 || ret >= PATH_MAX) {
3717 WARN("Path name too long for container's lxc.init.static");
3718 return;
3719 }
3720
3721 if (!file_exists(destpath)) {
3722 FILE * pathfile = fopen(destpath, "wb");
3723 if (!pathfile) {
3724 SYSERROR("Failed to create mount target '%s'", destpath);
3725 return;
3726 }
3727 fclose(pathfile);
3728 }
3729
592fd47a 3730 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3731 if (ret < 0)
3732 SYSERROR("Failed to bind lxc.init.static into container");
3733 INFO("lxc.init.static bound into container at %s", path);
3734}
3735
35120d9c
SH
3736/*
3737 * This does the work of remounting / if it is shared, calling the
3738 * container pre-mount hooks, and mounting the rootfs.
3739 */
3740int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3741{
35120d9c
SH
3742 if (conf->rootfs_setup) {
3743 /*
3744 * rootfs was set up in another namespace. bind-mount it
3745 * to give us a mount in our own ns so we can pivot_root to it
3746 */
3747 const char *path = conf->rootfs.mount;
3748 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3749 ERROR("Failed to bind-mount container / onto itself");
145832ba 3750 return -1;
35120d9c 3751 }
145832ba 3752 return 0;
35120d9c 3753 }
d4ef7c50 3754
e995d7a2
SH
3755 remount_all_slave();
3756
35120d9c
SH
3757 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3758 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3759 return -1;
3760 }
3761
3762 if (setup_rootfs(conf)) {
3763 ERROR("failed to setup rootfs for '%s'", name);
3764 return -1;
3765 }
3766
3767 conf->rootfs_setup = true;
3768 return 0;
3769}
3770
1c1c7051
SH
3771static bool verify_start_hooks(struct lxc_conf *conf)
3772{
3773 struct lxc_list *it;
3774 char path[MAXPATHLEN];
3775 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3776 char *hookname = it->elem;
3777 struct stat st;
3778 int ret;
3779
3780 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3781 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3782 if (ret < 0 || ret >= MAXPATHLEN)
3783 return false;
3784 ret = stat(path, &st);
3785 if (ret) {
7b6753e7 3786 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3787 hookname);
3788 return false;
3789 }
6a0c909a 3790 return true;
1c1c7051
SH
3791 }
3792
3793 return true;
3794}
3795
e8bd4e43
SH
3796static int send_fd(int sock, int fd)
3797{
3798 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3799
3800
3801 if (ret < 0) {
3802 SYSERROR("Error sending tty fd to parent");
3803 return -1;
3804 }
3805
3806 return 0;
3807}
3808
3809static int send_ttys_to_parent(struct lxc_handler *handler)
3810{
3811 struct lxc_conf *conf = handler->conf;
3812 const struct lxc_tty_info *tty_info = &conf->tty_info;
3813 int i;
3814 int sock = handler->ttysock[0];
3815
3816 for (i = 0; i < tty_info->nbtty; i++) {
3817 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3818 if (send_fd(sock, pty_info->slave) < 0)
3819 goto bad;
3820 close(pty_info->slave);
3821 pty_info->slave = -1;
3822 if (send_fd(sock, pty_info->master) < 0)
3823 goto bad;
3824 close(pty_info->master);
3825 pty_info->master = -1;
3826 }
3827
3828 close(handler->ttysock[0]);
3829 close(handler->ttysock[1]);
3830
3831 return 0;
3832
3833bad:
3834 ERROR("Error writing tty fd to parent");
3835 return -1;
3836}
3837
35120d9c
SH
3838int lxc_setup(struct lxc_handler *handler)
3839{
3840 const char *name = handler->name;
3841 struct lxc_conf *lxc_conf = handler->conf;
3842 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3843
3844 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3845 ERROR("Error setting up rootfs mount after spawn");
3846 return -1;
3847 }
3848
6c544cb3
MM
3849 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3850 if (setup_utsname(lxc_conf->utsname)) {
3851 ERROR("failed to setup the utsname for '%s'", name);
3852 return -1;
3853 }
0ad19a3f 3854 }
3855
5f4535a3 3856 if (setup_network(&lxc_conf->network)) {
36eb9bde 3857 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3858 return -1;
0ad19a3f 3859 }
3860
bc6928ff 3861 if (lxc_conf->autodev > 0) {
14221cbb 3862 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 3863 ERROR("failed to mount /dev in the container");
c6883f38
SH
3864 return -1;
3865 }
3866 }
3867
368bbc02
CS
3868 /* do automatic mounts (mainly /proc and /sys), but exclude
3869 * those that need to wait until other stuff has finished
3870 */
4fb3cba5 3871 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3872 ERROR("failed to setup the automatic mounts for '%s'", name);
3873 return -1;
3874 }
3875
0a2dddd4 3876 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath)) {
36eb9bde 3877 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3878 return -1;
576f946d 3879 }
3880
0a2dddd4 3881 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name, lxcpath)) {
e7938e9e
MN
3882 ERROR("failed to setup the mount entries for '%s'", name);
3883 return -1;
3884 }
3885
7b6753e7 3886 /* Make sure any start hooks are in the container */
1c1c7051
SH
3887 if (!verify_start_hooks(lxc_conf))
3888 return -1;
3889
2322903b
SH
3890 if (lxc_conf->is_execute)
3891 lxc_execute_bind_init(lxc_conf);
3892
368bbc02
CS
3893 /* now mount only cgroup, if wanted;
3894 * before, /sys could not have been mounted
3895 * (is either mounted automatically or via fstab entries)
3896 */
4fb3cba5 3897 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3898 ERROR("failed to setup the automatic mounts for '%s'", name);
3899 return -1;
3900 }
3901
283678ed 3902 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3903 ERROR("failed to run mount hooks for container '%s'.", name);
3904 return -1;
3905 }
3906
bc6928ff 3907 if (lxc_conf->autodev > 0) {
0728ebf4
TA
3908 bool mount_console = lxc_conf->console.path && !strcmp(lxc_conf->console.path, "none");
3909
283678ed 3910 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3911 ERROR("failed to run autodev hooks for container '%s'.", name);
3912 return -1;
3913 }
0728ebf4 3914 if (fill_autodev(&lxc_conf->rootfs, mount_console)) {
91c3830e
SH
3915 ERROR("failed to populate /dev in the container");
3916 return -1;
3917 }
3918 }
368bbc02 3919
37903589 3920 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3921 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3922 return -1;
6e590161 3923 }
3924
7e0e1d94
AV
3925 if (lxc_conf->kmsg) {
3926 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3927 ERROR("failed to setup kmsg for '%s'", name);
3928 }
1bd051a6 3929
69aa6655
DE
3930 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3931 ERROR("failed to setup /dev symlinks for '%s'", name);
3932 return -1;
3933 }
3934
5112cd70
SH
3935 /* mount /proc if it's not already there */
3936 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3937 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3938 return -1;
e075f5d9 3939 }
e075f5d9 3940
ac778708 3941 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3942 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3943 return -1;
ed502555 3944 }
3945
571e6ec8 3946 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3947 ERROR("failed to setup the new pts instance");
95b5ffaf 3948 return -1;
3c26f34e 3949 }
3950
e8bd4e43
SH
3951 if (lxc_create_tty(name, lxc_conf)) {
3952 ERROR("failed to create the ttys");
3953 return -1;
3954 }
3955
3956 if (send_ttys_to_parent(handler) < 0) {
3957 ERROR("failure sending console info to parent");
3958 return -1;
3959 }
3960
3961
3962 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3963 ERROR("failed to setup the ttys for '%s'", name);
3964 return -1;
3965 }
3966
3967 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3968 SYSERROR("failed to set environment variable for container ptys");
3969
3970
cccc74b5
DL
3971 if (setup_personality(lxc_conf->personality)) {
3972 ERROR("failed to setup personality");
3973 return -1;
3974 }
3975
97a8f74f
SG
3976 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3977 if (!lxc_list_empty(&lxc_conf->caps)) {
7389ca26 3978 ERROR("Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both.");
f6d3e3e4
SH
3979 return -1;
3980 }
97a8f74f
SG
3981 if (dropcaps_except(&lxc_conf->keepcaps)) {
3982 ERROR("failed to keep requested caps");
3983 return -1;
3984 }
3985 } else if (setup_caps(&lxc_conf->caps)) {
3986 ERROR("failed to drop capabilities");
3987 return -1;
81810dd1
DL
3988 }
3989
cd54d859
DL
3990 NOTICE("'%s' is setup.", name);
3991
0ad19a3f 3992 return 0;
3993}
26ddeedd 3994
283678ed
SH
3995int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3996 const char *lxcpath, char *argv[])
26ddeedd
SH
3997{
3998 int which = -1;
3999 struct lxc_list *it;
4000
4001 if (strcmp(hook, "pre-start") == 0)
4002 which = LXCHOOK_PRESTART;
5ea6163a
SH
4003 else if (strcmp(hook, "pre-mount") == 0)
4004 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4005 else if (strcmp(hook, "mount") == 0)
4006 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4007 else if (strcmp(hook, "autodev") == 0)
4008 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4009 else if (strcmp(hook, "start") == 0)
4010 which = LXCHOOK_START;
52492063
WB
4011 else if (strcmp(hook, "stop") == 0)
4012 which = LXCHOOK_STOP;
26ddeedd
SH
4013 else if (strcmp(hook, "post-stop") == 0)
4014 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4015 else if (strcmp(hook, "clone") == 0)
4016 which = LXCHOOK_CLONE;
37cf711b
SY
4017 else if (strcmp(hook, "destroy") == 0)
4018 which = LXCHOOK_DESTROY;
26ddeedd
SH
4019 else
4020 return -1;
4021 lxc_list_for_each(it, &conf->hooks[which]) {
4022 int ret;
4023 char *hookname = it->elem;
283678ed 4024 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4025 if (ret)
4026 return ret;
4027 }
4028 return 0;
4029}
72d0e1cb 4030
427b3a21 4031static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4032{
4033 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4034 struct lxc_list *it2,*next;
72d0e1cb
SG
4035
4036 lxc_list_del(it);
4037
f10fad2f
ME
4038 free(netdev->link);
4039 free(netdev->name);
4040 if (netdev->type == LXC_NET_VETH)
c9bb9a85 4041 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
4042 free(netdev->upscript);
4043 free(netdev->hwaddr);
4044 free(netdev->mtu);
4045 free(netdev->ipv4_gateway);
4046 free(netdev->ipv6_gateway);
9ebb03ad 4047 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4048 lxc_list_del(it2);
4049 free(it2->elem);
4050 free(it2);
4051 }
9ebb03ad 4052 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4053 lxc_list_del(it2);
4054 free(it2->elem);
4055 free(it2);
4056 }
d95db067 4057 free(netdev);
72d0e1cb
SG
4058 free(it);
4059}
4060
4061/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4062int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4063{
4064 char *p1;
4065 int ret, idx, i;
4066 struct lxc_list *it;
4067 struct lxc_netdev *netdev;
4068
46cd2845 4069 p1 = strchr(key, '.');
72d0e1cb
SG
4070 if (!p1 || *(p1+1) == '\0')
4071 p1 = NULL;
4072
4073 ret = sscanf(key, "%d", &idx);
4074 if (ret != 1) return -1;
4075 if (idx < 0)
4076 return -1;
4077
4078 i = 0;
4079 lxc_list_for_each(it, &c->network) {
4080 if (i == idx)
4081 break;
4082 i++;
4083 }
4084 if (i < idx) // we don't have that many nics defined
4085 return -1;
4086
4087 if (!it || !it->elem)
4088 return -1;
4089
4090 netdev = it->elem;
4091
4092 if (!p1) {
4093 lxc_remove_nic(it);
52d21d40 4094 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4095 struct lxc_list *it2,*next;
4096 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4097 lxc_list_del(it2);
4098 free(it2->elem);
4099 free(it2);
4100 }
52d21d40 4101 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4102 struct lxc_list *it2,*next;
4103 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4104 lxc_list_del(it2);
4105 free(it2->elem);
4106 free(it2);
4107 }
72d0e1cb
SG
4108 }
4109 else return -1;
4110
4111 return 0;
4112}
4113
4114int lxc_clear_config_network(struct lxc_conf *c)
4115{
9ebb03ad
DE
4116 struct lxc_list *it,*next;
4117 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4118 lxc_remove_nic(it);
4119 }
4120 return 0;
4121}
4122
4123int lxc_clear_config_caps(struct lxc_conf *c)
4124{
9ebb03ad 4125 struct lxc_list *it,*next;
72d0e1cb 4126
9ebb03ad 4127 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4128 lxc_list_del(it);
4129 free(it->elem);
4130 free(it);
4131 }
4132 return 0;
4133}
4134
74a3920a 4135static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4136 struct lxc_list *it, *next;
4137
4355ab5f 4138 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4139 lxc_list_del(it);
4140 free(it->elem);
4141 free(it);
4142 }
4143 return 0;
4144}
4145
4355ab5f
SH
4146int lxc_clear_idmaps(struct lxc_conf *c)
4147{
4148 return lxc_free_idmap(&c->id_map);
4149}
4150
1fb86a7c
SH
4151int lxc_clear_config_keepcaps(struct lxc_conf *c)
4152{
4153 struct lxc_list *it,*next;
4154
4155 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4156 lxc_list_del(it);
4157 free(it->elem);
4158 free(it);
4159 }
4160 return 0;
4161}
4162
12a50cc6 4163int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4164{
9ebb03ad 4165 struct lxc_list *it,*next;
72d0e1cb 4166 bool all = false;
12a50cc6 4167 const char *k = key + 11;
72d0e1cb
SG
4168
4169 if (strcmp(key, "lxc.cgroup") == 0)
4170 all = true;
4171
9ebb03ad 4172 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4173 struct lxc_cgroup *cg = it->elem;
4174 if (!all && strcmp(cg->subsystem, k) != 0)
4175 continue;
4176 lxc_list_del(it);
4177 free(cg->subsystem);
4178 free(cg->value);
4179 free(cg);
4180 free(it);
4181 }
4182 return 0;
4183}
4184
ee1e7aa0
SG
4185int lxc_clear_groups(struct lxc_conf *c)
4186{
4187 struct lxc_list *it,*next;
4188
4189 lxc_list_for_each_safe(it, &c->groups, next) {
4190 lxc_list_del(it);
4191 free(it->elem);
4192 free(it);
4193 }
4194 return 0;
4195}
4196
ab799c0b
SG
4197int lxc_clear_environment(struct lxc_conf *c)
4198{
4199 struct lxc_list *it,*next;
4200
4201 lxc_list_for_each_safe(it, &c->environment, next) {
4202 lxc_list_del(it);
4203 free(it->elem);
4204 free(it);
4205 }
4206 return 0;
4207}
4208
4209
72d0e1cb
SG
4210int lxc_clear_mount_entries(struct lxc_conf *c)
4211{
9ebb03ad 4212 struct lxc_list *it,*next;
72d0e1cb 4213
9ebb03ad 4214 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4215 lxc_list_del(it);
4216 free(it->elem);
4217 free(it);
4218 }
4219 return 0;
4220}
4221
b099e9e9
SH
4222int lxc_clear_automounts(struct lxc_conf *c)
4223{
4224 c->auto_mounts = 0;
4225 return 0;
4226}
4227
12a50cc6 4228int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4229{
9ebb03ad 4230 struct lxc_list *it,*next;
17ed13a3 4231 bool all = false, done = false;
12a50cc6 4232 const char *k = key + 9;
72d0e1cb
SG
4233 int i;
4234
17ed13a3
SH
4235 if (strcmp(key, "lxc.hook") == 0)
4236 all = true;
4237
72d0e1cb 4238 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4239 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4240 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4241 lxc_list_del(it);
4242 free(it->elem);
4243 free(it);
4244 }
4245 done = true;
72d0e1cb
SG
4246 }
4247 }
17ed13a3
SH
4248
4249 if (!done) {
4250 ERROR("Invalid hook key: %s", key);
4251 return -1;
4252 }
72d0e1cb
SG
4253 return 0;
4254}
8eb5694b 4255
74a3920a 4256static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4257{
4258 int i;
4259
0cf45501 4260 if (!conf->saved_nics)
7b35f3d6
SH
4261 return;
4262 for (i=0; i < conf->num_savednics; i++)
4263 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4264 free(conf->saved_nics);
4265}
4266
4184c3e1
SH
4267static inline void lxc_clear_aliens(struct lxc_conf *conf)
4268{
4269 struct lxc_list *it,*next;
4270
4271 lxc_list_for_each_safe(it, &conf->aliens, next) {
4272 lxc_list_del(it);
4273 free(it->elem);
4274 free(it);
4275 }
4276}
4277
f979ac15
SH
4278static inline void lxc_clear_includes(struct lxc_conf *conf)
4279{
4280 struct lxc_list *it,*next;
4281
4282 lxc_list_for_each_safe(it, &conf->includes, next) {
4283 lxc_list_del(it);
4284 free(it->elem);
4285 free(it);
4286 }
4287}
4288
8eb5694b
SH
4289void lxc_conf_free(struct lxc_conf *conf)
4290{
4291 if (!conf)
4292 return;
858377e4
SH
4293 if (current_config == conf)
4294 current_config = NULL;
f10fad2f
ME
4295 free(conf->console.log_path);
4296 free(conf->console.path);
4297 free(conf->rootfs.mount);
b3b8c97f 4298 free(conf->rootfs.bdev_type);
f10fad2f
ME
4299 free(conf->rootfs.options);
4300 free(conf->rootfs.path);
f10fad2f 4301 free(conf->logfile);
858377e4
SH
4302 if (conf->logfd != -1)
4303 close(conf->logfd);
f10fad2f
ME
4304 free(conf->utsname);
4305 free(conf->ttydir);
4306 free(conf->fstab);
4307 free(conf->rcfile);
4308 free(conf->init_cmd);
6b0d5538 4309 free(conf->unexpanded_config);
393903d1 4310 free(conf->pty_names);
76d0127f 4311 free(conf->syslog);
8eb5694b 4312 lxc_clear_config_network(conf);
f10fad2f
ME
4313 free(conf->lsm_aa_profile);
4314 free(conf->lsm_se_context);
769872f9 4315 lxc_seccomp_free(conf);
8eb5694b 4316 lxc_clear_config_caps(conf);
1fb86a7c 4317 lxc_clear_config_keepcaps(conf);
8eb5694b 4318 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4319 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4320 lxc_clear_mount_entries(conf);
7b35f3d6 4321 lxc_clear_saved_nics(conf);
27c27d73 4322 lxc_clear_idmaps(conf);
ee1e7aa0 4323 lxc_clear_groups(conf);
f979ac15 4324 lxc_clear_includes(conf);
761d81ca 4325 lxc_clear_aliens(conf);
ab799c0b 4326 lxc_clear_environment(conf);
8eb5694b
SH
4327 free(conf);
4328}
4355ab5f
SH
4329
/*
 * Argument bundle handed to run_userns_fn() in the cloned child.
 */
struct userns_fn_data {
	/* Callback the child runs once the parent has released it. */
	int (*fn)(void *);
	/* Opaque argument passed to fn. */
	void *arg;
	/* Synchronization pipe: p[0] is read by the child, p[1] written by the parent. */
	int p[2];
};
4335
4336static int run_userns_fn(void *data)
4337{
4338 struct userns_fn_data *d = data;
4339 char c;
4340 // we're not sharing with the parent any more, if it was a thread
4341
4342 close(d->p[1]);
4343 if (read(d->p[0], &c, 1) != 1)
4344 return -1;
4345 close(d->p[0]);
4346 return d->fn(d->arg);
4347}
4348
4349/*
8b227008
TS
4350 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4351 * if they are not already there.
4355ab5f 4352 */
8b227008
TS
4353static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4354 uid_t uid, gid_t gid)
4355ab5f 4355{
8b227008
TS
4356 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4357 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4358 struct lxc_list *new = NULL, *tmp, *it, *next;
4359 struct id_map *entry;
4360
3ec1648d
SH
4361 new = malloc(sizeof(*new));
4362 if (!new) {
4363 ERROR("Out of memory building id map");
4364 return NULL;
4365 }
4366 lxc_list_init(new);
4367
8b227008
TS
4368 if (hostuid_mapped < 0) {
4369 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4370 if (hostuid_mapped < 0)
3ec1648d
SH
4371 goto err;
4372 tmp = malloc(sizeof(*tmp));
4373 if (!tmp)
4374 goto err;
4355ab5f
SH
4375 entry = malloc(sizeof(*entry));
4376 if (!entry) {
3ec1648d
SH
4377 free(tmp);
4378 goto err;
4355ab5f 4379 }
3ec1648d 4380 tmp->elem = entry;
4355ab5f 4381 entry->idtype = ID_TYPE_UID;
8b227008
TS
4382 entry->nsid = hostuid_mapped;
4383 entry->hostid = (unsigned long) uid;
4384 entry->range = 1;
4385 lxc_list_add_tail(new, tmp);
4386 }
4387 if (hostgid_mapped < 0) {
4388 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4389 if (hostgid_mapped < 0)
4390 goto err;
4391 tmp = malloc(sizeof(*tmp));
4392 if (!tmp)
4393 goto err;
4394 entry = malloc(sizeof(*entry));
4395 if (!entry) {
4396 free(tmp);
4397 goto err;
4398 }
4399 tmp->elem = entry;
4400 entry->idtype = ID_TYPE_GID;
4401 entry->nsid = hostgid_mapped;
4402 entry->hostid = (unsigned long) gid;
4355ab5f 4403 entry->range = 1;
3ec1648d 4404 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4405 }
4406 lxc_list_for_each_safe(it, &conf->id_map, next) {
4407 tmp = malloc(sizeof(*tmp));
4408 if (!tmp)
4409 goto err;
4410 entry = malloc(sizeof(*entry));
4411 if (!entry) {
4412 free(tmp);
4413 goto err;
4414 }
4415 memset(entry, 0, sizeof(*entry));
4416 memcpy(entry, it->elem, sizeof(*entry));
4417 tmp->elem = entry;
3ec1648d 4418 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4419 }
4420
4421 return new;
4422
4423err:
8b227008 4424 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4425 if (new)
4426 lxc_free_idmap(new);
c30ac545 4427 free(new);
4355ab5f
SH
4428 return NULL;
4429}
4430
4431/*
4432 * Run a function in a new user namespace.
8b227008 4433 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4434 */
4435int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4436{
4437 int ret, pid;
4438 struct userns_fn_data d;
4439 char c = '1';
4440 int p[2];
4441 struct lxc_list *idmap;
4442
4355ab5f 4443 ret = pipe(p);
4355ab5f
SH
4444 if (ret < 0) {
4445 SYSERROR("opening pipe");
4446 return -1;
4447 }
4448 d.fn = fn;
4449 d.arg = data;
4450 d.p[0] = p[0];
4451 d.p[1] = p[1];
4452 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4453 if (pid < 0)
4454 goto err;
4355ab5f 4455 close(p[0]);
4355ab5f
SH
4456 p[0] = -1;
4457
8b227008
TS
4458 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4459 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4460 goto err;
4461 }
4462
4463 ret = lxc_map_ids(idmap, pid);
4464 lxc_free_idmap(idmap);
88dd66fc 4465 free(idmap);
565e571c 4466 if (ret) {
4355ab5f
SH
4467 ERROR("Error setting up child mappings");
4468 goto err;
4469 }
4470
4471 // kick the child
4472 if (write(p[1], &c, 1) != 1) {
4473 SYSERROR("writing to pipe to child");
4474 goto err;
4475 }
4476
3139aead
SG
4477 ret = wait_for_pid(pid);
4478
4479 close(p[1]);
4480 return ret;
4481
4355ab5f 4482err:
4355ab5f
SH
4483 if (p[0] != -1)
4484 close(p[0]);
4485 close(p[1]);
4355ab5f
SH
4486 return -1;
4487}
97e9cfa0 4488
/*
 * Return a heap-allocated copy of the login name belonging to the
 * effective uid, or NULL if no passwd entry is found (or strdup fails).
 * The caller must free() the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4500
/*
 * Return a heap-allocated copy of the group name belonging to the
 * effective gid, or NULL if no group entry is found (or strdup fails).
 * The caller must free() the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4512
a96a8e8c 4513/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4514void suggest_default_idmap(void)
4515{
4516 FILE *f;
4517 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4518 char *line = NULL;
4519 char *uname, *gname;
4520 size_t len = 0;
4521
4522 if (!(uname = getuname()))
4523 return;
4524
4525 if (!(gname = getgname())) {
4526 free(uname);
4527 return;
4528 }
4529
4530 f = fopen(subuidfile, "r");
4531 if (!f) {
4532 ERROR("Your system is not configured with subuids");
4533 free(gname);
4534 free(uname);
4535 return;
4536 }
4537 while (getline(&line, &len, f) != -1) {
b7930180 4538 size_t no_newline = 0;
97e9cfa0
SH
4539 char *p = strchr(line, ':'), *p2;
4540 if (*line == '#')
4541 continue;
4542 if (!p)
4543 continue;
4544 *p = '\0';
4545 p++;
4546 if (strcmp(line, uname))
4547 continue;
4548 p2 = strchr(p, ':');
4549 if (!p2)
4550 continue;
4551 *p2 = '\0';
4552 p2++;
4553 if (!*p2)
4554 continue;
b7930180
CB
4555 no_newline = strcspn(p2, "\n");
4556 p2[no_newline] = '\0';
4557
b7b2fde4
CB
4558 if (lxc_safe_uint(p, &uid) < 0)
4559 WARN("Could not parse UID.");
4560 if (lxc_safe_uint(p2, &urange) < 0)
4561 WARN("Could not parse UID range.");
97e9cfa0
SH
4562 }
4563 fclose(f);
4564
4565 f = fopen(subuidfile, "r");
4566 if (!f) {
4567 ERROR("Your system is not configured with subgids");
4568 free(gname);
4569 free(uname);
4570 return;
4571 }
4572 while (getline(&line, &len, f) != -1) {
b7930180 4573 size_t no_newline = 0;
97e9cfa0
SH
4574 char *p = strchr(line, ':'), *p2;
4575 if (*line == '#')
4576 continue;
4577 if (!p)
4578 continue;
4579 *p = '\0';
4580 p++;
4581 if (strcmp(line, uname))
4582 continue;
4583 p2 = strchr(p, ':');
4584 if (!p2)
4585 continue;
4586 *p2 = '\0';
4587 p2++;
4588 if (!*p2)
4589 continue;
b7930180
CB
4590 no_newline = strcspn(p2, "\n");
4591 p2[no_newline] = '\0';
4592
b7b2fde4
CB
4593 if (lxc_safe_uint(p, &gid) < 0)
4594 WARN("Could not parse GID.");
4595 if (lxc_safe_uint(p2, &grange) < 0)
4596 WARN("Could not parse GID range.");
97e9cfa0
SH
4597 }
4598 fclose(f);
4599
f10fad2f 4600 free(line);
97e9cfa0
SH
4601
4602 if (!urange || !grange) {
4603 ERROR("You do not have subuids or subgids allocated");
4604 ERROR("Unprivileged containers require subuids and subgids");
4605 return;
4606 }
4607
4608 ERROR("You must either run as root, or define uid mappings");
4609 ERROR("To pass uid mappings to lxc-create, you could create");
4610 ERROR("~/.config/lxc/default.conf:");
4611 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4612 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4613 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4614
4615 free(gname);
4616 free(uname);
4617}
aaf26830 4618
a7307747
SH
4619static void free_cgroup_settings(struct lxc_list *result)
4620{
4621 struct lxc_list *iterator, *next;
4622
4623 lxc_list_for_each_safe(iterator, result, next) {
4624 lxc_list_del(iterator);
4625 free(iterator);
4626 }
4627 free(result);
4628}
4629
aaf26830
KT
4630/*
4631 * Return the list of cgroup_settings sorted according to the following rules
4632 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4633 */
4634struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4635{
4636 struct lxc_list *result;
4637 struct lxc_list *memsw_limit = NULL;
4638 struct lxc_list *it = NULL;
4639 struct lxc_cgroup *cg = NULL;
4640 struct lxc_list *item = NULL;
4641
4642 result = malloc(sizeof(*result));
fac7c663
KT
4643 if (!result) {
4644 ERROR("failed to allocate memory to sort cgroup settings");
4645 return NULL;
4646 }
aaf26830
KT
4647 lxc_list_init(result);
4648
4649 /*Iterate over the cgroup settings and copy them to the output list*/
4650 lxc_list_for_each(it, cgroup_settings) {
4651 item = malloc(sizeof(*item));
fac7c663
KT
4652 if (!item) {
4653 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4654 free_cgroup_settings(result);
fac7c663
KT
4655 return NULL;
4656 }
aaf26830
KT
4657 item->elem = it->elem;
4658 cg = it->elem;
4659 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4660 /* Store the memsw_limit location */
4661 memsw_limit = item;
4662 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4663 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4664 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4665 item->elem = memsw_limit->elem;
4666 memsw_limit->elem = it->elem;
4667 }
4668 lxc_list_add_tail(result, item);
4669 }
4670
4671 return result;
a7307747 4672}