]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
Merge pull request #1479 from brauner/2017-03-20/sysmacro
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
1d52bdf7
CB
23
24#define _GNU_SOURCE
d06245b8
NC
25#include "config.h"
26
8f3e280e
CB
27#include <dirent.h>
28#include <errno.h>
29#include <fcntl.h>
30#include <grp.h>
31#include <inttypes.h>
32#include <libgen.h>
33#include <pwd.h>
34#include <stdarg.h>
0ad19a3f 35#include <stdio.h>
0ad19a3f 36#include <stdlib.h>
0ad19a3f 37#include <string.h>
8f3e280e 38#include <time.h>
0ad19a3f 39#include <unistd.h>
8f3e280e
CB
40#include <arpa/inet.h>
41#include <linux/loop.h>
8f3e280e
CB
42#include <net/if.h>
43#include <netinet/in.h>
44#include <sys/mman.h>
45#include <sys/mount.h>
46#include <sys/param.h>
47#include <sys/prctl.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
ce831b3b 50#include <sys/sysmacros.h>
2d76d1d7 51#include <sys/syscall.h>
97e9cfa0 52#include <sys/types.h>
8f3e280e
CB
53#include <sys/utsname.h>
54#include <sys/wait.h>
1d52bdf7 55
af6824fc
ST
56/* makedev() */
57#ifdef MAJOR_IN_MKDEV
58# include <sys/mkdev.h>
59#endif
af6824fc 60
614305f3 61#ifdef HAVE_STATVFS
2938f7c8 62#include <sys/statvfs.h>
614305f3 63#endif
e827ff7e
SG
64
65#if HAVE_PTY_H
b0a33c1e 66#include <pty.h>
e827ff7e
SG
67#else
68#include <../include/openpty.h>
69#endif
0ad19a3f 70
5ef5c9a3
CB
71#ifdef HAVE_LINUX_MEMFD_H
72#include <linux/memfd.h>
73#endif
74
e8bd4e43 75#include "af_unix.h"
8f3e280e
CB
76#include "bdev.h"
77#include "caps.h" /* for lxc_caps_last_cap() */
78#include "cgroup.h"
1b09f2c0 79#include "conf.h"
8f3e280e 80#include "error.h"
1b09f2c0 81#include "log.h"
d8e48992 82#include "lxcaufs.h"
025ed0f3 83#include "lxclock.h"
8f3e280e
CB
84#include "lxcoverlay.h"
85#include "lxcseccomp.h"
4355ab5f 86#include "namespace.h"
8f3e280e
CB
87#include "network.h"
88#include "parse.h"
89#include "utils.h"
fe4de9a6 90#include "lsm/lsm.h"
d0a36f2c 91
e37dda71 92#if HAVE_LIBCAP
495d2046
SG
93#include <sys/capability.h>
94#endif
95
6ff05e18
SG
96#if HAVE_SYS_PERSONALITY_H
97#include <sys/personality.h>
98#endif
99
edaf8b1b
SG
100#if IS_BIONIC
101#include <../include/lxcmntent.h>
102#else
103#include <mntent.h>
104#endif
105
36eb9bde 106lxc_log_define(lxc_conf, lxc);
e5bda9ee 107
e37dda71 108#if HAVE_LIBCAP
b09094da
MN
109#ifndef CAP_SETFCAP
110#define CAP_SETFCAP 31
111#endif
112
113#ifndef CAP_MAC_OVERRIDE
114#define CAP_MAC_OVERRIDE 32
115#endif
116
117#ifndef CAP_MAC_ADMIN
118#define CAP_MAC_ADMIN 33
119#endif
495d2046 120#endif
b09094da
MN
121
122#ifndef PR_CAPBSET_DROP
123#define PR_CAPBSET_DROP 24
124#endif
125
9818cae4
SG
126#ifndef LO_FLAGS_AUTOCLEAR
127#define LO_FLAGS_AUTOCLEAR 4
128#endif
129
0769b82a
CS
130/* needed for cgroup automount checks, regardless of whether we
131 * have included linux/capability.h or not */
132#ifndef CAP_SYS_ADMIN
133#define CAP_SYS_ADMIN 21
134#endif
135
2d76d1d7
SG
/*
 * pivot_root() is not wrapped by every C library. When the build did not
 * detect one, provide a local version that issues the raw syscall if its
 * number is known and otherwise fails with ENOSYS.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
150
/*
 * Local sethostname() for C libraries that do not provide one: use the raw
 * syscall when its number is known, otherwise report ENOSYS.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
163
72f919c4
SG
164/* Define __S_ISTYPE if missing from the C library */
165#ifndef __S_ISTYPE
166#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
167#endif
168
ecec0126
SG
169#ifndef MS_PRIVATE
170#define MS_PRIVATE (1<<18)
171#endif
172
5ef5c9a3
CB
173/* memfd_create() */
174#ifndef MFD_CLOEXEC
175#define MFD_CLOEXEC 0x0001U
176#endif
177
178#ifndef MFD_ALLOW_SEALING
179#define MFD_ALLOW_SEALING 0x0002U
180#endif
181
/*
 * memfd_create() fallback. If the build did not detect a C library wrapper,
 * define one here on top of the raw syscall. For toolchains whose headers do
 * not know the syscall number yet, a per-architecture number is supplied
 * below; if no number is known at all the call fails with ENOSYS.
 */
#ifdef HAVE_MEMFD_CREATE
extern int memfd_create(const char *name, unsigned int flags);
#else

#ifndef __NR_memfd_create
	#if defined __i386__
		#define __NR_memfd_create 356
	#elif defined __x86_64__
		#define __NR_memfd_create 319
	#elif defined __arm__
		#define __NR_memfd_create 385
	#elif defined __aarch64__
		#define __NR_memfd_create 279
	#elif defined __s390__
		#define __NR_memfd_create 350
	#elif defined __powerpc__
		#define __NR_memfd_create 360
	#elif defined __sparc__
		#define __NR_memfd_create 348
	#elif defined __blackfin__
		#define __NR_memfd_create 390
	#elif defined __ia64__
		#define __NR_memfd_create 1340
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32
			#define __NR_memfd_create 4354
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32
			#define __NR_memfd_create 6318
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64
			#define __NR_memfd_create 5314
		#endif
	#endif
#endif

static int memfd_create(const char *name, unsigned int flags)
{
#ifndef __NR_memfd_create
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_memfd_create, name, flags);
#endif
}

#endif
225
72d0e1cb 226char *lxchook_names[NUM_LXC_HOOKS] = {
52492063 227 "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" };
72d0e1cb 228
a589434e 229typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 230
998ac676
RT
231struct mount_opt {
232 char *name;
233 int clear;
234 int flag;
235};
236
81810dd1
DL
237struct caps_opt {
238 char *name;
239 int value;
240};
241
858377e4
SH
242/*
243 * The lxc_conf of the container currently being worked on in an
244 * API call
245 * This is used in the error calls
246 */
247#ifdef HAVE_TLS
248__thread struct lxc_conf *current_config;
249#else
250struct lxc_conf *current_config;
251#endif
252
0769b82a
CS
253/* Declare this here, since we don't want to reshuffle the whole file. */
254static int in_caplist(int cap, struct lxc_list *caps);
255
a589434e
JN
256static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
257static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
258static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
259static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
260static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
261static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
262
263static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
264 [LXC_NET_VETH] = instantiate_veth,
265 [LXC_NET_MACVLAN] = instantiate_macvlan,
266 [LXC_NET_VLAN] = instantiate_vlan,
267 [LXC_NET_PHYS] = instantiate_phys,
268 [LXC_NET_EMPTY] = instantiate_empty,
269 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 270};
271
74a2b586
JK
272static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
273static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
274static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
275static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
276static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 277static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 278
a589434e 279static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
280 [LXC_NET_VETH] = shutdown_veth,
281 [LXC_NET_MACVLAN] = shutdown_macvlan,
282 [LXC_NET_VLAN] = shutdown_vlan,
283 [LXC_NET_PHYS] = shutdown_phys,
284 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 285 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
286};
287
998ac676 288static struct mount_opt mount_opt[] = {
88d413d5
SW
289 { "defaults", 0, 0 },
290 { "ro", 0, MS_RDONLY },
291 { "rw", 1, MS_RDONLY },
292 { "suid", 1, MS_NOSUID },
293 { "nosuid", 0, MS_NOSUID },
294 { "dev", 1, MS_NODEV },
295 { "nodev", 0, MS_NODEV },
296 { "exec", 1, MS_NOEXEC },
297 { "noexec", 0, MS_NOEXEC },
298 { "sync", 0, MS_SYNCHRONOUS },
299 { "async", 1, MS_SYNCHRONOUS },
300 { "dirsync", 0, MS_DIRSYNC },
301 { "remount", 0, MS_REMOUNT },
302 { "mand", 0, MS_MANDLOCK },
303 { "nomand", 1, MS_MANDLOCK },
304 { "atime", 1, MS_NOATIME },
305 { "noatime", 0, MS_NOATIME },
306 { "diratime", 1, MS_NODIRATIME },
307 { "nodiratime", 0, MS_NODIRATIME },
308 { "bind", 0, MS_BIND },
309 { "rbind", 0, MS_BIND|MS_REC },
310 { "relatime", 0, MS_RELATIME },
311 { "norelatime", 1, MS_RELATIME },
312 { "strictatime", 0, MS_STRICTATIME },
313 { "nostrictatime", 1, MS_STRICTATIME },
314 { NULL, 0, 0 },
998ac676
RT
315};
316
e37dda71 317#if HAVE_LIBCAP
81810dd1 318static struct caps_opt caps_opt[] = {
a6afdde9 319 { "chown", CAP_CHOWN },
1e11be34
DL
320 { "dac_override", CAP_DAC_OVERRIDE },
321 { "dac_read_search", CAP_DAC_READ_SEARCH },
322 { "fowner", CAP_FOWNER },
323 { "fsetid", CAP_FSETID },
81810dd1
DL
324 { "kill", CAP_KILL },
325 { "setgid", CAP_SETGID },
326 { "setuid", CAP_SETUID },
327 { "setpcap", CAP_SETPCAP },
328 { "linux_immutable", CAP_LINUX_IMMUTABLE },
329 { "net_bind_service", CAP_NET_BIND_SERVICE },
330 { "net_broadcast", CAP_NET_BROADCAST },
331 { "net_admin", CAP_NET_ADMIN },
332 { "net_raw", CAP_NET_RAW },
333 { "ipc_lock", CAP_IPC_LOCK },
334 { "ipc_owner", CAP_IPC_OWNER },
335 { "sys_module", CAP_SYS_MODULE },
336 { "sys_rawio", CAP_SYS_RAWIO },
337 { "sys_chroot", CAP_SYS_CHROOT },
338 { "sys_ptrace", CAP_SYS_PTRACE },
339 { "sys_pacct", CAP_SYS_PACCT },
340 { "sys_admin", CAP_SYS_ADMIN },
341 { "sys_boot", CAP_SYS_BOOT },
342 { "sys_nice", CAP_SYS_NICE },
343 { "sys_resource", CAP_SYS_RESOURCE },
344 { "sys_time", CAP_SYS_TIME },
345 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
346 { "mknod", CAP_MKNOD },
347 { "lease", CAP_LEASE },
57b837e2
CB
348#ifdef CAP_AUDIT_READ
349 { "audit_read", CAP_AUDIT_READ },
350#endif
9527e566 351#ifdef CAP_AUDIT_WRITE
81810dd1 352 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
353#endif
354#ifdef CAP_AUDIT_CONTROL
81810dd1 355 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 356#endif
81810dd1
DL
357 { "setfcap", CAP_SETFCAP },
358 { "mac_override", CAP_MAC_OVERRIDE },
359 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
360#ifdef CAP_SYSLOG
361 { "syslog", CAP_SYSLOG },
362#endif
363#ifdef CAP_WAKE_ALARM
364 { "wake_alarm", CAP_WAKE_ALARM },
365#endif
2b54359b
CB
366#ifdef CAP_BLOCK_SUSPEND
367 { "block_suspend", CAP_BLOCK_SUSPEND },
368#endif
81810dd1 369};
495d2046
SG
370#else
371static struct caps_opt caps_opt[] = {};
372#endif
81810dd1 373
91c3830e
SH
374static int run_buffer(char *buffer)
375{
ebec9176 376 struct lxc_popen_FILE *f;
91c3830e 377 char *output;
8e7da691 378 int ret;
91c3830e 379
ebec9176 380 f = lxc_popen(buffer);
91c3830e 381 if (!f) {
062b72c6 382 SYSERROR("Failed to popen() %s.", buffer);
91c3830e
SH
383 return -1;
384 }
385
386 output = malloc(LXC_LOG_BUFFER_SIZE);
387 if (!output) {
062b72c6 388 ERROR("Failed to allocate memory for %s.", buffer);
ebec9176 389 lxc_pclose(f);
91c3830e
SH
390 return -1;
391 }
392
062b72c6
CB
393 while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
394 DEBUG("Script %s with output: %s.", buffer, output);
91c3830e
SH
395
396 free(output);
397
ebec9176 398 ret = lxc_pclose(f);
8e7da691 399 if (ret == -1) {
062b72c6 400 SYSERROR("Script exited with error.");
91c3830e 401 return -1;
8e7da691 402 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
062b72c6 403 ERROR("Script exited with status %d.", WEXITSTATUS(ret));
8e7da691
DE
404 return -1;
405 } else if (WIFSIGNALED(ret)) {
062b72c6 406 ERROR("Script terminated by signal %d.", WTERMSIG(ret));
8e7da691 407 return -1;
91c3830e
SH
408 }
409
410 return 0;
411}
412
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * @argsin may be NULL; otherwise it is a NULL-terminated array of extra
 * arguments appended in order, each preceded by a single space.
 * @lxcpath is accepted but not used by this function.
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * assembled.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook,
			   const char *lxcpath, char **argsin)
{
	char *cmd;
	char **arg;
	int used;
	size_t cmd_size;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* Four fixed words, three separating spaces and the trailing NUL. */
	cmd_size = strlen(script) + strlen(name) + strlen(section) +
		   strlen(hook) + 4;

	/* Every extra argument costs its length plus one leading space. */
	for (arg = argsin; arg && *arg; arg++)
		cmd_size += strlen(*arg) + 1;

	if (cmd_size > INT_MAX)
		return -1;

	cmd = alloca(cmd_size);
	if (!cmd) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	used = snprintf(cmd, cmd_size, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= cmd_size) {
		ERROR("Script name too long.");
		return -1;
	}

	for (arg = argsin; arg && *arg; arg++) {
		int room = cmd_size - used;
		int written = snprintf(cmd + used, room, " %s", *arg);

		if (written < 0 || written >= room) {
			ERROR("Script args too long.");
			return -1;
		}
		used += written;
	}

	return run_buffer(cmd);
}
463
062b72c6
CB
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it with run_buffer().
 *
 * The variadic arguments are `char *` strings appended in order, each
 * preceded by a single space; the list must be terminated by a NULL pointer.
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * assembled.
 */
static int run_script(const char *name, const char *section, const char *script,
		      ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* First pass over the arguments: compute the buffer size. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* Two separating spaces and the trailing NUL. */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = alloca(size);
	if (!buffer) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long.");
		return -1;
	}

	/* Second pass: append every argument. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long.");
			/* Every va_start() must be paired with va_end(),
			 * including on this early exit. */
			va_end(ap);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	return run_buffer(buffer);
}
515
a17b1e65
SG
/*
 * Mount a directory rootfs: recursively bind-mount @rootfs onto @target,
 * adding whatever flags and data string parse_mntopts() derives from
 * @options.
 *
 * Returns the result of mount(2), or -1 if the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/* Initialized so that the free() below is well defined even if
	 * parse_mntopts() fails before storing anything through the
	 * pointers. NOTE(review): parse_mntopts() is not visible here;
	 * confirm whether it always initializes its outputs. */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
533
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed, given the LO_FLAGS_AUTOCLEAR flag and applied to the
 * device with LOOP_SET_STATUS64.
 *
 * Returns 0 on success, -1 on failure. On failure the loop device is left
 * unbound: if the backing file was already attached when setting the status
 * fails, it is detached again.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* The autoclear flag was not applied, so the device would
		 * stay bound to the backing file after the caller closes
		 * fd. Detach it explicitly (best effort). */
		(void)ioctl(fd, LOOP_CLR_FD, 0);
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
565
a17b1e65
SG
/*
 * Mount a regular-file rootfs through a loop device.
 *
 * Scans /dev for entries whose name starts with "loop" and takes the first
 * one for which LOOP_GET_STATUS64 fails with ENXIO (treated as a free
 * device). The file @rootfs is attached to it with setup_lodev() and the
 * device is then mounted on @target with mount_unknown_fs().
 *
 * Returns the result of setup_lodev()/mount_unknown_fs() for the chosen
 * device, or -1 if /dev cannot be opened or no usable device is found.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	char devpath[MAXPATHLEN];
	struct loop_info64 info;
	struct dirent *entry;
	DIR *devdir;
	int result = -1;

	devdir = opendir("/dev");
	if (!devdir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((entry = readdir(devdir)) != NULL) {
		int len, lofd;

		/* Only loop device nodes are of interest. */
		if (!strcmp(entry->d_name, ".") ||
		    !strcmp(entry->d_name, "..") ||
		    strncmp(entry->d_name, "loop", 4) != 0)
			continue;

		len = snprintf(devpath, MAXPATHLEN, "/dev/%s", entry->d_name);
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		lofd = open(devpath, O_RDWR);
		if (lofd < 0)
			continue;

		/* A device that reports a status is skipped. */
		if (ioctl(lofd, LOOP_GET_STATUS64, &info) == 0) {
			close(lofd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     entry->d_name);
			close(lofd);
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		/* Only the first free device is tried. */
		result = setup_lodev(rootfs, lofd, &info);
		if (result == 0)
			result = mount_unknown_fs(devpath, target, options);
		close(lofd);
		break;
	}

	if (closedir(devdir))
		WARN("failed to close directory");

	return result;
}
630
a17b1e65
SG
/*
 * Mount a block-device rootfs. The device is handed straight to
 * mount_unknown_fs(), whose result is returned unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int ret = mount_unknown_fs(rootfs, target, options);

	return ret;
}
636
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing for the
 * duration of the container run, to prevent the container from marking the
 * underlying fs read-only on shutdown. The file is unlinked immediately so
 * that no name pollution happens.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, its path cannot
 *           be resolved, or it is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* Reject both an output error (negative) and truncation. */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;

	/* The open descriptor is what pins the fs; the name is not needed. */
	(void)unlink(absrootfspin);
	return fd;
}
679
e2a7e8dc
SH
/*
 * When a remount is requested, carry over the NOSUID, NODEV, RDONLY and
 * NOEXEC flags that are already set on the existing mount, so the remount
 * does not drop them.
 *
 * The filesystem is looked up through @s, or through @d when @s is NULL.
 * @flags is returned unchanged if it does not contain MS_REMOUNT, if both
 * paths are NULL, if statvfs() fails, or if the build has no statvfs().
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs sb;
	size_t i;

	if (!(flags & MS_REMOUNT) || !path)
		return flags;

	if (statvfs(path, &sb) < 0)
		return flags;

	for (i = 0; i < sizeof(sticky) / sizeof(sticky[0]); i++) {
		if (sb.f_flag & sticky[i])
			flags |= sticky[i];
	}

	return flags;
#else
	return flags;
#endif
}
716
4fb3cba5 717static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 718{
368bbc02 719 int r;
80e80c40 720 int i;
b06b8511
CS
721 static struct {
722 int match_mask;
723 int match_flag;
724 const char *source;
725 const char *destination;
726 const char *fstype;
727 unsigned long flags;
728 const char *options;
729 } default_mounts[] = {
730 /* Read-only bind-mounting... In older kernels, doing that required
731 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
732 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
733 * kernel 2.6.26 onwards. However, this apparently does not work on
734 * kernel 3.8. Unfortunately, on that very same kernel, doing the
735 * same trick as above doesn't seem to work either, there one needs
736 * to ALSO specify MS_BIND for the remount, otherwise the entire
737 * fs is remounted read-only or the mount fails because it's busy...
738 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
739 * 2.6.32...
368bbc02 740 */
f24a52d5 741 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
742 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
743 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
744 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
745 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 746 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
747 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
748 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
749 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
750 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
751 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
752 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
753 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
754 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
755 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
756 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
757 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
758 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 759 };
368bbc02 760
b06b8511
CS
761 for (i = 0; default_mounts[i].match_mask; i++) {
762 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
763 char *source = NULL;
764 char *destination = NULL;
765 int saved_errno;
e2a7e8dc 766 unsigned long mflags;
b06b8511
CS
767
768 if (default_mounts[i].source) {
769 /* will act like strdup if %r is not present */
8ede5f4c 770 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
771 if (!source) {
772 SYSERROR("memory allocation error");
773 return -1;
774 }
775 }
cc4fd506
SH
776 if (!default_mounts[i].destination) {
777 ERROR("BUG: auto mounts destination %d was NULL", i);
b2f44b4d 778 free(source);
cc4fd506
SH
779 return -1;
780 }
781 /* will act like strdup if %r is not present */
782 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
783 if (!destination) {
784 saved_errno = errno;
785 SYSERROR("memory allocation error");
786 free(source);
787 errno = saved_errno;
788 return -1;
b06b8511 789 }
e2a7e8dc
SH
790 mflags = add_required_remount_flags(source, destination,
791 default_mounts[i].flags);
592fd47a 792 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 793 saved_errno = errno;
b88ff9a0
SG
794 if (r < 0 && errno == ENOENT) {
795 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
796 r = 0;
797 }
798 else if (r < 0)
e2a7e8dc 799 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 800
b06b8511
CS
801 free(source);
802 free(destination);
803 if (r < 0) {
b06b8511
CS
804 errno = saved_errno;
805 return -1;
806 }
368bbc02 807 }
368bbc02
CS
808 }
809
b06b8511 810 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
811 int cg_flags;
812
813 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
814 /* If the type of cgroup mount was not specified, it depends on the
815 * container's capabilities as to what makes sense: if we have
816 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
817 * anyway, so we may as well default to read-write; then the admin
818 * will not be given a false sense of security. (And if they really
819 * want mixed r/o r/w, then they can explicitly specify :mixed.)
820 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
821 * :mixed, because then the container can't remount it read-write. */
822 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
823 int has_sys_admin = 0;
824 if (!lxc_list_empty(&conf->keepcaps)) {
825 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
826 } else {
827 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
828 }
829 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
830 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
831 } else {
832 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
833 }
834 }
835
8ede5f4c 836 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 837 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 838 return -1;
368bbc02
CS
839 }
840 }
841
368bbc02 842 return 0;
368bbc02
CS
843}
844
a17b1e65 845static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 846{
b09ef133 847 char absrootfs[MAXPATHLEN];
78ae2fcc 848 struct stat s;
a6afdde9 849 int i;
78ae2fcc 850
a17b1e65 851 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 852
853 struct rootfs_type {
854 int type;
855 rootfs_cb cb;
856 } rtfs_type[] = {
2656d231
DL
857 { S_IFDIR, mount_rootfs_dir },
858 { S_IFBLK, mount_rootfs_block },
859 { S_IFREG, mount_rootfs_file },
78ae2fcc 860 };
0ad19a3f 861
4c8ab83b 862 if (!realpath(rootfs, absrootfs)) {
36eb9bde 863 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 864 return -1;
865 }
b09ef133 866
b09ef133 867 if (access(absrootfs, F_OK)) {
36eb9bde 868 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 869 return -1;
870 }
871
78ae2fcc 872 if (stat(absrootfs, &s)) {
36eb9bde 873 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 874 return -1;
875 }
876
78ae2fcc 877 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 878
78ae2fcc 879 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
880 continue;
9b0f0477 881
a17b1e65 882 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 883 }
9b0f0477 884
36eb9bde 885 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 886 return -1;
0ad19a3f 887}
888
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and counts as
 * success. Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);
	return 0;
}
903
69aa6655
DE
/* One symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* what the symlink points to */
	const char *name;	/* name of the symlink inside /dev */
};

/* Symlinks created by setup_dev_symlinks(). */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
915
916static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
917{
918 char path[MAXPATHLEN];
919 int ret,i;
09227be2 920 struct stat s;
69aa6655
DE
921
922
923 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
924 const struct dev_symlinks *d = &dev_symlinks[i];
ec50007f 925 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
926 if (ret < 0 || ret >= MAXPATHLEN)
927 return -1;
09227be2
MW
928
929 /*
930 * Stat the path first. If we don't get an error
931 * accept it as is and don't try to create it
932 */
933 if (!stat(path, &s)) {
934 continue;
935 }
936
69aa6655 937 ret = symlink(d->oldpath, path);
09227be2 938
69aa6655 939 if (ret && errno != EEXIST) {
09227be2
MW
940 if ( errno == EROFS ) {
941 WARN("Warning: Read Only file system while creating %s", path);
942 } else {
943 SYSERROR("Error creating %s", path);
944 return -1;
945 }
69aa6655
DE
946 }
947 }
948 return 0;
949}
950
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * On the first call (*pp == NULL) the list is created as
 * "container_ttys=<name>"; later calls append " <name>". *pp is heap
 * allocated and may be moved by the append.
 *
 * Returns true on success, false if memory cannot be allocated, in which
 * case *pp is left unchanged.
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t old_len;
	char *list = *pp;

	if (list == NULL) {
		list = malloc(name_len + prefix_len + 1);
		if (list == NULL)
			return false;

		memcpy(list, prefix, prefix_len);
		/* Copies the terminating NUL as well. */
		memcpy(list + prefix_len, name, name_len + 1);
		*pp = list;
		return true;
	}

	old_len = strlen(list);
	/* Room for the separator and the terminating NUL. */
	list = realloc(list, old_len + name_len + 2);
	if (list == NULL)
		return false;

	list[old_len] = ' ';
	memcpy(list + old_len + 1, name, name_len + 1);
	*pp = list;
	return true;
}
973
974static int setup_tty(struct lxc_conf *conf)
975{
393903d1
SH
976 const struct lxc_tty_info *tty_info = &conf->tty_info;
977 char *ttydir = conf->ttydir;
7c6ef2a2
SH
978 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
979 int i, ret;
b0a33c1e 980
e8bd4e43 981 if (!conf->rootfs.path)
bc9bd0e3
DL
982 return 0;
983
b0a33c1e 984 for (i = 0; i < tty_info->nbtty; i++) {
985
986 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
987
e8bd4e43 988 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
989 if (ret >= sizeof(path)) {
990 ERROR("pathname too long for ttys");
991 return -1;
992 }
993 if (ttydir) {
994 /* create dev/lxc/tty%d" */
e8bd4e43 995 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
996 if (ret >= sizeof(lxcpath)) {
997 ERROR("pathname too long for ttys");
998 return -1;
999 }
1000 ret = creat(lxcpath, 0660);
1001 if (ret==-1 && errno != EEXIST) {
959aee9c 1002 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
1003 return -1;
1004 }
4d44e274
SH
1005 if (ret >= 0)
1006 close(ret);
7c6ef2a2
SH
1007 ret = unlink(path);
1008 if (ret && errno != ENOENT) {
959aee9c 1009 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1010 return -1;
1011 }
b0a33c1e 1012
7c6ef2a2
SH
1013 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1014 WARN("failed to mount '%s'->'%s'",
1015 pty_info->name, path);
1016 continue;
1017 }
13954cce 1018
9ba8130c
SH
1019 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1020 if (ret >= sizeof(lxcpath)) {
1021 ERROR("tty pathname too long");
1022 return -1;
1023 }
7c6ef2a2
SH
1024 ret = symlink(lxcpath, path);
1025 if (ret) {
959aee9c 1026 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1027 return -1;
1028 }
1029 } else {
c6883f38
SH
1030 /* If we populated /dev, then we need to create /dev/ttyN */
1031 if (access(path, F_OK)) {
1032 ret = creat(path, 0660);
1033 if (ret==-1) {
959aee9c 1034 SYSERROR("error creating %s", path);
c6883f38 1035 /* this isn't fatal, continue */
025ed0f3 1036 } else {
c6883f38 1037 close(ret);
025ed0f3 1038 }
c6883f38 1039 }
7c6ef2a2 1040 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 1041 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
1042 continue;
1043 }
393903d1 1044 }
e8bd4e43 1045 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
1046 ERROR("Error setting up container_ttys string");
1047 return -1;
b0a33c1e 1048 }
1049 }
1050
cd54d859
DL
1051 INFO("%d tty(s) has been setup", tty_info->nbtty);
1052
b0a33c1e 1053 return 0;
1054}
1055
bf601689 1056
59bb8698 1057static int setup_rootfs_pivot_root(const char *rootfs)
bf601689 1058{
2d489f9e 1059 int oldroot = -1, newroot = -1;
bf601689 1060
2d489f9e
SH
1061 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1062 if (oldroot < 0) {
1063 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1064 return -1;
1065 }
2d489f9e
SH
1066 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1067 if (newroot < 0) {
1068 SYSERROR("Error opening new-/ for fchdir");
1069 goto fail;
c08556c6 1070 }
bf601689 1071
cc6f6dd7 1072 /* change into new root fs */
2d489f9e 1073 if (fchdir(newroot)) {
cc6f6dd7 1074 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1075 goto fail;
cc6f6dd7
DL
1076 }
1077
cc6f6dd7 1078 /* pivot_root into our new root fs */
2d489f9e 1079 if (pivot_root(".", ".")) {
cc6f6dd7 1080 SYSERROR("pivot_root syscall failed");
2d489f9e 1081 goto fail;
bf601689 1082 }
cc6f6dd7 1083
2d489f9e
SH
1084 /*
1085 * at this point the old-root is mounted on top of our new-root
1086 * To unmounted it we must not be chdir'd into it, so escape back
1087 * to old-root
1088 */
1089 if (fchdir(oldroot) < 0) {
1090 SYSERROR("Error entering oldroot");
1091 goto fail;
1092 }
7981ea46 1093 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1094 SYSERROR("Error detaching old root");
1095 goto fail;
cc6f6dd7
DL
1096 }
1097
2d489f9e
SH
1098 if (fchdir(newroot) < 0) {
1099 SYSERROR("Error re-entering newroot");
1100 goto fail;
1101 }
cc6f6dd7 1102
2d489f9e
SH
1103 close(oldroot);
1104 close(newroot);
bf601689 1105
2d489f9e 1106 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1107
bf601689 1108 return 0;
2d489f9e
SH
1109
1110fail:
1111 if (oldroot != -1)
1112 close(oldroot);
1113 if (newroot != -1)
1114 close(newroot);
1115 return -1;
bf601689
MH
1116}
1117
bc6928ff 1118/*
87da4ec3
SH
1119 * Just create a path for /dev under $lxcpath/$name and in rootfs
1120 * If we hit an error, log it but don't fail yet.
91c3830e 1121 */
14221cbb 1122static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1123{
1124 int ret;
87da4ec3
SH
1125 size_t clen;
1126 char *path;
91c3830e 1127
14221cbb 1128 INFO("Mounting container /dev");
bc6928ff 1129
14221cbb 1130 /* $(rootfs->mount) + "/dev/pts" + '\0' */
ec50007f 1131 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1132 path = alloca(clen);
bc6928ff 1133
ec50007f 1134 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
87da4ec3 1135 if (ret < 0 || ret >= clen)
91c3830e 1136 return -1;
bc6928ff 1137
87da4ec3 1138 if (!dir_exists(path)) {
14221cbb 1139 WARN("No /dev in container.");
87da4ec3
SH
1140 WARN("Proceeding without autodev setup");
1141 return 0;
bc6928ff 1142 }
87da4ec3 1143
1ec0e8e3 1144 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
ec50007f 1145 rootfs->path ? rootfs->mount : NULL);
1ec0e8e3 1146 if (ret != 0) {
87da4ec3 1147 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1ec0e8e3 1148 return -1;
91c3830e 1149 }
87da4ec3
SH
1150
1151 INFO("Mounted tmpfs onto %s", path);
1152
ec50007f 1153 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
87da4ec3 1154 if (ret < 0 || ret >= clen)
91c3830e 1155 return -1;
87da4ec3 1156
bc6928ff
MW
1157 /*
1158 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1159 * If not, then create it and exit if that fails...
1160 */
87da4ec3 1161 if (!dir_exists(path)) {
bc6928ff
MW
1162 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1163 if (ret) {
1164 SYSERROR("Failed to create /dev/pts in container");
1165 return -1;
1166 }
91c3830e
SH
1167 }
1168
14221cbb 1169 INFO("Mounted container /dev");
91c3830e
SH
1170 return 0;
1171}
1172
/* Description of one device node to create under the container's /dev. */
struct lxc_devs {
	const char *name;	/* node name relative to /dev */
	mode_t mode;		/* file type and permission bits passed to mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes created by fill_autodev(). */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1189
0728ebf4 1190static int fill_autodev(const struct lxc_rootfs *rootfs, bool mount_console)
c6883f38
SH
1191{
1192 int ret;
c6883f38
SH
1193 char path[MAXPATHLEN];
1194 int i;
3a32201c 1195 mode_t cmask;
c6883f38 1196
14221cbb 1197 INFO("Creating initial consoles under container /dev");
91c3830e 1198
ec50007f 1199 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
91c3830e
SH
1200 if (ret < 0 || ret >= MAXPATHLEN) {
1201 ERROR("Error calculating container /dev location");
c6883f38 1202 return -1;
f7bee6c6 1203 }
91c3830e 1204
9769034f 1205 if (!dir_exists(path)) // ignore, just don't try to fill in
9cb4d183
SH
1206 return 0;
1207
14221cbb 1208 INFO("Populating container /dev");
3a32201c 1209 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1210 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1211 const struct lxc_devs *d = &lxc_devs[i];
0728ebf4
TA
1212
1213 if (!strcmp(d->name, "console") && !mount_console)
1214 continue;
1215
ec50007f 1216 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
c6883f38
SH
1217 if (ret < 0 || ret >= MAXPATHLEN)
1218 return -1;
1219 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1220 if (ret && errno != EEXIST) {
9cb4d183
SH
1221 char hostpath[MAXPATHLEN];
1222 FILE *pathfile;
1223
1224 // Unprivileged containers cannot create devices, so
1225 // bind mount the device from the host
1226 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1227 if (ret < 0 || ret >= MAXPATHLEN)
1228 return -1;
1229 pathfile = fopen(path, "wb");
1230 if (!pathfile) {
1231 SYSERROR("Failed to create device mount target '%s'", path);
1232 return -1;
1233 }
1234 fclose(pathfile);
592fd47a 1235 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
ec50007f 1236 rootfs->path ? rootfs->mount : NULL) != 0) {
9cb4d183
SH
1237 SYSERROR("Failed bind mounting device %s from host into container",
1238 d->name);
1239 return -1;
1240 }
c6883f38
SH
1241 }
1242 }
3a32201c 1243 umask(cmask);
c6883f38 1244
14221cbb 1245 INFO("Populated container /dev");
c6883f38
SH
1246 return 0;
1247}
1248
cc28d0b0 1249static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1250{
cc28d0b0
SH
1251 const struct lxc_rootfs *rootfs = &conf->rootfs;
1252
a0f379bf
DW
1253 if (!rootfs->path) {
1254 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1255 SYSERROR("Failed to make / rslave");
1256 return -1;
1257 }
c69bd12f 1258 return 0;
a0f379bf 1259 }
0ad19a3f 1260
12297168 1261 if (access(rootfs->mount, F_OK)) {
b1789442 1262 SYSERROR("failed to access to '%s', check it is present",
12297168 1263 rootfs->mount);
b1789442
DL
1264 return -1;
1265 }
1266
9be53773 1267 // First try mounting rootfs using a bdev
76a26f55 1268 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1269 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1270 bdev_put(bdev);
9be53773
SH
1271 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1272 return 0;
1273 }
59d66af2
SH
1274 if (bdev)
1275 bdev_put(bdev);
a17b1e65 1276 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1277 ERROR("failed to mount rootfs");
c3f0a28c 1278 return -1;
1279 }
0ad19a3f 1280
12297168 1281 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1282
ac778708
DL
1283 return 0;
1284}
1285
91e93c71
AV
1286int prepare_ramfs_root(char *root)
1287{
eab15c1e 1288 char buf[LXC_LINELEN], *p;
91e93c71
AV
1289 char nroot[PATH_MAX];
1290 FILE *f;
1291 int i;
1292 char *p2;
1293
1294 if (realpath(root, nroot) == NULL)
1295 return -1;
1296
1297 if (chdir("/") == -1)
1298 return -1;
1299
1300 /*
1301 * We could use here MS_MOVE, but in userns this mount is
1302 * locked and can't be moved.
1303 */
1304 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1305 SYSERROR("Failed to move %s into /", root);
1306 return -1;
1307 }
1308
88322f77 1309 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1310 SYSERROR("Failed to make . rprivate");
1311 return -1;
1312 }
1313
1314 /*
1315 * The following code cleans up inhereted mounts which are not
1316 * required for CT.
1317 *
1318 * The mountinfo file shows not all mounts, if a few points have been
1319 * unmounted between read operations from the mountinfo. So we need to
1320 * read mountinfo a few times.
1321 *
1322 * This loop can be skipped if a container uses unserns, because all
1323 * inherited mounts are locked and we should live with all this trash.
1324 */
1325 while (1) {
1326 int progress = 0;
1327
1328 f = fopen("./proc/self/mountinfo", "r");
1329 if (!f) {
1330 SYSERROR("Unable to open /proc/self/mountinfo");
1331 return -1;
1332 }
eab15c1e 1333 while (fgets(buf, LXC_LINELEN, f)) {
91e93c71
AV
1334 for (p = buf, i=0; p && i < 4; i++)
1335 p = strchr(p+1, ' ');
1336 if (!p)
1337 continue;
1338 p2 = strchr(p+1, ' ');
1339 if (!p2)
1340 continue;
1341
1342 *p2 = '\0';
1343 *p = '.';
1344
1345 if (strcmp(p + 1, "/") == 0)
1346 continue;
1347 if (strcmp(p + 1, "/proc") == 0)
1348 continue;
1349
1350 if (umount2(p, MNT_DETACH) == 0)
1351 progress++;
1352 }
1353 fclose(f);
1354 if (!progress)
1355 break;
1356 }
1357
8bea9fae
PR
1358 /* This also can be skipped if a container uses unserns */
1359 umount2("./proc", MNT_DETACH);
91e93c71
AV
1360
1361 /* It is weird, but chdir("..") moves us in a new root */
1362 if (chdir("..") == -1) {
1363 SYSERROR("Unable to change working directory");
1364 return -1;
1365 }
1366
1367 if (chroot(".") == -1) {
1368 SYSERROR("Unable to chroot");
1369 return -1;
1370 }
1371
1372 return 0;
1373}
1374
74a3920a 1375static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1376{
ac778708
DL
1377 if (!rootfs->path)
1378 return 0;
1379
91e93c71
AV
1380 if (detect_ramfs_rootfs()) {
1381 if (prepare_ramfs_root(rootfs->mount))
1382 return -1;
59bb8698 1383 } else if (setup_rootfs_pivot_root(rootfs->mount)) {
cc6f6dd7 1384 ERROR("failed to setup pivot root");
25368b52 1385 return -1;
c69bd12f
DL
1386 }
1387
25368b52 1388 return 0;
0ad19a3f 1389}
1390
d852c78c 1391static int setup_pts(int pts)
3c26f34e 1392{
77890c6d
SW
1393 char target[PATH_MAX];
1394
d852c78c
DL
1395 if (!pts)
1396 return 0;
3c26f34e 1397
1398 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1399 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1400 return -1;
1401 }
1402
7e40254a
JTLB
1403 if (mkdir("/dev/pts", 0755)) {
1404 if ( errno != EEXIST ) {
1405 SYSERROR("failed to create '/dev/pts'");
1406 return -1;
1407 }
1408 }
1409
a6afdde9 1410 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1411 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1412 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1413 return -1;
1414 }
1415
3c26f34e 1416 if (access("/dev/ptmx", F_OK)) {
1417 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1418 goto out;
36eb9bde 1419 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1420 return -1;
1421 }
1422
77890c6d
SW
1423 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1424 goto out;
1425
3c26f34e 1426 /* fallback here, /dev/pts/ptmx exists just mount bind */
1427 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1428 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1429 return -1;
1430 }
cd54d859
DL
1431
1432 INFO("created new pts instance");
d852c78c 1433
3c26f34e 1434out:
1435 return 0;
1436}
1437
cccc74b5
DL
/*
 * Apply the execution domain @persona to the calling process.
 *
 * A value of -1 means "nothing to set". When the build has no
 * <sys/personality.h> (HAVE_SYS_PERSONALITY_H is unset) the function is a
 * no-op. Returns 0 on success and -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1454
7c6ef2a2 1455static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1456 const struct lxc_console *console)
6e590161 1457{
63376d7d 1458 char path[MAXPATHLEN];
0728ebf4 1459 int ret, fd;
52e35957 1460
7c6ef2a2
SH
1461 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1462 if (ret >= sizeof(path)) {
959aee9c 1463 ERROR("console path too long");
7c6ef2a2
SH
1464 return -1;
1465 }
52e35957 1466
0728ebf4
TA
1467 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1468 if (fd < 0) {
1469 if (errno != EEXIST) {
1470 SYSERROR("failed to create console");
1471 return -1;
1472 }
1473 } else {
1474 close(fd);
52e35957
DL
1475 }
1476
b5159817
DE
1477 if (console->master < 0) {
1478 INFO("no console");
f78a1f32
DL
1479 return 0;
1480 }
ed502555 1481
0728ebf4 1482 if (chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH)) {
63376d7d 1483 SYSERROR("failed to set mode '0%o' to '%s'",
0728ebf4 1484 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
63376d7d
DL
1485 return -1;
1486 }
13954cce 1487
592fd47a 1488 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1489 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1490 return -1;
1491 }
1492
63376d7d 1493 INFO("console has been setup");
7c6ef2a2
SH
1494 return 0;
1495}
1496
1497static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1498 const struct lxc_console *console,
1499 char *ttydir)
1500{
1501 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1502 int ret;
1503
1504 /* create rootfs/dev/<ttydir> directory */
1505 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1506 ttydir);
1507 if (ret >= sizeof(path))
1508 return -1;
1509 ret = mkdir(path, 0755);
1510 if (ret && errno != EEXIST) {
959aee9c 1511 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1512 return -1;
1513 }
959aee9c 1514 INFO("created %s", path);
7c6ef2a2
SH
1515
1516 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1517 rootfs->mount, ttydir);
1518 if (ret >= sizeof(lxcpath)) {
959aee9c 1519 ERROR("console path too long");
7c6ef2a2
SH
1520 return -1;
1521 }
1522
1523 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1524 ret = unlink(path);
1525 if (ret && errno != ENOENT) {
959aee9c 1526 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1527 return -1;
1528 }
1529
1530 ret = creat(lxcpath, 0660);
1531 if (ret==-1 && errno != EEXIST) {
959aee9c 1532 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1533 return -1;
1534 }
4d44e274
SH
1535 if (ret >= 0)
1536 close(ret);
7c6ef2a2 1537
b5159817
DE
1538 if (console->master < 0) {
1539 INFO("no console");
7c6ef2a2
SH
1540 return 0;
1541 }
1542
592fd47a 1543 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1544 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1545 return -1;
1546 }
1547
1548 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1549 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1550 if (ret >= sizeof(lxcpath)) {
1551 ERROR("lxc/console path too long");
1552 return -1;
1553 }
7c6ef2a2
SH
1554 ret = symlink(lxcpath, path);
1555 if (ret) {
1556 SYSERROR("failed to create symlink for console");
1557 return -1;
1558 }
1559
1560 INFO("console has been setup on %s", lxcpath);
cd54d859 1561
6e590161 1562 return 0;
1563}
1564
7c6ef2a2
SH
1565static int setup_console(const struct lxc_rootfs *rootfs,
1566 const struct lxc_console *console,
1567 char *ttydir)
1568{
1569 /* We don't have a rootfs, /dev/console will be shared */
1570 if (!rootfs->path)
1571 return 0;
1572 if (!ttydir)
1573 return setup_dev_console(rootfs, console);
1574
1575 return setup_ttydir_console(rootfs, console, ttydir);
1576}
1577
1bd051a6
SH
1578static int setup_kmsg(const struct lxc_rootfs *rootfs,
1579 const struct lxc_console *console)
1580{
1581 char kpath[MAXPATHLEN];
1582 int ret;
1583
222fea5a
DE
1584 if (!rootfs->path)
1585 return 0;
1bd051a6
SH
1586 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1587 if (ret < 0 || ret >= sizeof(kpath))
1588 return -1;
1589
1590 ret = unlink(kpath);
1591 if (ret && errno != ENOENT) {
959aee9c 1592 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1593 return -1;
1594 }
1595
1596 ret = symlink("console", kpath);
1597 if (ret) {
1598 SYSERROR("failed to create symlink for kmsg");
1599 return -1;
1600 }
1601
1602 return 0;
1603}
1604
998ac676
RT
1605static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1606{
1607 struct mount_opt *mo;
1608
1609 /* If opt is found in mount_opt, set or clear flags.
1610 * Otherwise append it to data. */
1611
1612 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1613 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1614 if (mo->clear)
1615 *flags &= ~mo->flag;
1616 else
1617 *flags |= mo->flag;
1618 return;
1619 }
1620 }
1621
1622 if (strlen(*data))
1623 strcat(*data, ",");
1624 strcat(*data, opt);
1625}
1626
/*
 * Split the comma-separated option string @mntopts into mount flags and a
 * data string.
 *
 * *@mntflags and *@mntdata are always initialised (to 0 and NULL). On
 * success *@mntdata is either NULL (no data options) or a heap string the
 * caller has to free(). Returns 0 on success and -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *data, *tok;
	char *saveptr = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data options can never be longer than the full option string */
	data = malloc(strlen(copy) + 1);
	if (!data) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	*data = 0;

	tok = strtok_r(copy, ",", &saveptr);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &data);
		tok = strtok_r(NULL, ",", &saveptr);
	}

	if (*data != '\0')
		*mntdata = data;
	else
		free(data);

	free(copy);

	return 0;
}
1665
6fd5e769
SH
/*
 * Terminate @word at its first space or tab, cutting it down to its first
 * whitespace-delimited token. A string without space or tab is unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1672
/*
 * Skip @nfields separators (space or tab) in @src and return a pointer to
 * the character following the last one skipped. If the string ends first,
 * a pointer to its terminating NUL is returned.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped;

	for (skipped = 0; skipped < nfields; skipped++) {
		/* advance to the next separator (or the end of the string) */
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1690
911324ef
DL
/*
 * Mount @fsname on @target and, for bind or remount requests, follow up with
 * a remount so that the requested flags take effect.
 *
 * The first mount goes through safe_mount() with MS_REMOUNT stripped. For
 * bind/remount requests the flags reported by statvfs() for @fsname
 * (nosuid, nodev unless @dev is set, ro, noexec) are added to the remount
 * flags when statvfs support is compiled in; a plain bind mount that needs
 * no extra flags skips the remount.
 *
 * A failing mount returns -1, or 0 when @optional is set.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, int dev, const char *rootfs)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		unsigned long rqd_flags;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

		/* an explicitly requested read-only always forces the remount */
		rqd_flags = mountflags & MS_RDONLY;

#ifdef HAVE_STATVFS
		if (statvfs(fsname, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			if (sb.f_flag & MS_NOSUID)
				required_flags |= MS_NOSUID;
			if ((sb.f_flag & MS_NODEV) && !dev)
				required_flags |= MS_NODEV;
			if (sb.f_flag & MS_RDONLY)
				required_flags |= MS_RDONLY;
			if (sb.f_flag & MS_NOEXEC)
				required_flags |= MS_NOEXEC;

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

			/*
			 * For a pure bind mount there is nothing to do when
			 * every required flag is already part of mountflags.
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(required_flags & ~mountflags) && rqd_flags == 0) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				goto skipremount;
			}

			mountflags |= required_flags;
		}
#endif

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) < 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}

			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

#ifdef HAVE_STATVFS
skipremount:
#endif
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1767
4e4ca161
SH
/*
 * Remove 'optional', 'create=dir', and 'create=file' from mntent->mnt_opts.
 *
 * The option string is edited in place. Every occurrence of each option is
 * removed, and when the removed option was the last one in the list the
 * comma that preceded it is dropped as well, so no dangling separator is
 * left behind. As before, an option is matched as a substring and removed up
 * to the next comma.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const list[] = {"create=dir",
					   "create=file",
					   "optional",
					   NULL };
	char *opts = mntent->mnt_opts;
	int i;

	if (!opts)
		return;

	for (i = 0; list[i]; i++) {
		char *p;

		while ((p = strstr(opts, list[i])) != NULL) {
			char *next = strchr(p, ',');

			if (next) {
				/* close the gap, including the trailing comma */
				memmove(p, next + 1, strlen(next + 1) + 1);
				continue;
			}

			/* no more mntopts: chop here, along with the comma before it */
			if (p > opts && p[-1] == ',')
				p--;
			*p = '\0';
		}
	}
}
1792
/*
 * Create the mount target @path when the entry asks for it.
 *
 * For "overlay"/"aufs" entries the filesystem-specific mkdir helper is
 * called first. "create=dir" creates @path as a directory (with parents);
 * "create=file" creates the parent directories and an empty file at @path if
 * @path does not exist yet.
 *
 * Returns 0 on success and -1 if a helper fails, memory runs out or the
 * target cannot be created. A failure to create the parent directory of a
 * file target is only logged.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int ret = 0;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (ovl_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (aufs_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		char *pathcopy;

		/*
		 * dirname() may modify its argument and may return a pointer
		 * that is not the one passed in, so keep the allocation in
		 * its own variable and free exactly that.
		 */
		pathcopy = strdup(path);
		if (!pathcopy) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		if (mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
1833
ec50007f
CB
1834/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
1835 * without a rootfs. */
db4aba38 1836static inline int mount_entry_on_generic(struct mntent *mntent,
0a2dddd4
CB
1837 const char* path, const struct lxc_rootfs *rootfs,
1838 const char *lxc_name, const char *lxc_path)
4d5b72a1
NC
1839{
1840 unsigned long mntflags;
1841 char *mntdata;
1842 int ret;
1843 bool optional = hasmntopt(mntent, "optional") != NULL;
ae7a770e 1844 bool dev = hasmntopt(mntent, "dev") != NULL;
4d5b72a1 1845
ec50007f
CB
1846 char *rootfs_path = NULL;
1847 if (rootfs && rootfs->path)
1848 rootfs_path = rootfs->mount;
1849
0a2dddd4 1850 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, lxc_path);
34cfffb3 1851
608e3567
SH
1852 if (ret < 0)
1853 return optional ? 0 : -1;
1854
4e4ca161
SH
1855 cull_mntent_opt(mntent);
1856
a17b1e65
SG
1857 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1858 free(mntdata);
1859 return -1;
1860 }
1861
6e46cc0d 1862 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
ae7a770e 1863 mntdata, optional, dev, rootfs_path);
68c152ef 1864
911324ef 1865 free(mntdata);
911324ef
DL
1866 return ret;
1867}
1868
db4aba38
NC
/*
 * Mount an entry for a container that has no rootfs: the mount directory is
 * used as an absolute path, with a leading '/' added when it is missing.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char path[MAXPATHLEN];
	const char *dir = mntent->mnt_dir;
	int len;

	/* For containers created without a rootfs all mounts are treated as
	 * absolute paths starting at / on the host. */
	len = snprintf(path, sizeof(path), "%s%s", dir[0] == '/' ? "" : "/", dir);
	if (len < 0 || (size_t)len >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	return mount_entry_on_generic(mntent, path, NULL, NULL, NULL);
}
1888
4e4ca161 1889static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 1890 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
1891 const char *lxc_name,
1892 const char *lxc_path)
911324ef 1893{
013bd428 1894 char *aux;
59760f5d 1895 char path[MAXPATHLEN];
80a881b2 1896 int r, ret = 0, offset;
67e571de 1897 const char *lxcpath;
0ad19a3f 1898
593e8478 1899 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1900 if (!lxcpath) {
1901 ERROR("Out of memory");
1902 return -1;
1903 }
1904
80a881b2 1905 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1906 * use $lxcpath/CN/rootfs as the target prefix */
1907 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1908 if (r < 0 || r >= MAXPATHLEN)
1909 goto skipvarlib;
1910
1911 aux = strstr(mntent->mnt_dir, path);
1912 if (aux) {
1913 offset = strlen(path);
1914 goto skipabs;
1915 }
1916
1917skipvarlib:
013bd428
DL
1918 aux = strstr(mntent->mnt_dir, rootfs->path);
1919 if (!aux) {
1920 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 1921 return ret;
013bd428 1922 }
80a881b2
SH
1923 offset = strlen(rootfs->path);
1924
1925skipabs:
013bd428 1926
9ba8130c 1927 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1928 aux + offset);
1929 if (r < 0 || r >= MAXPATHLEN) {
1930 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
1931 return -1;
1932 }
1933
0a2dddd4 1934 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 1935}
d330fe7b 1936
4e4ca161 1937static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
1938 const struct lxc_rootfs *rootfs,
1939 const char *lxc_name,
1940 const char *lxc_path)
911324ef
DL
1941{
1942 char path[MAXPATHLEN];
911324ef 1943 int ret;
d330fe7b 1944
34cfffb3 1945 /* relative to root mount point */
6e46cc0d 1946 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
1433c9f9 1947 if (ret < 0 || ret >= sizeof(path)) {
9ba8130c
SH
1948 ERROR("path name too long");
1949 return -1;
1950 }
911324ef 1951
0a2dddd4 1952 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
1953}
1954
80a881b2 1955static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
0a2dddd4 1956 const char *lxc_name, const char *lxc_path)
911324ef 1957{
aaf901be
AM
1958 struct mntent mntent;
1959 char buf[4096];
911324ef 1960 int ret = -1;
e76b8764 1961
aaf901be 1962 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 1963
911324ef 1964 if (!rootfs->path) {
aaf901be 1965 if (mount_entry_on_systemfs(&mntent))
e76b8764 1966 goto out;
911324ef 1967 continue;
e76b8764
CDC
1968 }
1969
911324ef 1970 /* We have a separate root, mounts are relative to it */
aaf901be 1971 if (mntent.mnt_dir[0] != '/') {
0a2dddd4 1972 if (mount_entry_on_relative_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef
DL
1973 goto out;
1974 continue;
1975 }
cd54d859 1976
0a2dddd4 1977 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef 1978 goto out;
0ad19a3f 1979 }
cd54d859 1980
0ad19a3f 1981 ret = 0;
cd54d859
DL
1982
1983 INFO("mount points have been setup");
0ad19a3f 1984out:
e7938e9e
MN
1985 return ret;
1986}
1987
/*
 * Mount all entries of the fstab file @fstab. A NULL @fstab means there is
 * nothing to mount. Returns 0 on success and -1 if the file cannot be opened
 * or an entry fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *fp;
	int rc;

	if (fstab == NULL)
		return 0;

	fp = setmntent(fstab, "r");
	if (fp == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fp, lxc_name, lxc_path);
	endmntent(fp);

	return rc;
}
2008
5ef5c9a3 2009FILE *make_anonymous_mount_file(struct lxc_list *mount)
e7938e9e 2010{
5ef5c9a3 2011 int ret;
e7938e9e 2012 char *mount_entry;
5ef5c9a3
CB
2013 struct lxc_list *iterator;
2014 FILE *file;
2015 int fd = -1;
2016
2017 fd = memfd_create("lxc_mount_file", MFD_CLOEXEC);
2018 if (fd < 0) {
2019 if (errno != ENOSYS)
2020 return NULL;
2021 file = tmpfile();
2022 } else {
2023 file = fdopen(fd, "r+");
2024 }
e7938e9e 2025
e7938e9e 2026 if (!file) {
fad6ef95 2027 int saved_errno = errno;
5ef5c9a3
CB
2028 if (fd != -1)
2029 close(fd);
fad6ef95 2030 ERROR("Could not create mount entry file: %s.", strerror(saved_errno));
9fc7f8c0 2031 return NULL;
e7938e9e
MN
2032 }
2033
2034 lxc_list_for_each(iterator, mount) {
2035 mount_entry = iterator->elem;
5ef5c9a3
CB
2036 ret = fprintf(file, "%s\n", mount_entry);
2037 if (ret < strlen(mount_entry))
2038 WARN("Could not write mount entry to anonymous mount file.");
2039 }
2040
2041 if (fseek(file, 0, SEEK_SET) < 0) {
2042 fclose(file);
2043 return NULL;
e7938e9e
MN
2044 }
2045
9fc7f8c0
TA
2046 return file;
2047}
2048
5ef5c9a3
CB
/*
 * Mount the entries held in the list @mount by writing them to an anonymous
 * file and processing that file like an fstab. Returns 0 on success and -1
 * on failure.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	FILE *fp;
	int rc;

	fp = make_anonymous_mount_file(mount);
	if (fp == NULL)
		return -1;

	rc = mount_file_entries(rootfs, fp, lxc_name, lxc_path);
	fclose(fp);

	return rc;
}
2065
bab88e68
CS
2066static int parse_cap(const char *cap)
2067{
2068 char *ptr = NULL;
84760c11 2069 size_t i;
2070 int capid = -1;
bab88e68 2071
7035407c
DE
2072 if (!strcmp(cap, "none"))
2073 return -2;
2074
bab88e68
CS
2075 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2076
2077 if (strcmp(cap, caps_opt[i].name))
2078 continue;
2079
2080 capid = caps_opt[i].value;
2081 break;
2082 }
2083
2084 if (capid < 0) {
2085 /* try to see if it's numeric, so the user may specify
2086 * capabilities that the running kernel knows about but
2087 * we don't */
2088 errno = 0;
2089 capid = strtol(cap, &ptr, 10);
2090 if (!ptr || *ptr != '\0' || errno != 0)
2091 /* not a valid number */
2092 capid = -1;
2093 else if (capid > lxc_caps_last_cap())
2094 /* we have a number but it's not a valid
2095 * capability */
2096 capid = -1;
2097 }
2098
2099 return capid;
2100}
2101
0769b82a
CS
2102int in_caplist(int cap, struct lxc_list *caps)
2103{
2104 struct lxc_list *iterator;
2105 int capid;
2106
2107 lxc_list_for_each(iterator, caps) {
2108 capid = parse_cap(iterator->elem);
2109 if (capid == cap)
2110 return 1;
2111 }
2112
2113 return 0;
2114}
2115
81810dd1
DL
2116static int setup_caps(struct lxc_list *caps)
2117{
2118 struct lxc_list *iterator;
2119 char *drop_entry;
bab88e68 2120 int capid;
81810dd1
DL
2121
2122 lxc_list_for_each(iterator, caps) {
2123
2124 drop_entry = iterator->elem;
2125
bab88e68 2126 capid = parse_cap(drop_entry);
d55bc1ad 2127
81810dd1 2128 if (capid < 0) {
1e11be34
DL
2129 ERROR("unknown capability %s", drop_entry);
2130 return -1;
81810dd1
DL
2131 }
2132
2133 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2134
2135 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2136 SYSERROR("failed to remove %s capability", drop_entry);
2137 return -1;
2138 }
81810dd1
DL
2139
2140 }
2141
1fb86a7c
SH
2142 DEBUG("capabilities have been setup");
2143
2144 return 0;
2145}
2146
2147static int dropcaps_except(struct lxc_list *caps)
2148{
2149 struct lxc_list *iterator;
2150 char *keep_entry;
1fb86a7c
SH
2151 int i, capid;
2152 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2153 INFO("found %d capabilities", numcaps);
1fb86a7c 2154
2caf9a97
SH
2155 if (numcaps <= 0 || numcaps > 200)
2156 return -1;
2157
1fb86a7c
SH
2158 // caplist[i] is 1 if we keep capability i
2159 int *caplist = alloca(numcaps * sizeof(int));
2160 memset(caplist, 0, numcaps * sizeof(int));
2161
2162 lxc_list_for_each(iterator, caps) {
2163
2164 keep_entry = iterator->elem;
2165
bab88e68 2166 capid = parse_cap(keep_entry);
1fb86a7c 2167
7035407c
DE
2168 if (capid == -2)
2169 continue;
2170
1fb86a7c
SH
2171 if (capid < 0) {
2172 ERROR("unknown capability %s", keep_entry);
2173 return -1;
2174 }
2175
8255688a 2176 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2177
2178 caplist[capid] = 1;
2179 }
2180 for (i=0; i<numcaps; i++) {
2181 if (caplist[i])
2182 continue;
2183 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2184 SYSERROR("failed to remove capability %d", i);
2185 return -1;
2186 }
1fb86a7c
SH
2187 }
2188
2189 DEBUG("capabilities have been setup");
81810dd1
DL
2190
2191 return 0;
2192}
2193
0ad19a3f 2194static int setup_hw_addr(char *hwaddr, const char *ifname)
2195{
2196 struct sockaddr sockaddr;
2197 struct ifreq ifr;
fad6ef95 2198 int ret, fd, saved_errno;
0ad19a3f 2199
3cfc0f3a
MN
2200 ret = lxc_convert_mac(hwaddr, &sockaddr);
2201 if (ret) {
2202 ERROR("mac address '%s' conversion failed : %s",
2203 hwaddr, strerror(-ret));
0ad19a3f 2204 return -1;
2205 }
2206
2207 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2208 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2209 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2210
2211 fd = socket(AF_INET, SOCK_DGRAM, 0);
2212 if (fd < 0) {
3ab87b66 2213 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2214 return -1;
2215 }
2216
2217 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
fad6ef95 2218 saved_errno = errno;
0ad19a3f 2219 close(fd);
2220 if (ret)
fad6ef95 2221 ERROR("ioctl failure : %s", strerror(saved_errno));
0ad19a3f 2222
5da6aa8c 2223 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2224
0ad19a3f 2225 return ret;
2226}
2227
82d5ae15 2228static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2229{
82d5ae15
DL
2230 struct lxc_list *iterator;
2231 struct lxc_inetdev *inetdev;
3cfc0f3a 2232 int err;
0ad19a3f 2233
82d5ae15
DL
2234 lxc_list_for_each(iterator, ip) {
2235
2236 inetdev = iterator->elem;
2237
0093bb8c
DL
2238 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2239 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2240 if (err) {
2241 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2242 ifindex, strerror(-err));
82d5ae15
DL
2243 return -1;
2244 }
2245 }
2246
2247 return 0;
0ad19a3f 2248}
2249
82d5ae15 2250static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2251{
82d5ae15 2252 struct lxc_list *iterator;
7fa9074f 2253 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2254 int err;
0ad19a3f 2255
82d5ae15
DL
2256 lxc_list_for_each(iterator, ip) {
2257
2258 inet6dev = iterator->elem;
2259
b3df193c 2260 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2261 &inet6dev->mcast, &inet6dev->acast,
2262 inet6dev->prefix);
3cfc0f3a
MN
2263 if (err) {
2264 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2265 ifindex, strerror(-err));
82d5ae15 2266 return -1;
3cfc0f3a 2267 }
82d5ae15
DL
2268 }
2269
2270 return 0;
0ad19a3f 2271}
2272
82d5ae15 2273static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2274{
0ad19a3f 2275 char ifname[IFNAMSIZ];
0ad19a3f 2276 char *current_ifname = ifname;
3cfc0f3a 2277 int err;
0ad19a3f 2278
82d5ae15
DL
2279 /* empty network namespace */
2280 if (!netdev->ifindex) {
b0efbac4 2281 if (netdev->flags & IFF_UP) {
d472214b 2282 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2283 if (err) {
2284 ERROR("failed to set the loopback up : %s",
2285 strerror(-err));
82d5ae15
DL
2286 return -1;
2287 }
82d5ae15 2288 }
40790553
SH
2289 if (netdev->type != LXC_NET_VETH)
2290 return 0;
2291 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2292 }
13954cce 2293
b466dc33 2294 /* get the new ifindex in case of physical netdev */
40790553 2295 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2296 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2297 ERROR("failed to get ifindex for %s",
2298 netdev->link);
2299 return -1;
2300 }
40790553 2301 }
b466dc33 2302
82d5ae15
DL
2303 /* retrieve the name of the interface */
2304 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2305 ERROR("no interface corresponding to index '%d'",
82d5ae15 2306 netdev->ifindex);
0ad19a3f 2307 return -1;
2308 }
13954cce 2309
018ef520 2310 /* default: let the system to choose one interface name */
9d083402 2311 if (!netdev->name)
fb6d9b2f
DL
2312 netdev->name = netdev->type == LXC_NET_PHYS ?
2313 netdev->link : "eth%d";
018ef520 2314
82d5ae15 2315 /* rename the interface name */
40790553
SH
2316 if (strcmp(ifname, netdev->name) != 0) {
2317 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2318 if (err) {
2319 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2320 strerror(-err));
2321 return -1;
2322 }
018ef520
DL
2323 }
2324
2325 /* Re-read the name of the interface because its name has changed
2326 * and would be automatically allocated by the system
2327 */
82d5ae15 2328 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2329 ERROR("no interface corresponding to index '%d'",
82d5ae15 2330 netdev->ifindex);
018ef520 2331 return -1;
0ad19a3f 2332 }
2333
82d5ae15
DL
2334 /* set a mac address */
2335 if (netdev->hwaddr) {
2336 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2337 ERROR("failed to setup hw address for '%s'",
82d5ae15 2338 current_ifname);
0ad19a3f 2339 return -1;
2340 }
2341 }
2342
82d5ae15
DL
2343 /* setup ipv4 addresses on the interface */
2344 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2345 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2346 ifname);
2347 return -1;
2348 }
2349
82d5ae15
DL
2350 /* setup ipv6 addresses on the interface */
2351 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2352 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2353 ifname);
2354 return -1;
2355 }
2356
82d5ae15 2357 /* set the network device up */
b0efbac4 2358 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2359 int err;
2360
d472214b 2361 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2362 if (err) {
2363 ERROR("failed to set '%s' up : %s", current_ifname,
2364 strerror(-err));
0ad19a3f 2365 return -1;
2366 }
2367
2368 /* the network is up, make the loopback up too */
d472214b 2369 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2370 if (err) {
2371 ERROR("failed to set the loopback up : %s",
2372 strerror(-err));
0ad19a3f 2373 return -1;
2374 }
2375 }
2376
f8fee0e2
MK
2377 /* We can only set up the default routes after bringing
2378 * up the interface, sine bringing up the interface adds
2379 * the link-local routes and we can't add a default
2380 * route if the gateway is not reachable. */
2381
2382 /* setup ipv4 gateway on the interface */
2383 if (netdev->ipv4_gateway) {
2384 if (!(netdev->flags & IFF_UP)) {
2385 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2386 return -1;
2387 }
2388
2389 if (lxc_list_empty(&netdev->ipv4)) {
2390 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2391 return -1;
2392 }
2393
2394 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2395 if (err) {
fc739df5
SG
2396 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2397 if (err) {
2398 ERROR("failed to add ipv4 dest for '%s': %s",
2399 ifname, strerror(-err));
2400 }
2401
2402 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2403 if (err) {
2404 ERROR("failed to setup ipv4 gateway for '%s': %s",
2405 ifname, strerror(-err));
2406 if (netdev->ipv4_gateway_auto) {
2407 char buf[INET_ADDRSTRLEN];
2408 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2409 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2410 }
2411 return -1;
19a26f82 2412 }
f8fee0e2
MK
2413 }
2414 }
2415
2416 /* setup ipv6 gateway on the interface */
2417 if (netdev->ipv6_gateway) {
2418 if (!(netdev->flags & IFF_UP)) {
2419 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2420 return -1;
2421 }
2422
2423 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2424 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2425 return -1;
2426 }
2427
2428 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2429 if (err) {
fc739df5
SG
2430 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2431 if (err) {
2432 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2433 ifname, strerror(-err));
19a26f82 2434 }
fc739df5
SG
2435
2436 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2437 if (err) {
2438 ERROR("failed to setup ipv6 gateway for '%s': %s",
2439 ifname, strerror(-err));
2440 if (netdev->ipv6_gateway_auto) {
2441 char buf[INET6_ADDRSTRLEN];
2442 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2443 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2444 }
2445 return -1;
2446 }
f8fee0e2
MK
2447 }
2448 }
2449
cd54d859
DL
2450 DEBUG("'%s' has been setup", current_ifname);
2451
0ad19a3f 2452 return 0;
2453}
2454
5f4535a3 2455static int setup_network(struct lxc_list *network)
0ad19a3f 2456{
82d5ae15 2457 struct lxc_list *iterator;
82d5ae15 2458 struct lxc_netdev *netdev;
0ad19a3f 2459
5f4535a3 2460 lxc_list_for_each(iterator, network) {
cd54d859 2461
5f4535a3 2462 netdev = iterator->elem;
82d5ae15
DL
2463
2464 if (setup_netdev(netdev)) {
2465 ERROR("failed to setup netdev");
2466 return -1;
2467 }
2468 }
cd54d859 2469
5f4535a3
DL
2470 if (!lxc_list_empty(network))
2471 INFO("network has been setup");
cd54d859
DL
2472
2473 return 0;
0ad19a3f 2474}
2475
2af6bd1b 2476/* try to move physical nics to the init netns */
5610055a 2477void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2af6bd1b 2478{
64d2fcb5 2479 int i, oldfd;
4ec31c52 2480 char ifname[IFNAMSIZ];
2af6bd1b 2481
5610055a 2482 if (netnsfd < 0 || conf->num_savednics == 0)
2af6bd1b
SH
2483 return;
2484
64d2fcb5 2485 INFO("Running to reset %d nic names.", conf->num_savednics);
5610055a 2486
64d2fcb5
CB
2487 oldfd = lxc_preserve_ns(getpid(), "net");
2488 if (oldfd < 0) {
2489 SYSERROR("Failed to open monitor netns fd.");
2af6bd1b
SH
2490 return;
2491 }
64d2fcb5 2492
2af6bd1b
SH
2493 if (setns(netnsfd, 0) != 0) {
2494 SYSERROR("Failed to enter container netns to reset nics");
2495 close(oldfd);
2496 return;
2497 }
2498 for (i=0; i<conf->num_savednics; i++) {
2499 struct saved_nic *s = &conf->saved_nics[i];
f2e206ff 2500 /* retrieve the name of the interface */
2501 if (!if_indextoname(s->ifindex, ifname)) {
2502 WARN("no interface corresponding to index '%d'", s->ifindex);
2503 continue;
2504 }
5610055a 2505 if (lxc_netdev_move_by_name(ifname, 1, s->orig_name))
f2e206ff 2506 WARN("Error moving nic name:%s back to host netns", ifname);
5610055a 2507 free(s->orig_name);
2af6bd1b 2508 }
5610055a
WB
2509 conf->num_savednics = 0;
2510
2af6bd1b
SH
2511 if (setns(oldfd, 0) != 0)
2512 SYSERROR("Failed to re-enter monitor's netns");
2513 close(oldfd);
2514}
2515
ae9242c8
SH
2516static char *default_rootfs_mount = LXCROOTFSMOUNT;
2517
7b379ab3 2518struct lxc_conf *lxc_conf_init(void)
089cd8b8 2519{
7b379ab3 2520 struct lxc_conf *new;
26ddeedd 2521 int i;
7b379ab3
MN
2522
2523 new = malloc(sizeof(*new));
2524 if (!new) {
2525 ERROR("lxc_conf_init : %m");
2526 return NULL;
2527 }
2528 memset(new, 0, sizeof(*new));
2529
b40a606e 2530 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2531 new->personality = -1;
124fa0a8 2532 new->autodev = 1;
596a818d
DE
2533 new->console.log_path = NULL;
2534 new->console.log_fd = -1;
28a4b0e5 2535 new->console.path = NULL;
63376d7d 2536 new->console.peer = -1;
b5159817
DE
2537 new->console.peerpty.busy = -1;
2538 new->console.peerpty.master = -1;
2539 new->console.peerpty.slave = -1;
63376d7d
DL
2540 new->console.master = -1;
2541 new->console.slave = -1;
2542 new->console.name[0] = '\0';
d2e30e99 2543 new->maincmd_fd = -1;
76a26f55 2544 new->nbd_idx = -1;
54c30e29 2545 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2546 if (!new->rootfs.mount) {
2547 ERROR("lxc_conf_init : %m");
2548 free(new);
2549 return NULL;
2550 }
d89de239 2551 new->kmsg = 0;
858377e4 2552 new->logfd = -1;
7b379ab3
MN
2553 lxc_list_init(&new->cgroup);
2554 lxc_list_init(&new->network);
2555 lxc_list_init(&new->mount_list);
81810dd1 2556 lxc_list_init(&new->caps);
1fb86a7c 2557 lxc_list_init(&new->keepcaps);
f6d3e3e4 2558 lxc_list_init(&new->id_map);
f979ac15 2559 lxc_list_init(&new->includes);
4184c3e1 2560 lxc_list_init(&new->aliens);
7c661726 2561 lxc_list_init(&new->environment);
26ddeedd
SH
2562 for (i=0; i<NUM_LXC_HOOKS; i++)
2563 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2564 lxc_list_init(&new->groups);
fe4de9a6
DE
2565 new->lsm_aa_profile = NULL;
2566 new->lsm_se_context = NULL;
5112cd70 2567 new->tmp_umount_proc = 0;
7b379ab3 2568
9f30a190
MM
2569 for (i = 0; i < LXC_NS_MAX; i++)
2570 new->inherit_ns_fd[i] = -1;
2571
72bb04e4
PT
2572 /* if running in a new user namespace, init and COMMAND
2573 * default to running as UID/GID 0 when using lxc-execute */
2574 new->init_uid = 0;
2575 new->init_gid = 0;
2576
7b379ab3 2577 return new;
089cd8b8
DL
2578}
2579
a589434e 2580static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2581{
8634bc19 2582 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2583 char veth2buf[IFNAMSIZ], *veth2;
b7b2fde4
CB
2584 int bridge_index, err;
2585 unsigned int mtu = 0;
13954cce 2586
8bee8851 2587 if (netdev->priv.veth_attr.pair) {
e892973e 2588 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2589 if (handler->conf->reboot)
2590 lxc_netdev_delete_by_name(veth1);
2591 } else {
9ba8130c
SH
2592 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2593 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2594 ERROR("veth1 name too long");
2595 return -1;
2596 }
a0265685 2597 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2598 if (!veth1) {
2599 ERROR("failed to allocate a temporary name");
2600 return -1;
2601 }
74a2b586
JK
2602 /* store away for deconf */
2603 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2604 }
82d5ae15 2605
0e391e57 2606 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2607 veth2 = lxc_mkifname(veth2buf);
ad40563e 2608 if (!veth2) {
82d5ae15 2609 ERROR("failed to allocate a temporary name");
ad40563e 2610 goto out_delete;
0ad19a3f 2611 }
2612
3cfc0f3a
MN
2613 err = lxc_veth_create(veth1, veth2);
2614 if (err) {
2e2d6a7b 2615 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2616 strerror(-err));
ad40563e 2617 goto out_delete;
0ad19a3f 2618 }
13954cce 2619
49684c0b
CS
2620 /* changing the high byte of the mac address to 0xfe, the bridge interface
2621 * will always keep the host's mac address and not take the mac address
2622 * of a container */
2623 err = setup_private_host_hw_addr(veth1);
2624 if (err) {
2e2d6a7b 2625 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2626 veth1, strerror(-err));
2627 goto out_delete;
2628 }
2629
af651aa9
SN
2630 netdev->ifindex = if_nametoindex(veth2);
2631 if (!netdev->ifindex) {
2632 ERROR("failed to retrieve the index for %s", veth2);
2633 goto out_delete;
2634 }
2635
82d5ae15 2636 if (netdev->mtu) {
b7b2fde4
CB
2637 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
2638 WARN("Failed to parse mtu from.");
2639 else
2640 INFO("Retrieved mtu %d", mtu);
e54864d3 2641 } else if (netdev->link) {
e9280f65 2642 bridge_index = if_nametoindex(netdev->link);
729e8bf6
CB
2643 if (bridge_index) {
2644 mtu = netdev_get_mtu(bridge_index);
2645 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
2646 } else {
2647 mtu = netdev_get_mtu(netdev->ifindex);
2648 INFO("Retrieved mtu %d from %s", mtu, veth2);
2649 }
e54864d3
NC
2650 }
2651
2652 if (mtu) {
2653 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2654 if (!err)
e54864d3 2655 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2656 if (err) {
e54864d3
NC
2657 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2658 mtu, veth1, veth2, strerror(-err));
eb14c10a 2659 goto out_delete;
75d09f83
DL
2660 }
2661 }
2662
3cfc0f3a 2663 if (netdev->link) {
c43cbc04 2664 err = lxc_bridge_attach(handler->lxcpath, handler->name, netdev->link, veth1);
3cfc0f3a 2665 if (err) {
2e2d6a7b 2666 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2667 veth1, netdev->link, strerror(-err));
2668 goto out_delete;
2669 }
738d0deb 2670 INFO("Attached '%s': to the bridge '%s': ", veth1, netdev->link);
eb14c10a
DL
2671 }
2672
d472214b 2673 err = lxc_netdev_up(veth1);
6e35af2e
DL
2674 if (err) {
2675 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2676 goto out_delete;
0ad19a3f 2677 }
2678
e3b4c4c4 2679 if (netdev->upscript) {
751d9dcd
DL
2680 err = run_script(handler->name, "net", netdev->upscript, "up",
2681 "veth", veth1, (char*) NULL);
2682 if (err)
e3b4c4c4 2683 goto out_delete;
e3b4c4c4
ST
2684 }
2685
a589434e 2686 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2687 veth1, veth2, netdev->ifindex);
2688
6ab9ab6d 2689 return 0;
eb14c10a
DL
2690
2691out_delete:
b84f58b9 2692 lxc_netdev_delete_by_name(veth1);
f10fad2f 2693 if (!netdev->priv.veth_attr.pair)
ad40563e 2694 free(veth1);
f10fad2f 2695 free(veth2);
6ab9ab6d 2696 return -1;
13954cce 2697}
d957ae2d 2698
74a2b586
JK
2699static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2700{
2701 char *veth1;
2702 int err;
2703
2704 if (netdev->priv.veth_attr.pair)
2705 veth1 = netdev->priv.veth_attr.pair;
2706 else
2707 veth1 = netdev->priv.veth_attr.veth1;
2708
2709 if (netdev->downscript) {
2710 err = run_script(handler->name, "net", netdev->downscript,
2711 "down", "veth", veth1, (char*) NULL);
2712 if (err)
2713 return -1;
2714 }
2715 return 0;
2716}
2717
a589434e 2718static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2719{
0e391e57 2720 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2721 int err;
d957ae2d
MT
2722
2723 if (!netdev->link) {
2724 ERROR("no link specified for macvlan netdev");
2725 return -1;
2726 }
13954cce 2727
9ba8130c
SH
2728 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2729 if (err >= sizeof(peerbuf))
2730 return -1;
82d5ae15 2731
a0265685 2732 peer = lxc_mkifname(peerbuf);
ad40563e 2733 if (!peer) {
82d5ae15
DL
2734 ERROR("failed to make a temporary name");
2735 return -1;
0ad19a3f 2736 }
2737
3cfc0f3a
MN
2738 err = lxc_macvlan_create(netdev->link, peer,
2739 netdev->priv.macvlan_attr.mode);
2740 if (err) {
2741 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2742 peer, netdev->link, strerror(-err));
ad40563e 2743 goto out;
0ad19a3f 2744 }
2745
82d5ae15
DL
2746 netdev->ifindex = if_nametoindex(peer);
2747 if (!netdev->ifindex) {
36eb9bde 2748 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2749 goto out;
22ebac19 2750 }
2751
e3b4c4c4 2752 if (netdev->upscript) {
751d9dcd
DL
2753 err = run_script(handler->name, "net", netdev->upscript, "up",
2754 "macvlan", netdev->link, (char*) NULL);
2755 if (err)
ad40563e 2756 goto out;
e3b4c4c4
ST
2757 }
2758
a589434e 2759 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2760 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2761
d957ae2d 2762 return 0;
ad40563e
ÇO
2763out:
2764 lxc_netdev_delete_by_name(peer);
2765 free(peer);
2766 return -1;
0ad19a3f 2767}
2768
74a2b586
JK
2769static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2770{
2771 int err;
2772
2773 if (netdev->downscript) {
2774 err = run_script(handler->name, "net", netdev->downscript,
2775 "down", "macvlan", netdev->link,
2776 (char*) NULL);
2777 if (err)
2778 return -1;
2779 }
2780 return 0;
2781}
2782
a589434e
JN
2783/* XXX: merge with instantiate_macvlan */
2784static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2785{
2786 char peer[IFNAMSIZ];
3cfc0f3a 2787 int err;
82f58d03 2788 static uint16_t vlan_cntr = 0;
b7b2fde4 2789 unsigned int mtu = 0;
26c39028
JHS
2790
2791 if (!netdev->link) {
2792 ERROR("no link specified for vlan netdev");
2793 return -1;
2794 }
2795
82f58d03 2796 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2797 if (err >= sizeof(peer)) {
2798 ERROR("peer name too long");
2799 return -1;
2800 }
26c39028 2801
3cfc0f3a
MN
2802 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2803 if (err) {
2804 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2805 peer, netdev->link, strerror(-err));
26c39028
JHS
2806 return -1;
2807 }
2808
2809 netdev->ifindex = if_nametoindex(peer);
2810 if (!netdev->ifindex) {
2811 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2812 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2813 return -1;
2814 }
2815
a589434e 2816 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e 2817 netdev->ifindex);
b4fb7de1 2818 if (netdev->mtu) {
b7b2fde4
CB
2819 if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
2820 ERROR("Failed to retrieve mtu from: '%d'/'%s'.",
2821 netdev->ifindex, netdev->name);
2822 return -1;
2823 }
2824 err = lxc_netdev_set_mtu(peer, mtu);
b4fb7de1
VL
2825 if (err) {
2826 ERROR("failed to set mtu '%s' for %s : %s",
2827 netdev->mtu, peer, strerror(-err));
2828 lxc_netdev_delete_by_name(peer);
2829 return -1;
2830 }
2831 }
e892973e 2832
26c39028
JHS
2833 return 0;
2834}
2835
/* Teardown hook for vlan devices: does nothing and always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int status = 0;

	return status;
}
2840
a589434e 2841static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2842{
6168e99f
DL
2843 if (!netdev->link) {
2844 ERROR("no link specified for the physical interface");
2845 return -1;
2846 }
2847
9d083402 2848 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2849 if (!netdev->ifindex) {
9d083402 2850 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2851 return -1;
2852 }
2853
e3b4c4c4
ST
2854 if (netdev->upscript) {
2855 int err;
751d9dcd
DL
2856 err = run_script(handler->name, "net", netdev->upscript,
2857 "up", "phys", netdev->link, (char*) NULL);
2858 if (err)
e3b4c4c4 2859 return -1;
e3b4c4c4
ST
2860 }
2861
82d5ae15 2862 return 0;
0ad19a3f 2863}
2864
74a2b586
JK
2865static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2866{
2867 int err;
2868
2869 if (netdev->downscript) {
2870 err = run_script(handler->name, "net", netdev->downscript,
2871 "down", "phys", netdev->link, (char*) NULL);
2872 if (err)
2873 return -1;
2874 }
2875 return 0;
2876}
2877
a589434e 2878static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2879{
2880 netdev->ifindex = 0;
2881 return 0;
2882}
2883
a589434e 2884static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2885{
82d5ae15 2886 netdev->ifindex = 0;
e3b4c4c4
ST
2887 if (netdev->upscript) {
2888 int err;
751d9dcd
DL
2889 err = run_script(handler->name, "net", netdev->upscript,
2890 "up", "empty", (char*) NULL);
2891 if (err)
e3b4c4c4 2892 return -1;
e3b4c4c4 2893 }
82d5ae15 2894 return 0;
0ad19a3f 2895}
2896
74a2b586
JK
2897static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2898{
2899 int err;
2900
2901 if (netdev->downscript) {
2902 err = run_script(handler->name, "net", netdev->downscript,
2903 "down", "empty", (char*) NULL);
2904 if (err)
2905 return -1;
2906 }
2907 return 0;
2908}
2909
/* Teardown hook for the "none" type: does nothing and always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int status = 0;

	return status;
}
2914
2915int lxc_requests_empty_network(struct lxc_handler *handler)
2916{
2917 struct lxc_list *network = &handler->conf->network;
2918 struct lxc_list *iterator;
2919 struct lxc_netdev *netdev;
2920 bool found_none = false, found_nic = false;
2921
2922 if (lxc_list_empty(network))
2923 return 0;
2924
2925 lxc_list_for_each(iterator, network) {
2926
2927 netdev = iterator->elem;
2928
2929 if (netdev->type == LXC_NET_NONE)
2930 found_none = true;
2931 else
2932 found_nic = true;
2933 }
2934 if (found_none && !found_nic)
2935 return 1;
2936 return 0;
2937}
2938
e3b4c4c4 2939int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2940{
e3b4c4c4 2941 struct lxc_list *network = &handler->conf->network;
82d5ae15 2942 struct lxc_list *iterator;
82d5ae15 2943 struct lxc_netdev *netdev;
cbef6c52
SH
2944 int am_root = (getuid() == 0);
2945
2946 if (!am_root)
2947 return 0;
0ad19a3f 2948
5f4535a3 2949 lxc_list_for_each(iterator, network) {
0ad19a3f 2950
5f4535a3 2951 netdev = iterator->elem;
13954cce 2952
24654103 2953 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2954 ERROR("invalid network configuration type '%d'",
5f4535a3 2955 netdev->type);
82d5ae15
DL
2956 return -1;
2957 }
0ad19a3f 2958
e3b4c4c4 2959 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2960 ERROR("failed to create netdev");
2961 return -1;
2962 }
e3b4c4c4 2963
0ad19a3f 2964 }
2965
2966 return 0;
2967}
2968
358daf49 2969bool lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2970{
e97946ae 2971 int ret;
74a2b586 2972 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2973 struct lxc_list *iterator;
2974 struct lxc_netdev *netdev;
358daf49 2975 bool deleted_all = true;
7fef7a06
DL
2976
2977 lxc_list_for_each(iterator, network) {
2978 netdev = iterator->elem;
d472214b 2979
74a2b586 2980 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352 2981 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
358daf49
CB
2982 WARN("Failed to rename interface with index %d "
2983 "to its initial name \"%s\".",
2984 netdev->ifindex, netdev->link);
d472214b 2985 continue;
d8f8e352 2986 }
d472214b 2987
74a2b586 2988 if (netdev_deconf[netdev->type](handler, netdev)) {
e97946ae 2989 WARN("Failed to destroy netdev");
74a2b586
JK
2990 }
2991
d8f8e352
DL
2992 /* Recent kernel remove the virtual interfaces when the network
2993 * namespace is destroyed but in case we did not moved the
2994 * interface to the network namespace, we have to destroy it
2995 */
e97946ae
CB
2996 if (netdev->ifindex != 0) {
2997 ret = lxc_netdev_delete_by_index(netdev->ifindex);
358daf49
CB
2998 if (-ret == ENODEV) {
2999 INFO("Interface \"%s\" with index %d already "
3000 "deleted or existing in different network "
3001 "namespace.",
3002 netdev->name ? netdev->name : "(null)",
3003 netdev->ifindex);
3004 } else if (ret < 0) {
3005 deleted_all = false;
3006 WARN("Failed to remove interface \"%s\" with "
3007 "index %d: %s.",
3008 netdev->name ? netdev->name : "(null)",
3009 netdev->ifindex, strerror(-ret));
3010 } else {
3011 INFO("Removed interface \"%s\" with index %d.",
3012 netdev->name ? netdev->name : "(null)",
3013 netdev->ifindex);
3014 }
e97946ae
CB
3015 }
3016
3017 /* Explicitly delete host veth device to prevent lingering
3018 * devices. We had issues in LXD around this.
3019 */
9aaaad30 3020 if (netdev->type == LXC_NET_VETH && !am_unpriv()) {
358daf49
CB
3021 char *hostveth;
3022 if (netdev->priv.veth_attr.pair) {
e97946ae 3023 hostveth = netdev->priv.veth_attr.pair;
358daf49
CB
3024 ret = lxc_netdev_delete_by_name(hostveth);
3025 if (ret < 0) {
3026 WARN("Failed to remove interface \"%s\" from host: %s.", hostveth, strerror(-ret));
3027 } else {
3028 INFO("Removed interface \"%s\" from host.", hostveth);
3029 free(netdev->priv.veth_attr.pair);
3030 netdev->priv.veth_attr.pair = NULL;
3031 }
3032 } else if (strlen(netdev->priv.veth_attr.veth1) > 0) {
e97946ae 3033 hostveth = netdev->priv.veth_attr.veth1;
e97946ae 3034 ret = lxc_netdev_delete_by_name(hostveth);
358daf49
CB
3035 if (ret < 0) {
3036 WARN("Failed to remove \"%s\" from host: %s.", hostveth, strerror(-ret));
3037 } else {
3038 INFO("Removed interface \"%s\" from host.", hostveth);
3039 memset((void *)&netdev->priv.veth_attr.veth1, 0, sizeof(netdev->priv.veth_attr.veth1));
3040 }
e97946ae
CB
3041 }
3042 }
7fef7a06 3043 }
358daf49
CB
3044
3045 return deleted_all;
7fef7a06
DL
3046}
3047
45e854dc
SG
3048#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3049
fe1f672f 3050/* lxc-user-nic returns "interface_name:interface_name\n" */
eab15c1e 3051#define MAX_BUFFER_SIZE IFNAMSIZ * 2 + 2
c43cbc04
SH
3052static int unpriv_assign_nic(const char *lxcpath, char *lxcname,
3053 struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3054{
3055 pid_t child;
a7242d9a
ÇO
3056 int bytes, pipefd[2];
3057 char *token, *saveptr = NULL;
fe1f672f 3058 char buffer[MAX_BUFFER_SIZE];
091045f8 3059 char netdev_link[IFNAMSIZ + 1];
cbef6c52
SH
3060
3061 if (netdev->type != LXC_NET_VETH) {
3062 ERROR("nic type %d not support for unprivileged use",
091045f8 3063 netdev->type);
cbef6c52
SH
3064 return -1;
3065 }
3066
091045f8 3067 if (pipe(pipefd) < 0) {
a7242d9a
ÇO
3068 SYSERROR("pipe failed");
3069 return -1;
3070 }
3071
091045f8
CB
3072 child = fork();
3073 if (child < 0) {
cbef6c52 3074 SYSERROR("fork");
a7242d9a
ÇO
3075 close(pipefd[0]);
3076 close(pipefd[1]);
3077 return -1;
3078 }
3079
3080 if (child == 0) { // child
091045f8
CB
3081 /* Call lxc-user-nic pid type bridge. */
3082 int ret;
3083 char pidstr[LXC_NUMSTRLEN64];
3084
3085 close(pipefd[0]); /* Close the read-end of the pipe. */
3086
3087 /* Redirect stdout to write-end of the pipe. */
3088 ret = dup2(pipefd[1], STDOUT_FILENO);
3089 close(pipefd[1]); /* Close the write-end of the pipe. */
3090 if (ret < 0) {
3091 SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor.");
3092 exit(EXIT_FAILURE);
3093 }
a7242d9a 3094
091045f8 3095 if (netdev->link)
cff7b5eb 3096 strncpy(netdev_link, netdev->link, IFNAMSIZ);
091045f8 3097 else
cff7b5eb 3098 strncpy(netdev_link, "none", IFNAMSIZ);
091045f8
CB
3099
3100 ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
3101 if (ret < 0 || ret >= LXC_NUMSTRLEN64)
3102 exit(EXIT_FAILURE);
3103 pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
3104
3105 INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath,
3106 lxcname, pidstr, netdev_link, netdev->name);
c43cbc04 3107 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname,
091045f8
CB
3108 pidstr, "veth", netdev_link, netdev->name, NULL);
3109
3110 SYSERROR("Failed to exec lxc-user-nic.");
3111 exit(EXIT_FAILURE);
a7242d9a
ÇO
3112 }
3113
3114 /* close the write-end of the pipe */
3115 close(pipefd[1]);
3116
fe1f672f 3117 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
091045f8
CB
3118 if (bytes < 0)
3119 SYSERROR("Failed to read from pipe file descriptor.");
a7242d9a
ÇO
3120 buffer[bytes - 1] = '\0';
3121
3122 if (wait_for_pid(child) != 0) {
3123 close(pipefd[0]);
cbef6c52
SH
3124 return -1;
3125 }
3126
a7242d9a
ÇO
3127 /* close the read-end of the pipe */
3128 close(pipefd[0]);
cbef6c52 3129
a7242d9a
ÇO
3130 /* fill netdev->name field */
3131 token = strtok_r(buffer, ":", &saveptr);
3132 if (!token)
3133 return -1;
091045f8
CB
3134
3135 netdev->name = malloc(IFNAMSIZ + 1);
658979c5 3136 if (!netdev->name) {
091045f8 3137 SYSERROR("Failed to allocate memory.");
658979c5
SH
3138 return -1;
3139 }
091045f8 3140 memset(netdev->name, 0, IFNAMSIZ + 1);
658979c5 3141 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3142
3143 /* fill netdev->veth_attr.pair field */
3144 token = strtok_r(NULL, ":", &saveptr);
3145 if (!token)
3146 return -1;
091045f8 3147
a7242d9a 3148 netdev->priv.veth_attr.pair = strdup(token);
658979c5 3149 if (!netdev->priv.veth_attr.pair) {
091045f8 3150 ERROR("Failed to allocate memory.");
658979c5
SH
3151 return -1;
3152 }
45e854dc 3153
a7242d9a 3154 return 0;
cbef6c52
SH
3155}
3156
c43cbc04
SH
3157int lxc_assign_network(const char *lxcpath, char *lxcname,
3158 struct lxc_list *network, pid_t pid)
0ad19a3f 3159{
82d5ae15 3160 struct lxc_list *iterator;
82d5ae15 3161 struct lxc_netdev *netdev;
f2e206ff 3162 char ifname[IFNAMSIZ];
cbef6c52 3163 int am_root = (getuid() == 0);
3cfc0f3a 3164 int err;
0ad19a3f 3165
5f4535a3 3166 lxc_list_for_each(iterator, network) {
82d5ae15 3167
5f4535a3 3168 netdev = iterator->elem;
82d5ae15 3169
fbb16259 3170 if (netdev->type == LXC_NET_VETH && !am_root) {
c43cbc04 3171 if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid))
cbef6c52 3172 return -1;
658979c5
SH
3173 // lxc-user-nic has moved the nic to the new ns.
3174 // unpriv_assign_nic() fills in netdev->name.
3175 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3176 continue;
3177 }
236087a6 3178
fbb16259
SH
3179 /* empty network namespace, nothing to move */
3180 if (!netdev->ifindex)
3181 continue;
3182
f2e206ff 3183 /* retrieve the name of the interface */
3184 if (!if_indextoname(netdev->ifindex, ifname)) {
3185 ERROR("no interface corresponding to index '%d'", netdev->ifindex);
3186 return -1;
3187 }
3188
3189 err = lxc_netdev_move_by_name(ifname, pid, NULL);
3cfc0f3a
MN
3190 if (err) {
3191 ERROR("failed to move '%s' to the container : %s",
3192 netdev->link, strerror(-err));
82d5ae15
DL
3193 return -1;
3194 }
3195
198cbbaa 3196 DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid);
0ad19a3f 3197 }
3198
3199 return 0;
3200}
3201
251d0d2a
DE
3202static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3203 size_t buf_size)
f6d3e3e4
SH
3204{
3205 char path[PATH_MAX];
e4ccd113 3206 int ret, closeret;
f6d3e3e4
SH
3207 FILE *f;
3208
3209 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3210 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3211 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3212 return -E2BIG;
3213 }
3214 f = fopen(path, "w");
3215 if (!f) {
3216 perror("open");
3217 return -EINVAL;
3218 }
251d0d2a 3219 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3220 if (ret < 0)
e4ccd113
SH
3221 SYSERROR("writing id mapping");
3222 closeret = fclose(f);
3223 if (closeret)
3224 SYSERROR("writing id mapping");
3225 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3226}
3227
3228int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3229{
3230 struct lxc_list *iterator;
3231 struct id_map *map;
8afb3e61 3232 int ret = 0, use_shadow = 0;
251d0d2a 3233 enum idtype type;
8afb3e61
SG
3234 char *buf = NULL, *pos, *cmdpath = NULL;
3235
22038de5
SH
3236 /*
3237 * If newuidmap exists, that is, if shadow is handing out subuid
3238 * ranges, then insist that root also reserve ranges in subuid. This
3239 * will protected it by preventing another user from being handed the
3240 * range by shadow.
3241 */
9d9c111c 3242 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3243 if (cmdpath) {
3244 use_shadow = 1;
3245 free(cmdpath);
3246 }
3247
0e6e3a41
SG
3248 if (!use_shadow && geteuid()) {
3249 ERROR("Missing newuidmap/newgidmap");
3250 return -1;
3251 }
251d0d2a
DE
3252
3253 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3254 int left, fill;
cf3ef16d
SH
3255 int had_entry = 0;
3256 if (!buf) {
3257 buf = pos = malloc(4096);
4f7521b4
SH
3258 if (!buf)
3259 return -ENOMEM;
cf3ef16d
SH
3260 }
3261 pos = buf;
0e6e3a41 3262 if (use_shadow)
d1838f34 3263 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3264 type == ID_TYPE_UID ? 'u' : 'g',
3265 pid);
4f7521b4 3266
cf3ef16d
SH
3267 lxc_list_for_each(iterator, idmap) {
3268 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3269 map = iterator->elem;
cf3ef16d
SH
3270 if (map->idtype != type)
3271 continue;
3272
3273 had_entry = 1;
3274 left = 4096 - (pos - buf);
d1838f34 3275 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3276 use_shadow ? " " : "",
d1838f34 3277 map->nsid, map->hostid, map->range,
0e6e3a41 3278 use_shadow ? "" : "\n");
cf3ef16d
SH
3279 if (fill <= 0 || fill >= left)
3280 SYSERROR("snprintf failed, too many mappings");
3281 pos += fill;
251d0d2a 3282 }
cf3ef16d 3283 if (!had_entry)
4f7521b4 3284 continue;
cf3ef16d 3285
0e6e3a41 3286 if (!use_shadow) {
cf3ef16d 3287 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3288 } else {
3289 left = 4096 - (pos - buf);
3290 fill = snprintf(pos, left, "\n");
3291 if (fill <= 0 || fill >= left)
3292 SYSERROR("snprintf failed, too many mappings");
3293 pos += fill;
cf3ef16d 3294 ret = system(buf);
d1838f34 3295 }
cf3ef16d 3296
f6d3e3e4
SH
3297 if (ret)
3298 break;
3299 }
251d0d2a 3300
f10fad2f 3301 free(buf);
f6d3e3e4
SH
3302 return ret;
3303}
3304
cf3ef16d 3305/*
7b50c609
TS
3306 * return the host uid/gid to which the container root is mapped in
3307 * *val.
0b3a6504 3308 * Return true if id was found, false otherwise.
cf3ef16d 3309 */
2a9a80cb 3310bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3311 unsigned long *val)
cf3ef16d
SH
3312{
3313 struct lxc_list *it;
3314 struct id_map *map;
3315
3316 lxc_list_for_each(it, &conf->id_map) {
3317 map = it->elem;
7b50c609 3318 if (map->idtype != idtype)
cf3ef16d
SH
3319 continue;
3320 if (map->nsid != 0)
3321 continue;
2a9a80cb
SH
3322 *val = map->hostid;
3323 return true;
cf3ef16d 3324 }
2a9a80cb 3325 return false;
cf3ef16d
SH
3326}
3327
2133f58c 3328int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3329{
3330 struct lxc_list *it;
3331 struct id_map *map;
3332 lxc_list_for_each(it, &conf->id_map) {
3333 map = it->elem;
2133f58c 3334 if (map->idtype != idtype)
cf3ef16d
SH
3335 continue;
3336 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3337 return (id - map->hostid) + map->nsid;
cf3ef16d 3338 }
57d116ab 3339 return -1;
cf3ef16d
SH
3340}
3341
2133f58c 3342int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3343{
3344 struct lxc_list *it;
3345 struct id_map *map;
2133f58c 3346 unsigned int freeid = 0;
cf3ef16d
SH
3347again:
3348 lxc_list_for_each(it, &conf->id_map) {
3349 map = it->elem;
2133f58c 3350 if (map->idtype != idtype)
cf3ef16d
SH
3351 continue;
3352 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3353 freeid = map->nsid + map->range;
3354 goto again;
3355 }
3356 }
3357 return freeid;
3358}
3359
19a26f82
MK
3360int lxc_find_gateway_addresses(struct lxc_handler *handler)
3361{
3362 struct lxc_list *network = &handler->conf->network;
3363 struct lxc_list *iterator;
3364 struct lxc_netdev *netdev;
3365 int link_index;
3366
3367 lxc_list_for_each(iterator, network) {
3368 netdev = iterator->elem;
3369
3370 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3371 continue;
3372
3373 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3374 ERROR("gateway = auto only supported for "
3375 "veth and macvlan");
3376 return -1;
3377 }
3378
3379 if (!netdev->link) {
3380 ERROR("gateway = auto needs a link interface");
3381 return -1;
3382 }
3383
3384 link_index = if_nametoindex(netdev->link);
3385 if (!link_index)
3386 return -EINVAL;
3387
3388 if (netdev->ipv4_gateway_auto) {
3389 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3390 ERROR("failed to automatically find ipv4 gateway "
3391 "address from link interface '%s'", netdev->link);
3392 return -1;
3393 }
3394 }
3395
3396 if (netdev->ipv6_gateway_auto) {
3397 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3398 ERROR("failed to automatically find ipv6 gateway "
3399 "address from link interface '%s'", netdev->link);
3400 return -1;
3401 }
3402 }
3403 }
3404
3405 return 0;
3406}
3407
5e4a62bf 3408int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3409{
5e4a62bf 3410 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3411 int i, ret;
b0a33c1e 3412
5e4a62bf
DL
3413 /* no tty in the configuration */
3414 if (!conf->tty)
b0a33c1e 3415 return 0;
3416
13954cce 3417 tty_info->pty_info =
e4e7d59d 3418 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3419 if (!tty_info->pty_info) {
36eb9bde 3420 SYSERROR("failed to allocate pty_info");
985d15b1 3421 return -1;
b0a33c1e 3422 }
3423
985d15b1 3424 for (i = 0; i < conf->tty; i++) {
13954cce 3425
b0a33c1e 3426 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3427
025ed0f3
SH
3428 process_lock();
3429 ret = openpty(&pty_info->master, &pty_info->slave,
3430 pty_info->name, NULL, NULL);
3431 process_unlock();
3432 if (ret) {
36eb9bde 3433 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3434 tty_info->nbtty = i;
3435 lxc_delete_tty(tty_info);
3436 return -1;
b0a33c1e 3437 }
3438
5332bb84
DL
3439 DEBUG("allocated pty '%s' (%d/%d)",
3440 pty_info->name, pty_info->master, pty_info->slave);
3441
3ec1648d 3442 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3443 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3444 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3445
b0a33c1e 3446 pty_info->busy = 0;
3447 }
3448
985d15b1 3449 tty_info->nbtty = conf->tty;
1ac470c0
DL
3450
3451 INFO("tty's configured");
3452
985d15b1 3453 return 0;
b0a33c1e 3454}
3455
3456void lxc_delete_tty(struct lxc_tty_info *tty_info)
3457{
3458 int i;
3459
3460 for (i = 0; i < tty_info->nbtty; i++) {
3461 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3462
3463 close(pty_info->master);
3464 close(pty_info->slave);
3465 }
3466
3467 free(tty_info->pty_info);
e00c0242 3468 tty_info->pty_info = NULL;
b0a33c1e 3469 tty_info->nbtty = 0;
3470}
3471
f6d3e3e4 3472/*
7b50c609
TS
3473 * chown_mapped_root: for an unprivileged user with uid/gid X to
3474 * chown a dir to subuid/subgid Y, he needs to run chown as root
3475 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3476 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3477 * root is privileged with respect to hostuid/hostgid X, allowing
3478 * him to do the chown.
f6d3e3e4 3479 */
c4d10a05 3480int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3481{
7b50c609
TS
3482 uid_t rootuid;
3483 gid_t rootgid;
c4d10a05 3484 pid_t pid;
2a9a80cb 3485 unsigned long val;
a7ef8753 3486 char *chownpath = path;
f6d3e3e4 3487
2a9a80cb 3488 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3489 ERROR("No mapping for container root");
3490 return -1;
f6d3e3e4 3491 }
7b50c609
TS
3492 rootuid = (uid_t) val;
3493 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3494 ERROR("No mapping for container root");
3495 return -1;
3496 }
3497 rootgid = (gid_t) val;
2a9a80cb 3498
a7ef8753
SH
3499 /*
3500 * In case of overlay, we want only the writeable layer
3501 * to be chowned
3502 */
1f92162d 3503 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3504 chownpath = strchr(path, ':');
3505 if (!chownpath) {
3506 ERROR("Bad overlay path: %s", path);
3507 return -1;
3508 }
3509 chownpath = strchr(chownpath+1, ':');
3510 if (!chownpath) {
3511 ERROR("Bad overlay path: %s", path);
3512 return -1;
3513 }
3514 chownpath++;
3515 }
3516 path = chownpath;
c4d10a05 3517 if (geteuid() == 0) {
7b50c609 3518 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3519 ERROR("Error chowning %s", path);
3520 return -1;
3521 }
3522 return 0;
3523 }
f3d7e4ca 3524
7b50c609 3525 if (rootuid == geteuid()) {
f3d7e4ca
SH
3526 // nothing to do
3527 INFO("%s: container root is our uid; no need to chown" ,__func__);
3528 return 0;
3529 }
3530
c4d10a05
SH
3531 pid = fork();
3532 if (pid < 0) {
3533 SYSERROR("Failed forking");
f6d3e3e4
SH
3534 return -1;
3535 }
c4d10a05 3536 if (!pid) {
7b50c609
TS
3537 int hostuid = geteuid(), hostgid = getegid(), ret;
3538 struct stat sb;
3539 char map1[100], map2[100], map3[100], map4[100], map5[100];
3540 char ugid[100];
3541 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3542 "-m", map3, "-m", map5,
3543 "--", "chown", ugid, path, NULL };
3544 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3545 "-m", map3, "-m", map4, "-m", map5,
3546 "--", "chown", ugid, path, NULL };
3547
3548 // save the current gid of "path"
3549 if (stat(path, &sb) < 0) {
3550 ERROR("Error stat %s", path);
3551 return -1;
3552 }
f6d3e3e4 3553
9a7c2aba
SH
3554 /*
3555 * A file has to be group-owned by a gid mapped into the
3556 * container, or the container won't be privileged over it.
3557 */
3558 if (sb.st_uid == geteuid() &&
3559 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3560 chown(path, -1, hostgid) < 0) {
3561 ERROR("Failed chgrping %s", path);
7b50c609
TS
3562 return -1;
3563 }
3564
3565 // "u:0:rootuid:1"
3566 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3567 if (ret < 0 || ret >= 100) {
3568 ERROR("Error uid printing map string");
f6d3e3e4
SH
3569 return -1;
3570 }
c4d10a05 3571
98e5ba51
SH
3572 // "u:hostuid:hostuid:1"
3573 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3574 if (ret < 0 || ret >= 100) {
3575 ERROR("Error uid printing map string");
3576 return -1;
3577 }
3578
7b50c609
TS
3579 // "g:0:rootgid:1"
3580 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3581 if (ret < 0 || ret >= 100) {
7b50c609 3582 ERROR("Error gid printing map string");
c4d10a05
SH
3583 return -1;
3584 }
3585
7b50c609 3586 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3587 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3588 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3589 if (ret < 0 || ret >= 100) {
3590 ERROR("Error gid printing map string");
3591 return -1;
3592 }
3593
3594 // "g:hostgid:hostgid:1"
3595 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3596 if (ret < 0 || ret >= 100) {
3597 ERROR("Error gid printing map string");
3598 return -1;
3599 }
3600
3601 // "0:pathgid" (chown)
b4c1e35d 3602 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3603 if (ret < 0 || ret >= 100) {
3604 ERROR("Error owner printing format string for chown");
3605 return -1;
3606 }
3607
3608 if (hostgid == sb.st_gid)
3609 ret = execvp("lxc-usernsexec", args1);
3610 else
3611 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3612 SYSERROR("Failed executing usernsexec");
3613 exit(1);
f6d3e3e4 3614 }
c4d10a05 3615 return wait_for_pid(pid);
f6d3e3e4
SH
3616}
3617
c4d10a05 3618int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3619{
c4d10a05 3620 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3621 return 0;
c4d10a05 3622
29b10e4f 3623 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3624 ERROR("Failed to chown %s", c->console.name);
3625 return -1;
3626 }
3627
f6d3e3e4
SH
3628 return 0;
3629}
3630
f267d666 3631/* NOTE: not to be called from inside the container namespace! */
5112cd70
SH
3632int tmp_proc_mount(struct lxc_conf *lxc_conf)
3633{
3634 int mounted;
3635
01958b1f 3636 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3637 if (mounted == -1) {
3638 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3639 /* continue only if there is no rootfs */
3640 if (lxc_conf->rootfs.path)
3641 return -1;
5112cd70
SH
3642 } else if (mounted == 1) {
3643 lxc_conf->tmp_umount_proc = 1;
3644 }
3645 return 0;
3646}
3647
3648void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3649{
3650 if (lxc_conf->tmp_umount_proc == 1) {
3651 umount("/proc");
3652 lxc_conf->tmp_umount_proc = 0;
3653 }
3654}
3655
/*
 * Walk /proc/self/mountinfo and remount with MS_SLAVE every entry whose
 * optional fields mention "shared". Failures are logged and otherwise
 * ignored so that container startup continues.
 */
void remount_all_slave(void)
{
	FILE *mountinfo;
	char *line = NULL;
	size_t cap = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&line, &cap, mountinfo) != -1) {
		char *target = get_field(line, 4);
		char *opts = target ? get_field(target, 2) : NULL;

		if (!opts)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared") == NULL)
			continue;

		null_endofword(target);
		if (mount(NULL, target, NULL, MS_SLAVE, NULL) != 0) {
			SYSERROR("Failed to make %s rslave", target);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(line);
}
3689
2322903b
SH
3690void lxc_execute_bind_init(struct lxc_conf *conf)
3691{
3692 int ret;
9d9c111c
SH
3693 char path[PATH_MAX], destpath[PATH_MAX], *p;
3694
3695 /* If init exists in the container, don't bind mount a static one */
3696 p = choose_init(conf->rootfs.mount);
3697 if (p) {
3698 free(p);
3699 return;
3700 }
2322903b
SH
3701
3702 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3703 if (ret < 0 || ret >= PATH_MAX) {
3704 WARN("Path name too long searching for lxc.init.static");
3705 return;
3706 }
3707
3708 if (!file_exists(path)) {
3709 INFO("%s does not exist on host", path);
3710 return;
3711 }
3712
3713 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3714 if (ret < 0 || ret >= PATH_MAX) {
3715 WARN("Path name too long for container's lxc.init.static");
3716 return;
3717 }
3718
3719 if (!file_exists(destpath)) {
3720 FILE * pathfile = fopen(destpath, "wb");
3721 if (!pathfile) {
3722 SYSERROR("Failed to create mount target '%s'", destpath);
3723 return;
3724 }
3725 fclose(pathfile);
3726 }
3727
592fd47a 3728 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3729 if (ret < 0)
3730 SYSERROR("Failed to bind lxc.init.static into container");
3731 INFO("lxc.init.static bound into container at %s", path);
3732}
3733
35120d9c
SH
3734/*
3735 * This does the work of remounting / if it is shared, calling the
3736 * container pre-mount hooks, and mounting the rootfs.
3737 */
3738int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3739{
35120d9c
SH
3740 if (conf->rootfs_setup) {
3741 /*
3742 * rootfs was set up in another namespace. bind-mount it
3743 * to give us a mount in our own ns so we can pivot_root to it
3744 */
3745 const char *path = conf->rootfs.mount;
3746 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3747 ERROR("Failed to bind-mount container / onto itself");
145832ba 3748 return -1;
35120d9c 3749 }
145832ba 3750 return 0;
35120d9c 3751 }
d4ef7c50 3752
e995d7a2
SH
3753 remount_all_slave();
3754
35120d9c
SH
3755 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3756 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3757 return -1;
3758 }
3759
3760 if (setup_rootfs(conf)) {
3761 ERROR("failed to setup rootfs for '%s'", name);
3762 return -1;
3763 }
3764
3765 conf->rootfs_setup = true;
3766 return 0;
3767}
3768
1c1c7051
SH
3769static bool verify_start_hooks(struct lxc_conf *conf)
3770{
3771 struct lxc_list *it;
3772 char path[MAXPATHLEN];
3773 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3774 char *hookname = it->elem;
3775 struct stat st;
3776 int ret;
3777
3778 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3779 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3780 if (ret < 0 || ret >= MAXPATHLEN)
3781 return false;
3782 ret = stat(path, &st);
3783 if (ret) {
7b6753e7 3784 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3785 hookname);
3786 return false;
3787 }
6a0c909a 3788 return true;
1c1c7051
SH
3789 }
3790
3791 return true;
3792}
3793
e8bd4e43
SH
3794static int send_fd(int sock, int fd)
3795{
3796 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3797
3798
3799 if (ret < 0) {
3800 SYSERROR("Error sending tty fd to parent");
3801 return -1;
3802 }
3803
3804 return 0;
3805}
3806
3807static int send_ttys_to_parent(struct lxc_handler *handler)
3808{
3809 struct lxc_conf *conf = handler->conf;
3810 const struct lxc_tty_info *tty_info = &conf->tty_info;
3811 int i;
3812 int sock = handler->ttysock[0];
3813
3814 for (i = 0; i < tty_info->nbtty; i++) {
3815 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3816 if (send_fd(sock, pty_info->slave) < 0)
3817 goto bad;
3818 close(pty_info->slave);
3819 pty_info->slave = -1;
3820 if (send_fd(sock, pty_info->master) < 0)
3821 goto bad;
3822 close(pty_info->master);
3823 pty_info->master = -1;
3824 }
3825
3826 close(handler->ttysock[0]);
3827 close(handler->ttysock[1]);
3828
3829 return 0;
3830
3831bad:
3832 ERROR("Error writing tty fd to parent");
3833 return -1;
3834}
3835
35120d9c
SH
3836int lxc_setup(struct lxc_handler *handler)
3837{
3838 const char *name = handler->name;
3839 struct lxc_conf *lxc_conf = handler->conf;
3840 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3841
3842 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3843 ERROR("Error setting up rootfs mount after spawn");
3844 return -1;
3845 }
3846
6c544cb3
MM
3847 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3848 if (setup_utsname(lxc_conf->utsname)) {
3849 ERROR("failed to setup the utsname for '%s'", name);
3850 return -1;
3851 }
0ad19a3f 3852 }
3853
5f4535a3 3854 if (setup_network(&lxc_conf->network)) {
36eb9bde 3855 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3856 return -1;
0ad19a3f 3857 }
3858
bc6928ff 3859 if (lxc_conf->autodev > 0) {
14221cbb 3860 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 3861 ERROR("failed to mount /dev in the container");
c6883f38
SH
3862 return -1;
3863 }
3864 }
3865
368bbc02
CS
3866 /* do automatic mounts (mainly /proc and /sys), but exclude
3867 * those that need to wait until other stuff has finished
3868 */
4fb3cba5 3869 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3870 ERROR("failed to setup the automatic mounts for '%s'", name);
3871 return -1;
3872 }
3873
0a2dddd4 3874 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath)) {
36eb9bde 3875 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3876 return -1;
576f946d 3877 }
3878
0a2dddd4 3879 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name, lxcpath)) {
e7938e9e
MN
3880 ERROR("failed to setup the mount entries for '%s'", name);
3881 return -1;
3882 }
3883
7b6753e7 3884 /* Make sure any start hooks are in the container */
1c1c7051
SH
3885 if (!verify_start_hooks(lxc_conf))
3886 return -1;
3887
2322903b
SH
3888 if (lxc_conf->is_execute)
3889 lxc_execute_bind_init(lxc_conf);
3890
368bbc02
CS
3891 /* now mount only cgroup, if wanted;
3892 * before, /sys could not have been mounted
3893 * (is either mounted automatically or via fstab entries)
3894 */
4fb3cba5 3895 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3896 ERROR("failed to setup the automatic mounts for '%s'", name);
3897 return -1;
3898 }
3899
283678ed 3900 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3901 ERROR("failed to run mount hooks for container '%s'.", name);
3902 return -1;
3903 }
3904
bc6928ff 3905 if (lxc_conf->autodev > 0) {
0728ebf4
TA
3906 bool mount_console = lxc_conf->console.path && !strcmp(lxc_conf->console.path, "none");
3907
283678ed 3908 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3909 ERROR("failed to run autodev hooks for container '%s'.", name);
3910 return -1;
3911 }
0728ebf4 3912 if (fill_autodev(&lxc_conf->rootfs, mount_console)) {
91c3830e
SH
3913 ERROR("failed to populate /dev in the container");
3914 return -1;
3915 }
3916 }
368bbc02 3917
37903589 3918 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3919 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3920 return -1;
6e590161 3921 }
3922
7e0e1d94
AV
3923 if (lxc_conf->kmsg) {
3924 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3925 ERROR("failed to setup kmsg for '%s'", name);
3926 }
1bd051a6 3927
69aa6655
DE
3928 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3929 ERROR("failed to setup /dev symlinks for '%s'", name);
3930 return -1;
3931 }
3932
5112cd70
SH
3933 /* mount /proc if it's not already there */
3934 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3935 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3936 return -1;
e075f5d9 3937 }
e075f5d9 3938
ac778708 3939 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3940 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3941 return -1;
ed502555 3942 }
3943
571e6ec8 3944 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3945 ERROR("failed to setup the new pts instance");
95b5ffaf 3946 return -1;
3c26f34e 3947 }
3948
e8bd4e43
SH
3949 if (lxc_create_tty(name, lxc_conf)) {
3950 ERROR("failed to create the ttys");
3951 return -1;
3952 }
3953
3954 if (send_ttys_to_parent(handler) < 0) {
3955 ERROR("failure sending console info to parent");
3956 return -1;
3957 }
3958
3959
3960 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3961 ERROR("failed to setup the ttys for '%s'", name);
3962 return -1;
3963 }
3964
3965 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3966 SYSERROR("failed to set environment variable for container ptys");
3967
3968
cccc74b5
DL
3969 if (setup_personality(lxc_conf->personality)) {
3970 ERROR("failed to setup personality");
3971 return -1;
3972 }
3973
97a8f74f
SG
3974 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3975 if (!lxc_list_empty(&lxc_conf->caps)) {
7389ca26 3976 ERROR("Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both.");
f6d3e3e4
SH
3977 return -1;
3978 }
97a8f74f
SG
3979 if (dropcaps_except(&lxc_conf->keepcaps)) {
3980 ERROR("failed to keep requested caps");
3981 return -1;
3982 }
3983 } else if (setup_caps(&lxc_conf->caps)) {
3984 ERROR("failed to drop capabilities");
3985 return -1;
81810dd1
DL
3986 }
3987
cd54d859
DL
3988 NOTICE("'%s' is setup.", name);
3989
0ad19a3f 3990 return 0;
3991}
26ddeedd 3992
283678ed
SH
3993int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3994 const char *lxcpath, char *argv[])
26ddeedd
SH
3995{
3996 int which = -1;
3997 struct lxc_list *it;
3998
3999 if (strcmp(hook, "pre-start") == 0)
4000 which = LXCHOOK_PRESTART;
5ea6163a
SH
4001 else if (strcmp(hook, "pre-mount") == 0)
4002 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4003 else if (strcmp(hook, "mount") == 0)
4004 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4005 else if (strcmp(hook, "autodev") == 0)
4006 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4007 else if (strcmp(hook, "start") == 0)
4008 which = LXCHOOK_START;
52492063
WB
4009 else if (strcmp(hook, "stop") == 0)
4010 which = LXCHOOK_STOP;
26ddeedd
SH
4011 else if (strcmp(hook, "post-stop") == 0)
4012 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4013 else if (strcmp(hook, "clone") == 0)
4014 which = LXCHOOK_CLONE;
37cf711b
SY
4015 else if (strcmp(hook, "destroy") == 0)
4016 which = LXCHOOK_DESTROY;
26ddeedd
SH
4017 else
4018 return -1;
4019 lxc_list_for_each(it, &conf->hooks[which]) {
4020 int ret;
4021 char *hookname = it->elem;
283678ed 4022 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4023 if (ret)
4024 return ret;
4025 }
4026 return 0;
4027}
72d0e1cb 4028
427b3a21 4029static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4030{
4031 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4032 struct lxc_list *it2,*next;
72d0e1cb
SG
4033
4034 lxc_list_del(it);
4035
f10fad2f
ME
4036 free(netdev->link);
4037 free(netdev->name);
4038 if (netdev->type == LXC_NET_VETH)
c9bb9a85 4039 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
4040 free(netdev->upscript);
4041 free(netdev->hwaddr);
4042 free(netdev->mtu);
4043 free(netdev->ipv4_gateway);
4044 free(netdev->ipv6_gateway);
9ebb03ad 4045 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4046 lxc_list_del(it2);
4047 free(it2->elem);
4048 free(it2);
4049 }
9ebb03ad 4050 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4051 lxc_list_del(it2);
4052 free(it2->elem);
4053 free(it2);
4054 }
d95db067 4055 free(netdev);
72d0e1cb
SG
4056 free(it);
4057}
4058
4059/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4060int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4061{
4062 char *p1;
4063 int ret, idx, i;
4064 struct lxc_list *it;
4065 struct lxc_netdev *netdev;
4066
46cd2845 4067 p1 = strchr(key, '.');
72d0e1cb
SG
4068 if (!p1 || *(p1+1) == '\0')
4069 p1 = NULL;
4070
4071 ret = sscanf(key, "%d", &idx);
4072 if (ret != 1) return -1;
4073 if (idx < 0)
4074 return -1;
4075
4076 i = 0;
4077 lxc_list_for_each(it, &c->network) {
4078 if (i == idx)
4079 break;
4080 i++;
4081 }
4082 if (i < idx) // we don't have that many nics defined
4083 return -1;
4084
4085 if (!it || !it->elem)
4086 return -1;
4087
4088 netdev = it->elem;
4089
4090 if (!p1) {
4091 lxc_remove_nic(it);
52d21d40 4092 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4093 struct lxc_list *it2,*next;
4094 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4095 lxc_list_del(it2);
4096 free(it2->elem);
4097 free(it2);
4098 }
52d21d40 4099 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4100 struct lxc_list *it2,*next;
4101 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4102 lxc_list_del(it2);
4103 free(it2->elem);
4104 free(it2);
4105 }
72d0e1cb
SG
4106 }
4107 else return -1;
4108
4109 return 0;
4110}
4111
4112int lxc_clear_config_network(struct lxc_conf *c)
4113{
9ebb03ad
DE
4114 struct lxc_list *it,*next;
4115 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4116 lxc_remove_nic(it);
4117 }
4118 return 0;
4119}
4120
4121int lxc_clear_config_caps(struct lxc_conf *c)
4122{
9ebb03ad 4123 struct lxc_list *it,*next;
72d0e1cb 4124
9ebb03ad 4125 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4126 lxc_list_del(it);
4127 free(it->elem);
4128 free(it);
4129 }
4130 return 0;
4131}
4132
74a3920a 4133static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4134 struct lxc_list *it, *next;
4135
4355ab5f 4136 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4137 lxc_list_del(it);
4138 free(it->elem);
4139 free(it);
4140 }
4141 return 0;
4142}
4143
4355ab5f
SH
4144int lxc_clear_idmaps(struct lxc_conf *c)
4145{
4146 return lxc_free_idmap(&c->id_map);
4147}
4148
1fb86a7c
SH
4149int lxc_clear_config_keepcaps(struct lxc_conf *c)
4150{
4151 struct lxc_list *it,*next;
4152
4153 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4154 lxc_list_del(it);
4155 free(it->elem);
4156 free(it);
4157 }
4158 return 0;
4159}
4160
12a50cc6 4161int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4162{
9ebb03ad 4163 struct lxc_list *it,*next;
72d0e1cb 4164 bool all = false;
12a50cc6 4165 const char *k = key + 11;
72d0e1cb
SG
4166
4167 if (strcmp(key, "lxc.cgroup") == 0)
4168 all = true;
4169
9ebb03ad 4170 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4171 struct lxc_cgroup *cg = it->elem;
4172 if (!all && strcmp(cg->subsystem, k) != 0)
4173 continue;
4174 lxc_list_del(it);
4175 free(cg->subsystem);
4176 free(cg->value);
4177 free(cg);
4178 free(it);
4179 }
4180 return 0;
4181}
4182
ee1e7aa0
SG
4183int lxc_clear_groups(struct lxc_conf *c)
4184{
4185 struct lxc_list *it,*next;
4186
4187 lxc_list_for_each_safe(it, &c->groups, next) {
4188 lxc_list_del(it);
4189 free(it->elem);
4190 free(it);
4191 }
4192 return 0;
4193}
4194
ab799c0b
SG
4195int lxc_clear_environment(struct lxc_conf *c)
4196{
4197 struct lxc_list *it,*next;
4198
4199 lxc_list_for_each_safe(it, &c->environment, next) {
4200 lxc_list_del(it);
4201 free(it->elem);
4202 free(it);
4203 }
4204 return 0;
4205}
4206
4207
72d0e1cb
SG
4208int lxc_clear_mount_entries(struct lxc_conf *c)
4209{
9ebb03ad 4210 struct lxc_list *it,*next;
72d0e1cb 4211
9ebb03ad 4212 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4213 lxc_list_del(it);
4214 free(it->elem);
4215 free(it);
4216 }
4217 return 0;
4218}
4219
b099e9e9
SH
4220int lxc_clear_automounts(struct lxc_conf *c)
4221{
4222 c->auto_mounts = 0;
4223 return 0;
4224}
4225
12a50cc6 4226int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4227{
9ebb03ad 4228 struct lxc_list *it,*next;
17ed13a3 4229 bool all = false, done = false;
12a50cc6 4230 const char *k = key + 9;
72d0e1cb
SG
4231 int i;
4232
17ed13a3
SH
4233 if (strcmp(key, "lxc.hook") == 0)
4234 all = true;
4235
72d0e1cb 4236 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4237 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4238 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4239 lxc_list_del(it);
4240 free(it->elem);
4241 free(it);
4242 }
4243 done = true;
72d0e1cb
SG
4244 }
4245 }
17ed13a3
SH
4246
4247 if (!done) {
4248 ERROR("Invalid hook key: %s", key);
4249 return -1;
4250 }
72d0e1cb
SG
4251 return 0;
4252}
8eb5694b 4253
74a3920a 4254static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4255{
4256 int i;
4257
0cf45501 4258 if (!conf->saved_nics)
7b35f3d6
SH
4259 return;
4260 for (i=0; i < conf->num_savednics; i++)
4261 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4262 free(conf->saved_nics);
4263}
4264
4184c3e1
SH
4265static inline void lxc_clear_aliens(struct lxc_conf *conf)
4266{
4267 struct lxc_list *it,*next;
4268
4269 lxc_list_for_each_safe(it, &conf->aliens, next) {
4270 lxc_list_del(it);
4271 free(it->elem);
4272 free(it);
4273 }
4274}
4275
f979ac15
SH
4276static inline void lxc_clear_includes(struct lxc_conf *conf)
4277{
4278 struct lxc_list *it,*next;
4279
4280 lxc_list_for_each_safe(it, &conf->includes, next) {
4281 lxc_list_del(it);
4282 free(it->elem);
4283 free(it);
4284 }
4285}
4286
8eb5694b
SH
4287void lxc_conf_free(struct lxc_conf *conf)
4288{
4289 if (!conf)
4290 return;
858377e4
SH
4291 if (current_config == conf)
4292 current_config = NULL;
f10fad2f
ME
4293 free(conf->console.log_path);
4294 free(conf->console.path);
4295 free(conf->rootfs.mount);
b3b8c97f 4296 free(conf->rootfs.bdev_type);
f10fad2f
ME
4297 free(conf->rootfs.options);
4298 free(conf->rootfs.path);
f10fad2f 4299 free(conf->logfile);
858377e4
SH
4300 if (conf->logfd != -1)
4301 close(conf->logfd);
f10fad2f
ME
4302 free(conf->utsname);
4303 free(conf->ttydir);
4304 free(conf->fstab);
4305 free(conf->rcfile);
4306 free(conf->init_cmd);
6b0d5538 4307 free(conf->unexpanded_config);
393903d1 4308 free(conf->pty_names);
76d0127f 4309 free(conf->syslog);
8eb5694b 4310 lxc_clear_config_network(conf);
f10fad2f
ME
4311 free(conf->lsm_aa_profile);
4312 free(conf->lsm_se_context);
769872f9 4313 lxc_seccomp_free(conf);
8eb5694b 4314 lxc_clear_config_caps(conf);
1fb86a7c 4315 lxc_clear_config_keepcaps(conf);
8eb5694b 4316 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4317 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4318 lxc_clear_mount_entries(conf);
7b35f3d6 4319 lxc_clear_saved_nics(conf);
27c27d73 4320 lxc_clear_idmaps(conf);
ee1e7aa0 4321 lxc_clear_groups(conf);
f979ac15 4322 lxc_clear_includes(conf);
761d81ca 4323 lxc_clear_aliens(conf);
ab799c0b 4324 lxc_clear_environment(conf);
8eb5694b
SH
4325 free(conf);
4326}
4355ab5f
SH
4327
/*
 * Argument bundle passed to run_userns_fn().
 */
struct userns_fn_data {
	int (*fn)(void *);	/* callback to run once the go-ahead arrives */
	void *arg;		/* opaque argument handed to fn */
	int p[2];		/* pipe: p[0] is read from, p[1] is only closed here */
};

/*
 * Entry point executed in the cloned child.
 *
 * Closes its copy of the pipe's write end, then blocks until exactly one
 * byte can be read from the read end. The read end is closed on every
 * path, including failure. If the byte arrived, d->fn(d->arg) is invoked
 * and its result returned.
 *
 * @data : pointer to a struct userns_fn_data
 *
 * Returns -1 if the single-byte read failed or hit end-of-file, otherwise
 * the return value of the callback.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *d = data;
	char c;
	ssize_t nread;

	// we're not sharing with the parent any more, if it was a thread

	close(d->p[1]);

	nread = read(d->p[0], &c, 1);
	/* Close unconditionally so the descriptor is not leaked on failure. */
	close(d->p[0]);
	if (nread != 1)
		return -1;

	return d->fn(d->arg);
}
4346
4347/*
8b227008
TS
4348 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4349 * if they are not already there.
4355ab5f 4350 */
8b227008
TS
4351static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4352 uid_t uid, gid_t gid)
4355ab5f 4353{
8b227008
TS
4354 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4355 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4356 struct lxc_list *new = NULL, *tmp, *it, *next;
4357 struct id_map *entry;
4358
3ec1648d
SH
4359 new = malloc(sizeof(*new));
4360 if (!new) {
4361 ERROR("Out of memory building id map");
4362 return NULL;
4363 }
4364 lxc_list_init(new);
4365
8b227008
TS
4366 if (hostuid_mapped < 0) {
4367 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4368 if (hostuid_mapped < 0)
3ec1648d
SH
4369 goto err;
4370 tmp = malloc(sizeof(*tmp));
4371 if (!tmp)
4372 goto err;
4355ab5f
SH
4373 entry = malloc(sizeof(*entry));
4374 if (!entry) {
3ec1648d
SH
4375 free(tmp);
4376 goto err;
4355ab5f 4377 }
3ec1648d 4378 tmp->elem = entry;
4355ab5f 4379 entry->idtype = ID_TYPE_UID;
8b227008
TS
4380 entry->nsid = hostuid_mapped;
4381 entry->hostid = (unsigned long) uid;
4382 entry->range = 1;
4383 lxc_list_add_tail(new, tmp);
4384 }
4385 if (hostgid_mapped < 0) {
4386 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4387 if (hostgid_mapped < 0)
4388 goto err;
4389 tmp = malloc(sizeof(*tmp));
4390 if (!tmp)
4391 goto err;
4392 entry = malloc(sizeof(*entry));
4393 if (!entry) {
4394 free(tmp);
4395 goto err;
4396 }
4397 tmp->elem = entry;
4398 entry->idtype = ID_TYPE_GID;
4399 entry->nsid = hostgid_mapped;
4400 entry->hostid = (unsigned long) gid;
4355ab5f 4401 entry->range = 1;
3ec1648d 4402 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4403 }
4404 lxc_list_for_each_safe(it, &conf->id_map, next) {
4405 tmp = malloc(sizeof(*tmp));
4406 if (!tmp)
4407 goto err;
4408 entry = malloc(sizeof(*entry));
4409 if (!entry) {
4410 free(tmp);
4411 goto err;
4412 }
4413 memset(entry, 0, sizeof(*entry));
4414 memcpy(entry, it->elem, sizeof(*entry));
4415 tmp->elem = entry;
3ec1648d 4416 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4417 }
4418
4419 return new;
4420
4421err:
8b227008 4422 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4423 if (new)
4424 lxc_free_idmap(new);
c30ac545 4425 free(new);
4355ab5f
SH
4426 return NULL;
4427}
4428
4429/*
4430 * Run a function in a new user namespace.
8b227008 4431 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4432 */
4433int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4434{
4435 int ret, pid;
4436 struct userns_fn_data d;
4437 char c = '1';
4438 int p[2];
4439 struct lxc_list *idmap;
4440
4355ab5f 4441 ret = pipe(p);
4355ab5f
SH
4442 if (ret < 0) {
4443 SYSERROR("opening pipe");
4444 return -1;
4445 }
4446 d.fn = fn;
4447 d.arg = data;
4448 d.p[0] = p[0];
4449 d.p[1] = p[1];
4450 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4451 if (pid < 0)
4452 goto err;
4355ab5f 4453 close(p[0]);
4355ab5f
SH
4454 p[0] = -1;
4455
8b227008
TS
4456 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4457 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4458 goto err;
4459 }
4460
4461 ret = lxc_map_ids(idmap, pid);
4462 lxc_free_idmap(idmap);
88dd66fc 4463 free(idmap);
565e571c 4464 if (ret) {
4355ab5f
SH
4465 ERROR("Error setting up child mappings");
4466 goto err;
4467 }
4468
4469 // kick the child
4470 if (write(p[1], &c, 1) != 1) {
4471 SYSERROR("writing to pipe to child");
4472 goto err;
4473 }
4474
3139aead
SG
4475 ret = wait_for_pid(pid);
4476
4477 close(p[1]);
4478 return ret;
4479
4355ab5f 4480err:
4355ab5f
SH
4481 if (p[0] != -1)
4482 close(p[0]);
4483 close(p[1]);
4355ab5f
SH
4484 return -1;
4485}
97e9cfa0 4486
/*
 * Look up the login name of the effective user id.
 *
 * Returns a strdup()'d copy of the name that the caller must free(), or
 * NULL if getpwuid() found no entry (or strdup() failed).
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4498
/*
 * Look up the group name of the effective group id.
 *
 * Returns a strdup()'d copy of the name that the caller must free(), or
 * NULL if getgrgid() found no entry (or strdup() failed).
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4510
a96a8e8c 4511/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4512void suggest_default_idmap(void)
4513{
4514 FILE *f;
4515 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4516 char *line = NULL;
4517 char *uname, *gname;
4518 size_t len = 0;
4519
4520 if (!(uname = getuname()))
4521 return;
4522
4523 if (!(gname = getgname())) {
4524 free(uname);
4525 return;
4526 }
4527
4528 f = fopen(subuidfile, "r");
4529 if (!f) {
4530 ERROR("Your system is not configured with subuids");
4531 free(gname);
4532 free(uname);
4533 return;
4534 }
4535 while (getline(&line, &len, f) != -1) {
b7930180 4536 size_t no_newline = 0;
97e9cfa0
SH
4537 char *p = strchr(line, ':'), *p2;
4538 if (*line == '#')
4539 continue;
4540 if (!p)
4541 continue;
4542 *p = '\0';
4543 p++;
4544 if (strcmp(line, uname))
4545 continue;
4546 p2 = strchr(p, ':');
4547 if (!p2)
4548 continue;
4549 *p2 = '\0';
4550 p2++;
4551 if (!*p2)
4552 continue;
b7930180
CB
4553 no_newline = strcspn(p2, "\n");
4554 p2[no_newline] = '\0';
4555
b7b2fde4
CB
4556 if (lxc_safe_uint(p, &uid) < 0)
4557 WARN("Could not parse UID.");
4558 if (lxc_safe_uint(p2, &urange) < 0)
4559 WARN("Could not parse UID range.");
97e9cfa0
SH
4560 }
4561 fclose(f);
4562
4563 f = fopen(subuidfile, "r");
4564 if (!f) {
4565 ERROR("Your system is not configured with subgids");
4566 free(gname);
4567 free(uname);
4568 return;
4569 }
4570 while (getline(&line, &len, f) != -1) {
b7930180 4571 size_t no_newline = 0;
97e9cfa0
SH
4572 char *p = strchr(line, ':'), *p2;
4573 if (*line == '#')
4574 continue;
4575 if (!p)
4576 continue;
4577 *p = '\0';
4578 p++;
4579 if (strcmp(line, uname))
4580 continue;
4581 p2 = strchr(p, ':');
4582 if (!p2)
4583 continue;
4584 *p2 = '\0';
4585 p2++;
4586 if (!*p2)
4587 continue;
b7930180
CB
4588 no_newline = strcspn(p2, "\n");
4589 p2[no_newline] = '\0';
4590
b7b2fde4
CB
4591 if (lxc_safe_uint(p, &gid) < 0)
4592 WARN("Could not parse GID.");
4593 if (lxc_safe_uint(p2, &grange) < 0)
4594 WARN("Could not parse GID range.");
97e9cfa0
SH
4595 }
4596 fclose(f);
4597
f10fad2f 4598 free(line);
97e9cfa0
SH
4599
4600 if (!urange || !grange) {
4601 ERROR("You do not have subuids or subgids allocated");
4602 ERROR("Unprivileged containers require subuids and subgids");
4603 return;
4604 }
4605
4606 ERROR("You must either run as root, or define uid mappings");
4607 ERROR("To pass uid mappings to lxc-create, you could create");
4608 ERROR("~/.config/lxc/default.conf:");
4609 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4610 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4611 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4612
4613 free(gname);
4614 free(uname);
4615}
aaf26830 4616
a7307747
SH
4617static void free_cgroup_settings(struct lxc_list *result)
4618{
4619 struct lxc_list *iterator, *next;
4620
4621 lxc_list_for_each_safe(iterator, result, next) {
4622 lxc_list_del(iterator);
4623 free(iterator);
4624 }
4625 free(result);
4626}
4627
aaf26830
KT
4628/*
4629 * Return the list of cgroup_settings sorted according to the following rules
4630 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4631 */
4632struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4633{
4634 struct lxc_list *result;
4635 struct lxc_list *memsw_limit = NULL;
4636 struct lxc_list *it = NULL;
4637 struct lxc_cgroup *cg = NULL;
4638 struct lxc_list *item = NULL;
4639
4640 result = malloc(sizeof(*result));
fac7c663
KT
4641 if (!result) {
4642 ERROR("failed to allocate memory to sort cgroup settings");
4643 return NULL;
4644 }
aaf26830
KT
4645 lxc_list_init(result);
4646
4647 /*Iterate over the cgroup settings and copy them to the output list*/
4648 lxc_list_for_each(it, cgroup_settings) {
4649 item = malloc(sizeof(*item));
fac7c663
KT
4650 if (!item) {
4651 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4652 free_cgroup_settings(result);
fac7c663
KT
4653 return NULL;
4654 }
aaf26830
KT
4655 item->elem = it->elem;
4656 cg = it->elem;
4657 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4658 /* Store the memsw_limit location */
4659 memsw_limit = item;
4660 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4661 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4662 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4663 item->elem = memsw_limit->elem;
4664 memsw_limit->elem = it->elem;
4665 }
4666 lxc_list_add_tail(result, item);
4667 }
4668
4669 return result;
a7307747 4670}