]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
conf: check for {filecaps,setuid} on new{g,u}idmap
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
1d52bdf7
CB
23
24#define _GNU_SOURCE
d06245b8
NC
25#include "config.h"
26
8f3e280e
CB
27#include <dirent.h>
28#include <errno.h>
29#include <fcntl.h>
30#include <grp.h>
31#include <inttypes.h>
32#include <libgen.h>
33#include <pwd.h>
34#include <stdarg.h>
0ad19a3f 35#include <stdio.h>
0ad19a3f 36#include <stdlib.h>
0ad19a3f 37#include <string.h>
8f3e280e 38#include <time.h>
0ad19a3f 39#include <unistd.h>
8f3e280e
CB
40#include <arpa/inet.h>
41#include <linux/loop.h>
8f3e280e
CB
42#include <net/if.h>
43#include <netinet/in.h>
44#include <sys/mman.h>
45#include <sys/mount.h>
46#include <sys/param.h>
47#include <sys/prctl.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
ce831b3b 50#include <sys/sysmacros.h>
2d76d1d7 51#include <sys/syscall.h>
97e9cfa0 52#include <sys/types.h>
8f3e280e
CB
53#include <sys/utsname.h>
54#include <sys/wait.h>
1d52bdf7 55
af6824fc
ST
56/* makedev() */
57#ifdef MAJOR_IN_MKDEV
58# include <sys/mkdev.h>
59#endif
af6824fc 60
614305f3 61#ifdef HAVE_STATVFS
2938f7c8 62#include <sys/statvfs.h>
614305f3 63#endif
e827ff7e
SG
64
65#if HAVE_PTY_H
b0a33c1e 66#include <pty.h>
e827ff7e
SG
67#else
68#include <../include/openpty.h>
69#endif
0ad19a3f 70
5ef5c9a3
CB
71#ifdef HAVE_LINUX_MEMFD_H
72#include <linux/memfd.h>
73#endif
74
e8bd4e43 75#include "af_unix.h"
8f3e280e
CB
76#include "bdev.h"
77#include "caps.h" /* for lxc_caps_last_cap() */
78#include "cgroup.h"
1b09f2c0 79#include "conf.h"
8f3e280e 80#include "error.h"
1b09f2c0 81#include "log.h"
d8e48992 82#include "lxcaufs.h"
025ed0f3 83#include "lxclock.h"
8f3e280e
CB
84#include "lxcoverlay.h"
85#include "lxcseccomp.h"
4355ab5f 86#include "namespace.h"
8f3e280e
CB
87#include "network.h"
88#include "parse.h"
89#include "utils.h"
fe4de9a6 90#include "lsm/lsm.h"
d0a36f2c 91
e37dda71 92#if HAVE_LIBCAP
495d2046
SG
93#include <sys/capability.h>
94#endif
95
6ff05e18
SG
96#if HAVE_SYS_PERSONALITY_H
97#include <sys/personality.h>
98#endif
99
edaf8b1b
SG
100#if IS_BIONIC
101#include <../include/lxcmntent.h>
102#else
103#include <mntent.h>
104#endif
105
36eb9bde 106lxc_log_define(lxc_conf, lxc);
e5bda9ee 107
e37dda71 108#if HAVE_LIBCAP
b09094da
MN
109#ifndef CAP_SETFCAP
110#define CAP_SETFCAP 31
111#endif
112
113#ifndef CAP_MAC_OVERRIDE
114#define CAP_MAC_OVERRIDE 32
115#endif
116
117#ifndef CAP_MAC_ADMIN
118#define CAP_MAC_ADMIN 33
119#endif
495d2046 120#endif
b09094da
MN
121
122#ifndef PR_CAPBSET_DROP
123#define PR_CAPBSET_DROP 24
124#endif
125
9818cae4
SG
126#ifndef LO_FLAGS_AUTOCLEAR
127#define LO_FLAGS_AUTOCLEAR 4
128#endif
129
0769b82a
CS
130/* needed for cgroup automount checks, regardless of whether we
131 * have included linux/capability.h or not */
132#ifndef CAP_SYS_ADMIN
133#define CAP_SYS_ADMIN 21
134#endif
135
2d76d1d7
SG
/*
 * pivot_root() shim: when the build did not detect a C library wrapper
 * (HAVE_PIVOT_ROOT unset), issue the raw syscall ourselves.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	/* No syscall number is known here, report "not implemented". */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
150
/*
 * sethostname() shim: only compiled when the build did not detect a
 * C library wrapper (HAVE_SETHOSTNAME unset).
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	/* No syscall number is known here, report "not implemented". */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
163
72f919c4
SG
164/* Define __S_ISTYPE if missing from the C library */
165#ifndef __S_ISTYPE
166#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
167#endif
168
ecec0126
SG
169#ifndef MS_PRIVATE
170#define MS_PRIVATE (1<<18)
171#endif
172
5ef5c9a3
CB
/* memfd_create() flags, for headers that predate them. */
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC 0x0001U
#endif

#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif

/*
 * memfd_create() shim: use the C library wrapper when the build found one,
 * otherwise go through syscall() with a per-architecture fallback number.
 */
#ifdef HAVE_MEMFD_CREATE
extern int memfd_create(const char *name, unsigned int flags);
#else

/* Fallback syscall numbers for kernels headers that lack __NR_memfd_create. */
#ifndef __NR_memfd_create
	#if defined __i386__
		#define __NR_memfd_create 356
	#elif defined __x86_64__
		#define __NR_memfd_create 319
	#elif defined __arm__
		#define __NR_memfd_create 385
	#elif defined __aarch64__
		#define __NR_memfd_create 279
	#elif defined __s390__
		#define __NR_memfd_create 350
	#elif defined __powerpc__
		#define __NR_memfd_create 360
	#elif defined __sparc__
		#define __NR_memfd_create 348
	#elif defined __blackfin__
		#define __NR_memfd_create 390
	#elif defined __ia64__
		#define __NR_memfd_create 1340
	#elif defined _MIPS_SIM
		#if _MIPS_SIM == _MIPS_SIM_ABI32
			#define __NR_memfd_create 4354
		#endif
		#if _MIPS_SIM == _MIPS_SIM_NABI32
			#define __NR_memfd_create 6318
		#endif
		#if _MIPS_SIM == _MIPS_SIM_ABI64
			#define __NR_memfd_create 5314
		#endif
	#endif
#endif

static int memfd_create(const char *name, unsigned int flags)
{
#ifdef __NR_memfd_create
	return syscall(__NR_memfd_create, name, flags);
#else
	/* Unknown architecture: no syscall number available. */
	errno = ENOSYS;
	return -1;
#endif
}
#endif
225
72d0e1cb 226char *lxchook_names[NUM_LXC_HOOKS] = {
52492063 227 "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" };
72d0e1cb 228
/* Signature shared by the per-network-type setup and shutdown callbacks. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/*
 * One textual mount option and the MS_* bit it maps to.
 * NOTE(review): "clear" looks like "this option removes the flag" (e.g. "rw"
 * pairs clear=1 with MS_RDONLY) - confirm against the option parser.
 */
struct mount_opt {
	char *name;
	int clear;
	int flag;
};

/* Capability name and its CAP_* number. */
struct caps_opt {
	char *name;
	int value;
};

/* Resource limit name and its RLIMIT_* number. */
struct limit_opt {
	char *name;
	int value;
};
246
858377e4
SH
/*
 * The lxc_conf of the container that the current API call is working on.
 * It is consulted by the error calls. When thread-local storage is
 * available (HAVE_TLS) each thread gets its own pointer.
 */
#ifndef HAVE_TLS
struct lxc_conf *current_config;
#else
__thread struct lxc_conf *current_config;
#endif
257
0769b82a
CS
258/* Declare this here, since we don't want to reshuffle the whole file. */
259static int in_caplist(int cap, struct lxc_list *caps);
260
a589434e
JN
261static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
262static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
263static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
264static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
265static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
266static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
267
268static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
269 [LXC_NET_VETH] = instantiate_veth,
270 [LXC_NET_MACVLAN] = instantiate_macvlan,
271 [LXC_NET_VLAN] = instantiate_vlan,
272 [LXC_NET_PHYS] = instantiate_phys,
273 [LXC_NET_EMPTY] = instantiate_empty,
274 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 275};
276
74a2b586
JK
277static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
278static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
279static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
280static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
281static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 282static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 283
a589434e 284static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
285 [LXC_NET_VETH] = shutdown_veth,
286 [LXC_NET_MACVLAN] = shutdown_macvlan,
287 [LXC_NET_VLAN] = shutdown_vlan,
288 [LXC_NET_PHYS] = shutdown_phys,
289 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 290 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
291};
292
998ac676 293static struct mount_opt mount_opt[] = {
88d413d5
SW
294 { "defaults", 0, 0 },
295 { "ro", 0, MS_RDONLY },
296 { "rw", 1, MS_RDONLY },
297 { "suid", 1, MS_NOSUID },
298 { "nosuid", 0, MS_NOSUID },
299 { "dev", 1, MS_NODEV },
300 { "nodev", 0, MS_NODEV },
301 { "exec", 1, MS_NOEXEC },
302 { "noexec", 0, MS_NOEXEC },
303 { "sync", 0, MS_SYNCHRONOUS },
304 { "async", 1, MS_SYNCHRONOUS },
305 { "dirsync", 0, MS_DIRSYNC },
306 { "remount", 0, MS_REMOUNT },
307 { "mand", 0, MS_MANDLOCK },
308 { "nomand", 1, MS_MANDLOCK },
309 { "atime", 1, MS_NOATIME },
310 { "noatime", 0, MS_NOATIME },
311 { "diratime", 1, MS_NODIRATIME },
312 { "nodiratime", 0, MS_NODIRATIME },
313 { "bind", 0, MS_BIND },
314 { "rbind", 0, MS_BIND|MS_REC },
315 { "relatime", 0, MS_RELATIME },
316 { "norelatime", 1, MS_RELATIME },
317 { "strictatime", 0, MS_STRICTATIME },
318 { "nostrictatime", 1, MS_STRICTATIME },
319 { NULL, 0, 0 },
998ac676
RT
320};
321
e37dda71 322#if HAVE_LIBCAP
81810dd1 323static struct caps_opt caps_opt[] = {
a6afdde9 324 { "chown", CAP_CHOWN },
1e11be34
DL
325 { "dac_override", CAP_DAC_OVERRIDE },
326 { "dac_read_search", CAP_DAC_READ_SEARCH },
327 { "fowner", CAP_FOWNER },
328 { "fsetid", CAP_FSETID },
81810dd1
DL
329 { "kill", CAP_KILL },
330 { "setgid", CAP_SETGID },
331 { "setuid", CAP_SETUID },
332 { "setpcap", CAP_SETPCAP },
333 { "linux_immutable", CAP_LINUX_IMMUTABLE },
334 { "net_bind_service", CAP_NET_BIND_SERVICE },
335 { "net_broadcast", CAP_NET_BROADCAST },
336 { "net_admin", CAP_NET_ADMIN },
337 { "net_raw", CAP_NET_RAW },
338 { "ipc_lock", CAP_IPC_LOCK },
339 { "ipc_owner", CAP_IPC_OWNER },
340 { "sys_module", CAP_SYS_MODULE },
341 { "sys_rawio", CAP_SYS_RAWIO },
342 { "sys_chroot", CAP_SYS_CHROOT },
343 { "sys_ptrace", CAP_SYS_PTRACE },
344 { "sys_pacct", CAP_SYS_PACCT },
345 { "sys_admin", CAP_SYS_ADMIN },
346 { "sys_boot", CAP_SYS_BOOT },
347 { "sys_nice", CAP_SYS_NICE },
348 { "sys_resource", CAP_SYS_RESOURCE },
349 { "sys_time", CAP_SYS_TIME },
350 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
351 { "mknod", CAP_MKNOD },
352 { "lease", CAP_LEASE },
57b837e2
CB
353#ifdef CAP_AUDIT_READ
354 { "audit_read", CAP_AUDIT_READ },
355#endif
9527e566 356#ifdef CAP_AUDIT_WRITE
81810dd1 357 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
358#endif
359#ifdef CAP_AUDIT_CONTROL
81810dd1 360 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 361#endif
81810dd1
DL
362 { "setfcap", CAP_SETFCAP },
363 { "mac_override", CAP_MAC_OVERRIDE },
364 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
365#ifdef CAP_SYSLOG
366 { "syslog", CAP_SYSLOG },
367#endif
368#ifdef CAP_WAKE_ALARM
369 { "wake_alarm", CAP_WAKE_ALARM },
370#endif
2b54359b
CB
371#ifdef CAP_BLOCK_SUSPEND
372 { "block_suspend", CAP_BLOCK_SUSPEND },
373#endif
81810dd1 374};
495d2046
SG
375#else
376static struct caps_opt caps_opt[] = {};
377#endif
81810dd1 378
c6d09e15
WB
379static struct limit_opt limit_opt[] = {
380#ifdef RLIMIT_AS
381 { "as", RLIMIT_AS },
382#endif
383#ifdef RLIMIT_CORE
384 { "core", RLIMIT_CORE },
385#endif
386#ifdef RLIMIT_CPU
387 { "cpu", RLIMIT_CPU },
388#endif
389#ifdef RLIMIT_DATA
390 { "data", RLIMIT_DATA },
391#endif
392#ifdef RLIMIT_FSIZE
393 { "fsize", RLIMIT_FSIZE },
394#endif
395#ifdef RLIMIT_LOCKS
396 { "locks", RLIMIT_LOCKS },
397#endif
398#ifdef RLIMIT_MEMLOCK
399 { "memlock", RLIMIT_MEMLOCK },
400#endif
401#ifdef RLIMIT_MSGQUEUE
402 { "msgqueue", RLIMIT_MSGQUEUE },
403#endif
404#ifdef RLIMIT_NICE
405 { "nice", RLIMIT_NICE },
406#endif
407#ifdef RLIMIT_NOFILE
408 { "nofile", RLIMIT_NOFILE },
409#endif
410#ifdef RLIMIT_NPROC
411 { "nproc", RLIMIT_NPROC },
412#endif
413#ifdef RLIMIT_RSS
414 { "rss", RLIMIT_RSS },
415#endif
416#ifdef RLIMIT_RTPRIO
417 { "rtprio", RLIMIT_RTPRIO },
418#endif
419#ifdef RLIMIT_RTTIME
420 { "rttime", RLIMIT_RTTIME },
421#endif
422#ifdef RLIMIT_SIGPENDING
423 { "sigpending", RLIMIT_SIGPENDING },
424#endif
425#ifdef RLIMIT_STACK
426 { "stack", RLIMIT_STACK },
427#endif
428};
429
91c3830e
SH
430static int run_buffer(char *buffer)
431{
ebec9176 432 struct lxc_popen_FILE *f;
91c3830e 433 char *output;
8e7da691 434 int ret;
91c3830e 435
ebec9176 436 f = lxc_popen(buffer);
91c3830e 437 if (!f) {
062b72c6 438 SYSERROR("Failed to popen() %s.", buffer);
91c3830e
SH
439 return -1;
440 }
441
442 output = malloc(LXC_LOG_BUFFER_SIZE);
443 if (!output) {
062b72c6 444 ERROR("Failed to allocate memory for %s.", buffer);
ebec9176 445 lxc_pclose(f);
91c3830e
SH
446 return -1;
447 }
448
062b72c6
CB
449 while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
450 DEBUG("Script %s with output: %s.", buffer, output);
91c3830e
SH
451
452 free(output);
453
ebec9176 454 ret = lxc_pclose(f);
8e7da691 455 if (ret == -1) {
062b72c6 456 SYSERROR("Script exited with error.");
91c3830e 457 return -1;
8e7da691 458 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
062b72c6 459 ERROR("Script exited with status %d.", WEXITSTATUS(ret));
8e7da691
DE
460 return -1;
461 } else if (WIFSIGNALED(ret)) {
062b72c6 462 ERROR("Script terminated by signal %d.", WTERMSIG(ret));
8e7da691 463 return -1;
91c3830e
SH
464 }
465
466 return 0;
467}
468
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and run
 * it through run_buffer().
 *
 * @name, @section, @script and @hook must be non-NULL strings. @argsin may be
 * NULL or a NULL-terminated vector of extra arguments. @lxcpath is accepted
 * for interface compatibility but not used here.
 *
 * The command line is built on the heap: its length is caller controlled (up
 * to INT_MAX), which is too much for the stack, and unlike alloca() a failed
 * malloc() can actually be detected.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not be
 * assembled.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook,
			   const char *lxcpath, char **argsin)
{
	int ret = -1;
	int len, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* every extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two more separating spaces and the terminating NUL */
	size += 3;

	/* snprintf() reports lengths as int, so stay within that range */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	len = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (len < 0 || (size_t)len >= size) {
		ERROR("Script name too long.");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t remaining = size - (size_t)len;
		int rc;

		rc = snprintf(buffer + len, remaining, " %s", argsin[i]);
		if (rc < 0 || (size_t)rc >= remaining) {
			ERROR("Script args too long.");
			goto out;
		}
		len += rc;
	}

	ret = run_buffer(buffer);

out:
	free(buffer);
	return ret;
}
519
062b72c6
CB
/*
 * Build the command line "<script> <name> <section> [args...]" and run it
 * through run_buffer().
 *
 * The variadic part is a list of char * arguments that must be terminated by
 * a NULL pointer.
 *
 * The command line is built on the heap: its length is caller controlled (up
 * to INT_MAX), which is too much for the stack, and unlike alloca() a failed
 * malloc() can actually be detected.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not be
 * assembled.
 */
static int run_script(const char *name, const char *section, const char *script,
		      ...)
{
	int ret = -1;
	int len;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\".",
	     script, name, section);

	/* first pass: every extra argument is preceded by one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two separating spaces and the terminating NUL */
	size += 3;

	/* snprintf() reports lengths as int, so stay within that range */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("Failed to allocate memory.");
		return -1;
	}

	len = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (len < 0 || (size_t)len >= size) {
		ERROR("Script name too long.");
		goto out;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t remaining = size - (size_t)len;
		int rc;

		rc = snprintf(buffer + len, remaining, " %s", p);
		if (rc < 0 || (size_t)rc >= remaining) {
			ERROR("Script args too long.");
			/* every va_start() needs its va_end(), also on errors */
			va_end(ap);
			goto out;
		}
		len += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);

out:
	free(buffer);
	return ret;
}
571
a17b1e65
SG
/*
 * Recursively bind-mount the directory @rootfs onto @target, applying the
 * mount flags and data parsed from @options.
 *
 * Returns -1 if the options cannot be parsed, otherwise the result of
 * mount(2).
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Both outputs start out defined: mntdata is handed to free() on the
	 * failure path below, which must not see an indeterminate pointer if
	 * parse_mntopts() bails out before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
589
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed and set up with LO_FLAGS_AUTOCLEAR before it is applied
 * with LOOP_SET_STATUS64. The descriptor opened on @rootfs is closed again
 * before returning.
 *
 * Returns 0 on success, -1 if @rootfs cannot be opened or an ioctl fails.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int status = -1;
	int backing_fd;

	backing_fd = open(rootfs, O_RDWR);
	if (backing_fd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));
	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, backing_fd)) {
		SYSERROR("failed to LOOP_SET_FD");
	} else if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
	} else {
		status = 0;
	}

	close(backing_fd);

	return status;
}
621
a17b1e65
SG
/*
 * Mount the image file @rootfs on @target through a loop device.
 *
 * /dev is scanned for entries whose name starts with "loop". The first one
 * for which LOOP_GET_STATUS64 fails with ENXIO is taken as free, attached to
 * @rootfs and mounted with mount_unknown_fs(). The scan stops after that
 * first candidate, whether or not the mount worked.
 *
 * Returns 0 on success and -1 on error, including when no free loop device
 * was found.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	char path[MAXPATHLEN];
	struct loop_info64 loinfo;
	struct dirent *entry;
	DIR *devdir;
	int ret = -1;

	devdir = opendir("/dev");
	if (!devdir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	for (;;) {
		const char *devname;
		int fd, rc;

		entry = readdir(devdir);
		if (!entry)
			break;

		devname = entry->d_name;
		if (!strcmp(devname, ".") || !strcmp(devname, "..") ||
		    strncmp(devname, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", devname);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a status query that succeeds means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     entry->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (ret == 0)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(devdir))
		WARN("failed to close directory");

	return ret;
}
686
a17b1e65
SG
/*
 * Mount the block device @rootfs on @target. The work is delegated to
 * mount_unknown_fs(), whose result is returned unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc = mount_unknown_fs(rootfs, target, options);

	return rc;
}
692
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing for the
 * duration of the container run, to prevent the container from marking the
 * underlying fs readonly on shutdown. The file is unlinked immediately so
 * that no name pollution happens; the open descriptor keeps it alive.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, the path cannot
 *           be resolved, or it is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* snprintf() may also fail outright, not only truncate */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	(void)unlink(absrootfspin);
	return fd;
}
735
e2a7e8dc
SH
/*
 * When a remount is requested, carry over the NOSUID, NODEV, RDONLY and
 * NOEXEC flags that statvfs() reports for the existing mount.
 *
 * The filesystem is looked up via @s, or via @d when @s is NULL. @flags is
 * returned unchanged if MS_REMOUNT is not set, if both paths are NULL, if
 * statvfs() fails, or if the build has no statvfs() (HAVE_STATVFS unset).
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long carried[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs vfs;
	size_t k;

	if (!(flags & MS_REMOUNT) || !path)
		return flags;

	if (statvfs(path, &vfs) < 0)
		return flags;

	for (k = 0; k < sizeof(carried) / sizeof(carried[0]); k++) {
		if (vfs.f_flag & carried[k])
			flags |= carried[k];
	}

	return flags;
#else
	return flags;
#endif
}
772
4fb3cba5 773static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 774{
368bbc02 775 int r;
80e80c40 776 int i;
b06b8511
CS
777 static struct {
778 int match_mask;
779 int match_flag;
780 const char *source;
781 const char *destination;
782 const char *fstype;
783 unsigned long flags;
784 const char *options;
785 } default_mounts[] = {
786 /* Read-only bind-mounting... In older kernels, doing that required
787 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
788 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
789 * kernel 2.6.26 onwards. However, this apparently does not work on
790 * kernel 3.8. Unfortunately, on that very same kernel, doing the
791 * same trick as above doesn't seem to work either, there one needs
792 * to ALSO specify MS_BIND for the remount, otherwise the entire
793 * fs is remounted read-only or the mount fails because it's busy...
794 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
795 * 2.6.32...
368bbc02 796 */
f24a52d5 797 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
798 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
799 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
800 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
801 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 802 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
803 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
804 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
805 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
806 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
807 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
808 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
809 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
810 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
811 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
812 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
813 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
814 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 815 };
368bbc02 816
b06b8511
CS
817 for (i = 0; default_mounts[i].match_mask; i++) {
818 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
819 char *source = NULL;
820 char *destination = NULL;
821 int saved_errno;
e2a7e8dc 822 unsigned long mflags;
b06b8511
CS
823
824 if (default_mounts[i].source) {
825 /* will act like strdup if %r is not present */
8ede5f4c 826 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
827 if (!source) {
828 SYSERROR("memory allocation error");
829 return -1;
830 }
831 }
cc4fd506
SH
832 if (!default_mounts[i].destination) {
833 ERROR("BUG: auto mounts destination %d was NULL", i);
b2f44b4d 834 free(source);
cc4fd506
SH
835 return -1;
836 }
837 /* will act like strdup if %r is not present */
838 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
839 if (!destination) {
840 saved_errno = errno;
841 SYSERROR("memory allocation error");
842 free(source);
843 errno = saved_errno;
844 return -1;
b06b8511 845 }
e2a7e8dc
SH
846 mflags = add_required_remount_flags(source, destination,
847 default_mounts[i].flags);
592fd47a 848 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 849 saved_errno = errno;
b88ff9a0
SG
850 if (r < 0 && errno == ENOENT) {
851 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
852 r = 0;
853 }
854 else if (r < 0)
e2a7e8dc 855 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 856
b06b8511
CS
857 free(source);
858 free(destination);
859 if (r < 0) {
b06b8511
CS
860 errno = saved_errno;
861 return -1;
862 }
368bbc02 863 }
368bbc02
CS
864 }
865
b06b8511 866 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
867 int cg_flags;
868
869 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
870 /* If the type of cgroup mount was not specified, it depends on the
871 * container's capabilities as to what makes sense: if we have
872 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
873 * anyway, so we may as well default to read-write; then the admin
874 * will not be given a false sense of security. (And if they really
875 * want mixed r/o r/w, then they can explicitly specify :mixed.)
876 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
877 * :mixed, because then the container can't remount it read-write. */
878 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
879 int has_sys_admin = 0;
880 if (!lxc_list_empty(&conf->keepcaps)) {
881 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
882 } else {
883 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
884 }
885 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
886 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
887 } else {
888 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
889 }
890 }
891
8ede5f4c 892 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 893 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 894 return -1;
368bbc02
CS
895 }
896 }
897
368bbc02 898 return 0;
368bbc02
CS
899}
900
a17b1e65 901static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 902{
b09ef133 903 char absrootfs[MAXPATHLEN];
78ae2fcc 904 struct stat s;
a6afdde9 905 int i;
78ae2fcc 906
a17b1e65 907 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 908
909 struct rootfs_type {
910 int type;
911 rootfs_cb cb;
912 } rtfs_type[] = {
2656d231
DL
913 { S_IFDIR, mount_rootfs_dir },
914 { S_IFBLK, mount_rootfs_block },
915 { S_IFREG, mount_rootfs_file },
78ae2fcc 916 };
0ad19a3f 917
4c8ab83b 918 if (!realpath(rootfs, absrootfs)) {
36eb9bde 919 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 920 return -1;
921 }
b09ef133 922
b09ef133 923 if (access(absrootfs, F_OK)) {
36eb9bde 924 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 925 return -1;
926 }
927
78ae2fcc 928 if (stat(absrootfs, &s)) {
36eb9bde 929 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 930 return -1;
931 }
932
78ae2fcc 933 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 934
78ae2fcc 935 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
936 continue;
9b0f0477 937
a17b1e65 938 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 939 }
9b0f0477 940
36eb9bde 941 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 942 return -1;
0ad19a3f 943}
944
/*
 * Set the hostname to the nodename stored in @utsname.
 *
 * A NULL @utsname means there is nothing to configure and counts as success.
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;
	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
959
69aa6655
DE
/* A symlink to create under /dev: entry "name" points at "oldpath". */
struct dev_symlinks {
	const char *oldpath;
	const char *name;
};

/* The /dev entries that are symlinks into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ "/proc/self/fd",   "fd"     },
	{ "/proc/self/fd/0", "stdin"  },
	{ "/proc/self/fd/1", "stdout" },
	{ "/proc/self/fd/2", "stderr" },
};
971
972static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
973{
974 char path[MAXPATHLEN];
975 int ret,i;
09227be2 976 struct stat s;
69aa6655
DE
977
978
979 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
980 const struct dev_symlinks *d = &dev_symlinks[i];
ec50007f 981 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
982 if (ret < 0 || ret >= MAXPATHLEN)
983 return -1;
09227be2
MW
984
985 /*
986 * Stat the path first. If we don't get an error
987 * accept it as is and don't try to create it
988 */
989 if (!stat(path, &s)) {
990 continue;
991 }
992
69aa6655 993 ret = symlink(d->oldpath, path);
09227be2 994
69aa6655 995 if (ret && errno != EEXIST) {
09227be2
MW
996 if ( errno == EROFS ) {
997 WARN("Warning: Read Only file system while creating %s", path);
998 } else {
999 SYSERROR("Error creating %s", path);
1000 return -1;
1001 }
69aa6655
DE
1002 }
1003 }
1004 return 0;
1005}
1006
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * On the first call (*pp == NULL) a new string "container_ttys=<name>" is
 * allocated. Later calls grow the string and append " <name>".
 *
 * Returns true on success. On allocation failure false is returned and an
 * already existing list in *pp is left untouched.
 */
static bool append_ptyname(char **pp, char *name)
{
	size_t name_len = strlen(name);
	size_t list_len;
	char *list;

	if (*pp == NULL) {
		*pp = malloc(name_len + strlen("container_ttys=") + 1);
		if (*pp == NULL)
			return false;
		sprintf(*pp, "container_ttys=%s", name);
		return true;
	}

	list_len = strlen(*pp);
	/* room for the separating space and the terminating NUL */
	list = realloc(*pp, list_len + name_len + 2);
	if (list == NULL)
		return false;

	list[list_len] = ' ';
	memcpy(list + list_len + 1, name, name_len + 1);
	*pp = list;
	return true;
}
1029
1030static int setup_tty(struct lxc_conf *conf)
1031{
393903d1
SH
1032 const struct lxc_tty_info *tty_info = &conf->tty_info;
1033 char *ttydir = conf->ttydir;
7c6ef2a2
SH
1034 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1035 int i, ret;
b0a33c1e 1036
e8bd4e43 1037 if (!conf->rootfs.path)
bc9bd0e3
DL
1038 return 0;
1039
b0a33c1e 1040 for (i = 0; i < tty_info->nbtty; i++) {
1041
1042 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
1043
e8bd4e43 1044 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
1045 if (ret >= sizeof(path)) {
1046 ERROR("pathname too long for ttys");
1047 return -1;
1048 }
1049 if (ttydir) {
1050 /* create dev/lxc/tty%d" */
e8bd4e43 1051 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
1052 if (ret >= sizeof(lxcpath)) {
1053 ERROR("pathname too long for ttys");
1054 return -1;
1055 }
1056 ret = creat(lxcpath, 0660);
1057 if (ret==-1 && errno != EEXIST) {
959aee9c 1058 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
1059 return -1;
1060 }
4d44e274
SH
1061 if (ret >= 0)
1062 close(ret);
7c6ef2a2
SH
1063 ret = unlink(path);
1064 if (ret && errno != ENOENT) {
959aee9c 1065 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1066 return -1;
1067 }
b0a33c1e 1068
7c6ef2a2
SH
1069 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1070 WARN("failed to mount '%s'->'%s'",
1071 pty_info->name, path);
1072 continue;
1073 }
13954cce 1074
9ba8130c
SH
1075 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1076 if (ret >= sizeof(lxcpath)) {
1077 ERROR("tty pathname too long");
1078 return -1;
1079 }
7c6ef2a2
SH
1080 ret = symlink(lxcpath, path);
1081 if (ret) {
959aee9c 1082 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1083 return -1;
1084 }
1085 } else {
c6883f38
SH
1086 /* If we populated /dev, then we need to create /dev/ttyN */
1087 if (access(path, F_OK)) {
1088 ret = creat(path, 0660);
1089 if (ret==-1) {
959aee9c 1090 SYSERROR("error creating %s", path);
c6883f38 1091 /* this isn't fatal, continue */
025ed0f3 1092 } else {
c6883f38 1093 close(ret);
025ed0f3 1094 }
c6883f38 1095 }
7c6ef2a2 1096 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 1097 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
1098 continue;
1099 }
393903d1 1100 }
e8bd4e43 1101 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
1102 ERROR("Error setting up container_ttys string");
1103 return -1;
b0a33c1e 1104 }
1105 }
1106
cd54d859
DL
1107 INFO("%d tty(s) has been setup", tty_info->nbtty);
1108
b0a33c1e 1109 return 0;
1110}
1111
bf601689 1112
59bb8698 1113static int setup_rootfs_pivot_root(const char *rootfs)
bf601689 1114{
2d489f9e 1115 int oldroot = -1, newroot = -1;
bf601689 1116
2d489f9e
SH
1117 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1118 if (oldroot < 0) {
1119 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1120 return -1;
1121 }
2d489f9e
SH
1122 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1123 if (newroot < 0) {
1124 SYSERROR("Error opening new-/ for fchdir");
1125 goto fail;
c08556c6 1126 }
bf601689 1127
cc6f6dd7 1128 /* change into new root fs */
2d489f9e 1129 if (fchdir(newroot)) {
cc6f6dd7 1130 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1131 goto fail;
cc6f6dd7
DL
1132 }
1133
cc6f6dd7 1134 /* pivot_root into our new root fs */
2d489f9e 1135 if (pivot_root(".", ".")) {
cc6f6dd7 1136 SYSERROR("pivot_root syscall failed");
2d489f9e 1137 goto fail;
bf601689 1138 }
cc6f6dd7 1139
2d489f9e
SH
1140 /*
1141 * at this point the old-root is mounted on top of our new-root
1142 * To unmounted it we must not be chdir'd into it, so escape back
1143 * to old-root
1144 */
1145 if (fchdir(oldroot) < 0) {
1146 SYSERROR("Error entering oldroot");
1147 goto fail;
1148 }
7981ea46 1149 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1150 SYSERROR("Error detaching old root");
1151 goto fail;
cc6f6dd7
DL
1152 }
1153
2d489f9e
SH
1154 if (fchdir(newroot) < 0) {
1155 SYSERROR("Error re-entering newroot");
1156 goto fail;
1157 }
cc6f6dd7 1158
2d489f9e
SH
1159 close(oldroot);
1160 close(newroot);
bf601689 1161
2d489f9e 1162 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1163
bf601689 1164 return 0;
2d489f9e
SH
1165
1166fail:
1167 if (oldroot != -1)
1168 close(oldroot);
1169 if (newroot != -1)
1170 close(newroot);
1171 return -1;
bf601689
MH
1172}
1173
bc6928ff 1174/*
87da4ec3
SH
1175 * Just create a path for /dev under $lxcpath/$name and in rootfs
1176 * If we hit an error, log it but don't fail yet.
91c3830e 1177 */
14221cbb 1178static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1179{
1180 int ret;
87da4ec3
SH
1181 size_t clen;
1182 char *path;
91c3830e 1183
14221cbb 1184 INFO("Mounting container /dev");
bc6928ff 1185
14221cbb 1186 /* $(rootfs->mount) + "/dev/pts" + '\0' */
ec50007f 1187 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1188 path = alloca(clen);
bc6928ff 1189
ec50007f 1190 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
87da4ec3 1191 if (ret < 0 || ret >= clen)
91c3830e 1192 return -1;
bc6928ff 1193
87da4ec3 1194 if (!dir_exists(path)) {
14221cbb 1195 WARN("No /dev in container.");
87da4ec3
SH
1196 WARN("Proceeding without autodev setup");
1197 return 0;
bc6928ff 1198 }
87da4ec3 1199
1ec0e8e3 1200 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
ec50007f 1201 rootfs->path ? rootfs->mount : NULL);
1ec0e8e3 1202 if (ret != 0) {
87da4ec3 1203 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1ec0e8e3 1204 return -1;
91c3830e 1205 }
87da4ec3
SH
1206
1207 INFO("Mounted tmpfs onto %s", path);
1208
ec50007f 1209 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
87da4ec3 1210 if (ret < 0 || ret >= clen)
91c3830e 1211 return -1;
87da4ec3 1212
bc6928ff
MW
1213 /*
1214 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1215 * If not, then create it and exit if that fails...
1216 */
87da4ec3 1217 if (!dir_exists(path)) {
bc6928ff
MW
1218 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1219 if (ret) {
1220 SYSERROR("Failed to create /dev/pts in container");
1221 return -1;
1222 }
91c3830e
SH
1223 }
1224
14221cbb 1225 INFO("Mounted container /dev");
91c3830e
SH
1226 return 0;
1227}
1228
/* Description of one device node to create below the container's /dev. */
struct lxc_devs {
	const char *name;	/* node name relative to /dev */
	mode_t mode;		/* file type and permission bits for mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes that fill_autodev() creates. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1245
0728ebf4 1246static int fill_autodev(const struct lxc_rootfs *rootfs, bool mount_console)
c6883f38
SH
1247{
1248 int ret;
c6883f38
SH
1249 char path[MAXPATHLEN];
1250 int i;
3a32201c 1251 mode_t cmask;
c6883f38 1252
14221cbb 1253 INFO("Creating initial consoles under container /dev");
91c3830e 1254
ec50007f 1255 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
91c3830e
SH
1256 if (ret < 0 || ret >= MAXPATHLEN) {
1257 ERROR("Error calculating container /dev location");
c6883f38 1258 return -1;
f7bee6c6 1259 }
91c3830e 1260
9769034f 1261 if (!dir_exists(path)) // ignore, just don't try to fill in
9cb4d183
SH
1262 return 0;
1263
14221cbb 1264 INFO("Populating container /dev");
3a32201c 1265 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1266 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1267 const struct lxc_devs *d = &lxc_devs[i];
0728ebf4
TA
1268
1269 if (!strcmp(d->name, "console") && !mount_console)
1270 continue;
1271
ec50007f 1272 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
c6883f38
SH
1273 if (ret < 0 || ret >= MAXPATHLEN)
1274 return -1;
1275 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1276 if (ret && errno != EEXIST) {
9cb4d183
SH
1277 char hostpath[MAXPATHLEN];
1278 FILE *pathfile;
1279
1280 // Unprivileged containers cannot create devices, so
1281 // bind mount the device from the host
1282 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1283 if (ret < 0 || ret >= MAXPATHLEN)
1284 return -1;
1285 pathfile = fopen(path, "wb");
1286 if (!pathfile) {
1287 SYSERROR("Failed to create device mount target '%s'", path);
1288 return -1;
1289 }
1290 fclose(pathfile);
592fd47a 1291 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
ec50007f 1292 rootfs->path ? rootfs->mount : NULL) != 0) {
9cb4d183
SH
1293 SYSERROR("Failed bind mounting device %s from host into container",
1294 d->name);
1295 return -1;
1296 }
c6883f38
SH
1297 }
1298 }
3a32201c 1299 umask(cmask);
c6883f38 1300
14221cbb 1301 INFO("Populated container /dev");
c6883f38
SH
1302 return 0;
1303}
1304
cc28d0b0 1305static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1306{
cc28d0b0
SH
1307 const struct lxc_rootfs *rootfs = &conf->rootfs;
1308
a0f379bf
DW
1309 if (!rootfs->path) {
1310 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1311 SYSERROR("Failed to make / rslave");
1312 return -1;
1313 }
c69bd12f 1314 return 0;
a0f379bf 1315 }
0ad19a3f 1316
12297168 1317 if (access(rootfs->mount, F_OK)) {
b1789442 1318 SYSERROR("failed to access to '%s', check it is present",
12297168 1319 rootfs->mount);
b1789442
DL
1320 return -1;
1321 }
1322
9be53773 1323 // First try mounting rootfs using a bdev
76a26f55 1324 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1325 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1326 bdev_put(bdev);
9be53773
SH
1327 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1328 return 0;
1329 }
59d66af2
SH
1330 if (bdev)
1331 bdev_put(bdev);
a17b1e65 1332 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1333 ERROR("failed to mount rootfs");
c3f0a28c 1334 return -1;
1335 }
0ad19a3f 1336
12297168 1337 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1338
ac778708
DL
1339 return 0;
1340}
1341
91e93c71
AV
1342int prepare_ramfs_root(char *root)
1343{
eab15c1e 1344 char buf[LXC_LINELEN], *p;
91e93c71
AV
1345 char nroot[PATH_MAX];
1346 FILE *f;
1347 int i;
1348 char *p2;
1349
1350 if (realpath(root, nroot) == NULL)
1351 return -1;
1352
1353 if (chdir("/") == -1)
1354 return -1;
1355
1356 /*
1357 * We could use here MS_MOVE, but in userns this mount is
1358 * locked and can't be moved.
1359 */
1360 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1361 SYSERROR("Failed to move %s into /", root);
1362 return -1;
1363 }
1364
88322f77 1365 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1366 SYSERROR("Failed to make . rprivate");
1367 return -1;
1368 }
1369
1370 /*
1371 * The following code cleans up inhereted mounts which are not
1372 * required for CT.
1373 *
1374 * The mountinfo file shows not all mounts, if a few points have been
1375 * unmounted between read operations from the mountinfo. So we need to
1376 * read mountinfo a few times.
1377 *
1378 * This loop can be skipped if a container uses unserns, because all
1379 * inherited mounts are locked and we should live with all this trash.
1380 */
1381 while (1) {
1382 int progress = 0;
1383
1384 f = fopen("./proc/self/mountinfo", "r");
1385 if (!f) {
1386 SYSERROR("Unable to open /proc/self/mountinfo");
1387 return -1;
1388 }
eab15c1e 1389 while (fgets(buf, LXC_LINELEN, f)) {
91e93c71
AV
1390 for (p = buf, i=0; p && i < 4; i++)
1391 p = strchr(p+1, ' ');
1392 if (!p)
1393 continue;
1394 p2 = strchr(p+1, ' ');
1395 if (!p2)
1396 continue;
1397
1398 *p2 = '\0';
1399 *p = '.';
1400
1401 if (strcmp(p + 1, "/") == 0)
1402 continue;
1403 if (strcmp(p + 1, "/proc") == 0)
1404 continue;
1405
1406 if (umount2(p, MNT_DETACH) == 0)
1407 progress++;
1408 }
1409 fclose(f);
1410 if (!progress)
1411 break;
1412 }
1413
8bea9fae
PR
1414 /* This also can be skipped if a container uses unserns */
1415 umount2("./proc", MNT_DETACH);
91e93c71
AV
1416
1417 /* It is weird, but chdir("..") moves us in a new root */
1418 if (chdir("..") == -1) {
1419 SYSERROR("Unable to change working directory");
1420 return -1;
1421 }
1422
1423 if (chroot(".") == -1) {
1424 SYSERROR("Unable to chroot");
1425 return -1;
1426 }
1427
1428 return 0;
1429}
1430
74a3920a 1431static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1432{
ac778708
DL
1433 if (!rootfs->path)
1434 return 0;
1435
91e93c71
AV
1436 if (detect_ramfs_rootfs()) {
1437 if (prepare_ramfs_root(rootfs->mount))
1438 return -1;
59bb8698 1439 } else if (setup_rootfs_pivot_root(rootfs->mount)) {
cc6f6dd7 1440 ERROR("failed to setup pivot root");
25368b52 1441 return -1;
c69bd12f
DL
1442 }
1443
25368b52 1444 return 0;
0ad19a3f 1445}
1446
d852c78c 1447static int setup_pts(int pts)
3c26f34e 1448{
77890c6d
SW
1449 char target[PATH_MAX];
1450
d852c78c
DL
1451 if (!pts)
1452 return 0;
3c26f34e 1453
1454 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1455 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1456 return -1;
1457 }
1458
7e40254a
JTLB
1459 if (mkdir("/dev/pts", 0755)) {
1460 if ( errno != EEXIST ) {
1461 SYSERROR("failed to create '/dev/pts'");
1462 return -1;
1463 }
1464 }
1465
a6afdde9 1466 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1467 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1468 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1469 return -1;
1470 }
1471
3c26f34e 1472 if (access("/dev/ptmx", F_OK)) {
1473 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1474 goto out;
36eb9bde 1475 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1476 return -1;
1477 }
1478
77890c6d
SW
1479 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1480 goto out;
1481
3c26f34e 1482 /* fallback here, /dev/pts/ptmx exists just mount bind */
1483 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1484 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1485 return -1;
1486 }
cd54d859
DL
1487
1488 INFO("created new pts instance");
d852c78c 1489
3c26f34e 1490out:
1491 return 0;
1492}
1493
cccc74b5
DL
/*
 * Apply the requested execution domain with personality().
 *
 * A @persona of -1 means "leave unchanged". When built without
 * HAVE_SYS_PERSONALITY_H the function does nothing.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1510
7c6ef2a2 1511static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1512 const struct lxc_console *console)
6e590161 1513{
63376d7d 1514 char path[MAXPATHLEN];
0728ebf4 1515 int ret, fd;
52e35957 1516
7c6ef2a2
SH
1517 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1518 if (ret >= sizeof(path)) {
959aee9c 1519 ERROR("console path too long");
7c6ef2a2
SH
1520 return -1;
1521 }
52e35957 1522
0728ebf4
TA
1523 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1524 if (fd < 0) {
1525 if (errno != EEXIST) {
1526 SYSERROR("failed to create console");
1527 return -1;
1528 }
1529 } else {
1530 close(fd);
52e35957
DL
1531 }
1532
b5159817
DE
1533 if (console->master < 0) {
1534 INFO("no console");
f78a1f32
DL
1535 return 0;
1536 }
ed502555 1537
0728ebf4 1538 if (chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH)) {
63376d7d 1539 SYSERROR("failed to set mode '0%o' to '%s'",
0728ebf4 1540 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
63376d7d
DL
1541 return -1;
1542 }
13954cce 1543
592fd47a 1544 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1545 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1546 return -1;
1547 }
1548
63376d7d 1549 INFO("console has been setup");
7c6ef2a2
SH
1550 return 0;
1551}
1552
1553static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1554 const struct lxc_console *console,
1555 char *ttydir)
1556{
1557 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1558 int ret;
1559
1560 /* create rootfs/dev/<ttydir> directory */
1561 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1562 ttydir);
1563 if (ret >= sizeof(path))
1564 return -1;
1565 ret = mkdir(path, 0755);
1566 if (ret && errno != EEXIST) {
959aee9c 1567 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1568 return -1;
1569 }
959aee9c 1570 INFO("created %s", path);
7c6ef2a2
SH
1571
1572 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1573 rootfs->mount, ttydir);
1574 if (ret >= sizeof(lxcpath)) {
959aee9c 1575 ERROR("console path too long");
7c6ef2a2
SH
1576 return -1;
1577 }
1578
1579 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1580 ret = unlink(path);
1581 if (ret && errno != ENOENT) {
959aee9c 1582 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1583 return -1;
1584 }
1585
1586 ret = creat(lxcpath, 0660);
1587 if (ret==-1 && errno != EEXIST) {
959aee9c 1588 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1589 return -1;
1590 }
4d44e274
SH
1591 if (ret >= 0)
1592 close(ret);
7c6ef2a2 1593
b5159817
DE
1594 if (console->master < 0) {
1595 INFO("no console");
7c6ef2a2
SH
1596 return 0;
1597 }
1598
592fd47a 1599 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1600 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1601 return -1;
1602 }
1603
1604 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1605 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1606 if (ret >= sizeof(lxcpath)) {
1607 ERROR("lxc/console path too long");
1608 return -1;
1609 }
7c6ef2a2
SH
1610 ret = symlink(lxcpath, path);
1611 if (ret) {
1612 SYSERROR("failed to create symlink for console");
1613 return -1;
1614 }
1615
1616 INFO("console has been setup on %s", lxcpath);
cd54d859 1617
6e590161 1618 return 0;
1619}
1620
7c6ef2a2
SH
1621static int setup_console(const struct lxc_rootfs *rootfs,
1622 const struct lxc_console *console,
1623 char *ttydir)
1624{
1625 /* We don't have a rootfs, /dev/console will be shared */
1626 if (!rootfs->path)
1627 return 0;
1628 if (!ttydir)
1629 return setup_dev_console(rootfs, console);
1630
1631 return setup_ttydir_console(rootfs, console, ttydir);
1632}
1633
1bd051a6
SH
1634static int setup_kmsg(const struct lxc_rootfs *rootfs,
1635 const struct lxc_console *console)
1636{
1637 char kpath[MAXPATHLEN];
1638 int ret;
1639
222fea5a
DE
1640 if (!rootfs->path)
1641 return 0;
1bd051a6
SH
1642 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1643 if (ret < 0 || ret >= sizeof(kpath))
1644 return -1;
1645
1646 ret = unlink(kpath);
1647 if (ret && errno != ENOENT) {
959aee9c 1648 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1649 return -1;
1650 }
1651
1652 ret = symlink("console", kpath);
1653 if (ret) {
1654 SYSERROR("failed to create symlink for kmsg");
1655 return -1;
1656 }
1657
1658 return 0;
1659}
1660
998ac676
RT
1661static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1662{
1663 struct mount_opt *mo;
1664
1665 /* If opt is found in mount_opt, set or clear flags.
1666 * Otherwise append it to data. */
1667
1668 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1669 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1670 if (mo->clear)
1671 *flags &= ~mo->flag;
1672 else
1673 *flags |= mo->flag;
1674 return;
1675 }
1676 }
1677
1678 if (strlen(*data))
1679 strcat(*data, ",");
1680 strcat(*data, opt);
1681}
1682
/*
 * Split a comma-separated mount option string into mount flags and a
 * data string.
 *
 * @mntopts:  option string, may be NULL.
 * @mntflags: receives the accumulated flags (0 if there are none).
 * @mntdata:  receives a newly allocated string with the options that are
 *            not flags, or NULL if there are none; the caller frees it.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] == '\0')
		free(extra);
	else
		*mntdata = extra;

	free(copy);

	return 0;
}
1721
6fd5e769
SH
/*
 * Terminate @word in place at its first space or tab (or at its existing
 * end if it contains neither).
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1728
/*
 * Skip @nfields whitespace-terminated fields in @src.
 *
 * A field ends at a single space or tab. Returns a pointer to the
 * character following the separator of the last skipped field, or to
 * the terminating NUL if @src has fewer fields.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int left;

	for (left = nfields; left > 0; left--) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1746
911324ef
DL
/*
 * Perform one mount and, for bind or remount requests, a follow-up
 * remount so that the requested flags take effect.
 *
 * The initial mount is done through safe_mount() with MS_REMOUNT
 * masked out. For MS_BIND/MS_REMOUNT requests the flags reported by
 * statvfs() for @fsname (nosuid, nodev unless @dev is set, ro, noexec)
 * are added to the requested flags before remounting; a pure bind mount
 * that needs no additional flags is not remounted.
 *
 * With @optional set a failed mount is only logged and 0 is returned.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, int dev, const char *rootfs)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	/* only bind mounts and explicit remounts need a second pass */
	if (!(mountflags & (MS_REMOUNT | MS_BIND)))
		goto mounted;

	DEBUG("remounting %s on %s to respect bind or remount options",
	      fsname ? fsname : "(none)", target ? target : "(none)");

#ifdef HAVE_STATVFS
	if (statvfs(fsname, &sb) == 0) {
		/* non-zero iff the caller asked for a read-only mount */
		unsigned long ro_requested = mountflags & MS_RDONLY;
		unsigned long extra = ro_requested;

		extra |= sb.f_flag & (MS_NOSUID | MS_RDONLY | MS_NOEXEC);
		if (!dev)
			extra |= sb.f_flag & MS_NODEV;

		DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, extra);

		/*
		 * A plain bind mount whose flags already contain
		 * everything required (and which is not read-only) does
		 * not have to be remounted.
		 */
		if (!(mountflags & MS_REMOUNT) &&
		    !(extra & ~mountflags) && ro_requested == 0) {
			DEBUG("mountflags already was %lu, skipping remount",
			      mountflags);
			goto mounted;
		}

		mountflags |= extra;
	}
#endif

	if (mount(fsname, target, fstype,
		  mountflags | MS_REMOUNT, data) < 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s",
		     fsname, target, strerror(errno));
		return 0;
	}

mounted:
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1823
4e4ca161
SH
/*
 * Strip the lxc-specific options 'create=dir', 'create=file' and
 * 'optional' from mntent->mnt_opts in place.
 *
 * Only the first occurrence of each is removed. If the option is the
 * last one in the string, the string is cut at the option's start, which
 * leaves a preceding comma in place.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const internal_opts[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(internal_opts) / sizeof(internal_opts[0]); idx++) {
		char *hit, *comma;

		hit = strstr(mntent->mnt_opts, internal_opts[idx]);
		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma)
			/* pull the remaining options over the removed one */
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		else
			/* no more mntopts, so just chop it here */
			*hit = '\0';
	}
}
1848
/*
 * Create the mount target requested by a mount entry.
 *
 * For "overlay*" and "aufs*" filesystem types the respective helper is
 * called first. With the "create=dir" option @path is created as a
 * directory; with "create=file" (and @path not yet existing) the parent
 * directory and an empty file are created.
 *
 * Returns 0 on success, -1 if a target could not be created.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int ret = 0;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (ovl_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (aufs_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		char *pathcopy;
		FILE *pathfile;

		/*
		 * dirname() may modify its argument and may return a
		 * pointer to static storage, so it gets a private copy and
		 * only that copy is ever passed to free().
		 */
		pathcopy = strdup(path);
		if (!pathcopy) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		if (mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
1889
ec50007f
CB
1890/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
1891 * without a rootfs. */
db4aba38 1892static inline int mount_entry_on_generic(struct mntent *mntent,
0a2dddd4
CB
1893 const char* path, const struct lxc_rootfs *rootfs,
1894 const char *lxc_name, const char *lxc_path)
4d5b72a1
NC
1895{
1896 unsigned long mntflags;
1897 char *mntdata;
1898 int ret;
1899 bool optional = hasmntopt(mntent, "optional") != NULL;
ae7a770e 1900 bool dev = hasmntopt(mntent, "dev") != NULL;
4d5b72a1 1901
ec50007f
CB
1902 char *rootfs_path = NULL;
1903 if (rootfs && rootfs->path)
1904 rootfs_path = rootfs->mount;
1905
0a2dddd4 1906 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, lxc_path);
34cfffb3 1907
608e3567
SH
1908 if (ret < 0)
1909 return optional ? 0 : -1;
1910
4e4ca161
SH
1911 cull_mntent_opt(mntent);
1912
a17b1e65
SG
1913 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1914 free(mntdata);
1915 return -1;
1916 }
1917
6e46cc0d 1918 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
ae7a770e 1919 mntdata, optional, dev, rootfs_path);
68c152ef 1920
911324ef 1921 free(mntdata);
911324ef
DL
1922 return ret;
1923}
1924
db4aba38
NC
/*
 * Mount an entry for a container that has no rootfs.
 *
 * All mount points are treated as absolute paths starting at / on the
 * host; a leading '/' is added when the entry lacks one.
 *
 * Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char target[MAXPATHLEN];
	const char *lead = (mntent->mnt_dir[0] == '/') ? "" : "/";
	int len;

	len = snprintf(target, sizeof(target), "%s%s", lead, mntent->mnt_dir);
	if (len < 0 || (size_t)len >= sizeof(target)) {
		ERROR("path name too long");
		return -1;
	}

	return mount_entry_on_generic(mntent, target, NULL, NULL, NULL);
}
1944
4e4ca161 1945static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 1946 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
1947 const char *lxc_name,
1948 const char *lxc_path)
911324ef 1949{
013bd428 1950 char *aux;
59760f5d 1951 char path[MAXPATHLEN];
80a881b2 1952 int r, ret = 0, offset;
67e571de 1953 const char *lxcpath;
0ad19a3f 1954
593e8478 1955 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1956 if (!lxcpath) {
1957 ERROR("Out of memory");
1958 return -1;
1959 }
1960
80a881b2 1961 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1962 * use $lxcpath/CN/rootfs as the target prefix */
1963 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1964 if (r < 0 || r >= MAXPATHLEN)
1965 goto skipvarlib;
1966
1967 aux = strstr(mntent->mnt_dir, path);
1968 if (aux) {
1969 offset = strlen(path);
1970 goto skipabs;
1971 }
1972
1973skipvarlib:
013bd428
DL
1974 aux = strstr(mntent->mnt_dir, rootfs->path);
1975 if (!aux) {
1976 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 1977 return ret;
013bd428 1978 }
80a881b2
SH
1979 offset = strlen(rootfs->path);
1980
1981skipabs:
013bd428 1982
9ba8130c 1983 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1984 aux + offset);
1985 if (r < 0 || r >= MAXPATHLEN) {
1986 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
1987 return -1;
1988 }
1989
0a2dddd4 1990 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 1991}
d330fe7b 1992
4e4ca161 1993static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
1994 const struct lxc_rootfs *rootfs,
1995 const char *lxc_name,
1996 const char *lxc_path)
911324ef
DL
1997{
1998 char path[MAXPATHLEN];
911324ef 1999 int ret;
d330fe7b 2000
34cfffb3 2001 /* relative to root mount point */
6e46cc0d 2002 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
1433c9f9 2003 if (ret < 0 || ret >= sizeof(path)) {
9ba8130c
SH
2004 ERROR("path name too long");
2005 return -1;
2006 }
911324ef 2007
0a2dddd4 2008 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
2009}
2010
80a881b2 2011static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
0a2dddd4 2012 const char *lxc_name, const char *lxc_path)
911324ef 2013{
aaf901be
AM
2014 struct mntent mntent;
2015 char buf[4096];
911324ef 2016 int ret = -1;
e76b8764 2017
aaf901be 2018 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2019
911324ef 2020 if (!rootfs->path) {
aaf901be 2021 if (mount_entry_on_systemfs(&mntent))
e76b8764 2022 goto out;
911324ef 2023 continue;
e76b8764
CDC
2024 }
2025
911324ef 2026 /* We have a separate root, mounts are relative to it */
aaf901be 2027 if (mntent.mnt_dir[0] != '/') {
0a2dddd4 2028 if (mount_entry_on_relative_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef
DL
2029 goto out;
2030 continue;
2031 }
cd54d859 2032
0a2dddd4 2033 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef 2034 goto out;
0ad19a3f 2035 }
cd54d859 2036
0ad19a3f 2037 ret = 0;
cd54d859
DL
2038
2039 INFO("mount points have been setup");
0ad19a3f 2040out:
e7938e9e
MN
2041 return ret;
2042}
2043
/*
 * Mount all entries of the fstab file @fstab. A NULL @fstab means there
 * is nothing to do.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an entry
 * fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *entries;
	int rc;

	if (fstab == NULL)
		return 0;

	entries = setmntent(fstab, "r");
	if (entries == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
	endmntent(entries);

	return rc;
}
2064
5ef5c9a3 2065FILE *make_anonymous_mount_file(struct lxc_list *mount)
e7938e9e 2066{
5ef5c9a3 2067 int ret;
e7938e9e 2068 char *mount_entry;
5ef5c9a3
CB
2069 struct lxc_list *iterator;
2070 FILE *file;
2071 int fd = -1;
2072
2073 fd = memfd_create("lxc_mount_file", MFD_CLOEXEC);
2074 if (fd < 0) {
2075 if (errno != ENOSYS)
2076 return NULL;
2077 file = tmpfile();
2078 } else {
2079 file = fdopen(fd, "r+");
2080 }
e7938e9e 2081
e7938e9e 2082 if (!file) {
fad6ef95 2083 int saved_errno = errno;
5ef5c9a3
CB
2084 if (fd != -1)
2085 close(fd);
fad6ef95 2086 ERROR("Could not create mount entry file: %s.", strerror(saved_errno));
9fc7f8c0 2087 return NULL;
e7938e9e
MN
2088 }
2089
2090 lxc_list_for_each(iterator, mount) {
2091 mount_entry = iterator->elem;
5ef5c9a3
CB
2092 ret = fprintf(file, "%s\n", mount_entry);
2093 if (ret < strlen(mount_entry))
2094 WARN("Could not write mount entry to anonymous mount file.");
2095 }
2096
2097 if (fseek(file, 0, SEEK_SET) < 0) {
2098 fclose(file);
2099 return NULL;
e7938e9e
MN
2100 }
2101
9fc7f8c0
TA
2102 return file;
2103}
2104
/*
 * Mount the entries held in the @mount list by first writing them to an
 * anonymous file and then feeding that file to mount_file_entries().
 *
 * Returns -1 if the anonymous file cannot be created, otherwise the result
 * of mount_file_entries().
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	int rc = -1;
	FILE *entries = make_anonymous_mount_file(mount);

	if (entries) {
		rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
		fclose(entries);
	}

	return rc;
}
2121
bab88e68
CS
2122static int parse_cap(const char *cap)
2123{
2124 char *ptr = NULL;
84760c11 2125 size_t i;
2126 int capid = -1;
bab88e68 2127
7035407c
DE
2128 if (!strcmp(cap, "none"))
2129 return -2;
2130
bab88e68
CS
2131 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2132
2133 if (strcmp(cap, caps_opt[i].name))
2134 continue;
2135
2136 capid = caps_opt[i].value;
2137 break;
2138 }
2139
2140 if (capid < 0) {
2141 /* try to see if it's numeric, so the user may specify
2142 * capabilities that the running kernel knows about but
2143 * we don't */
2144 errno = 0;
2145 capid = strtol(cap, &ptr, 10);
2146 if (!ptr || *ptr != '\0' || errno != 0)
2147 /* not a valid number */
2148 capid = -1;
2149 else if (capid > lxc_caps_last_cap())
2150 /* we have a number but it's not a valid
2151 * capability */
2152 capid = -1;
2153 }
2154
2155 return capid;
2156}
2157
0769b82a
CS
2158int in_caplist(int cap, struct lxc_list *caps)
2159{
2160 struct lxc_list *iterator;
2161 int capid;
2162
2163 lxc_list_for_each(iterator, caps) {
2164 capid = parse_cap(iterator->elem);
2165 if (capid == cap)
2166 return 1;
2167 }
2168
2169 return 0;
2170}
2171
81810dd1
DL
2172static int setup_caps(struct lxc_list *caps)
2173{
2174 struct lxc_list *iterator;
2175 char *drop_entry;
bab88e68 2176 int capid;
81810dd1
DL
2177
2178 lxc_list_for_each(iterator, caps) {
2179
2180 drop_entry = iterator->elem;
2181
bab88e68 2182 capid = parse_cap(drop_entry);
d55bc1ad 2183
81810dd1 2184 if (capid < 0) {
1e11be34
DL
2185 ERROR("unknown capability %s", drop_entry);
2186 return -1;
81810dd1
DL
2187 }
2188
2189 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2190
2191 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2192 SYSERROR("failed to remove %s capability", drop_entry);
2193 return -1;
2194 }
81810dd1
DL
2195
2196 }
2197
1fb86a7c
SH
2198 DEBUG("capabilities have been setup");
2199
2200 return 0;
2201}
2202
2203static int dropcaps_except(struct lxc_list *caps)
2204{
2205 struct lxc_list *iterator;
2206 char *keep_entry;
1fb86a7c
SH
2207 int i, capid;
2208 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2209 INFO("found %d capabilities", numcaps);
1fb86a7c 2210
2caf9a97
SH
2211 if (numcaps <= 0 || numcaps > 200)
2212 return -1;
2213
1fb86a7c
SH
2214 // caplist[i] is 1 if we keep capability i
2215 int *caplist = alloca(numcaps * sizeof(int));
2216 memset(caplist, 0, numcaps * sizeof(int));
2217
2218 lxc_list_for_each(iterator, caps) {
2219
2220 keep_entry = iterator->elem;
2221
bab88e68 2222 capid = parse_cap(keep_entry);
1fb86a7c 2223
7035407c
DE
2224 if (capid == -2)
2225 continue;
2226
1fb86a7c
SH
2227 if (capid < 0) {
2228 ERROR("unknown capability %s", keep_entry);
2229 return -1;
2230 }
2231
8255688a 2232 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2233
2234 caplist[capid] = 1;
2235 }
2236 for (i=0; i<numcaps; i++) {
2237 if (caplist[i])
2238 continue;
2239 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2240 SYSERROR("failed to remove capability %d", i);
2241 return -1;
2242 }
1fb86a7c
SH
2243 }
2244
2245 DEBUG("capabilities have been setup");
81810dd1
DL
2246
2247 return 0;
2248}
2249
0ad19a3f 2250static int setup_hw_addr(char *hwaddr, const char *ifname)
2251{
2252 struct sockaddr sockaddr;
2253 struct ifreq ifr;
fad6ef95 2254 int ret, fd, saved_errno;
0ad19a3f 2255
3cfc0f3a
MN
2256 ret = lxc_convert_mac(hwaddr, &sockaddr);
2257 if (ret) {
2258 ERROR("mac address '%s' conversion failed : %s",
2259 hwaddr, strerror(-ret));
0ad19a3f 2260 return -1;
2261 }
2262
2263 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2264 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2265 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2266
2267 fd = socket(AF_INET, SOCK_DGRAM, 0);
2268 if (fd < 0) {
3ab87b66 2269 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2270 return -1;
2271 }
2272
2273 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
fad6ef95 2274 saved_errno = errno;
0ad19a3f 2275 close(fd);
2276 if (ret)
fad6ef95 2277 ERROR("ioctl failure : %s", strerror(saved_errno));
0ad19a3f 2278
5da6aa8c 2279 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2280
0ad19a3f 2281 return ret;
2282}
2283
82d5ae15 2284static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2285{
82d5ae15
DL
2286 struct lxc_list *iterator;
2287 struct lxc_inetdev *inetdev;
3cfc0f3a 2288 int err;
0ad19a3f 2289
82d5ae15
DL
2290 lxc_list_for_each(iterator, ip) {
2291
2292 inetdev = iterator->elem;
2293
0093bb8c
DL
2294 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2295 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2296 if (err) {
2297 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2298 ifindex, strerror(-err));
82d5ae15
DL
2299 return -1;
2300 }
2301 }
2302
2303 return 0;
0ad19a3f 2304}
2305
82d5ae15 2306static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2307{
82d5ae15 2308 struct lxc_list *iterator;
7fa9074f 2309 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2310 int err;
0ad19a3f 2311
82d5ae15
DL
2312 lxc_list_for_each(iterator, ip) {
2313
2314 inet6dev = iterator->elem;
2315
b3df193c 2316 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2317 &inet6dev->mcast, &inet6dev->acast,
2318 inet6dev->prefix);
3cfc0f3a
MN
2319 if (err) {
2320 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2321 ifindex, strerror(-err));
82d5ae15 2322 return -1;
3cfc0f3a 2323 }
82d5ae15
DL
2324 }
2325
2326 return 0;
0ad19a3f 2327}
2328
82d5ae15 2329static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2330{
0ad19a3f 2331 char ifname[IFNAMSIZ];
0ad19a3f 2332 char *current_ifname = ifname;
3cfc0f3a 2333 int err;
0ad19a3f 2334
82d5ae15
DL
2335 /* empty network namespace */
2336 if (!netdev->ifindex) {
b0efbac4 2337 if (netdev->flags & IFF_UP) {
d472214b 2338 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2339 if (err) {
2340 ERROR("failed to set the loopback up : %s",
2341 strerror(-err));
82d5ae15
DL
2342 return -1;
2343 }
82d5ae15 2344 }
40790553
SH
2345 if (netdev->type != LXC_NET_VETH)
2346 return 0;
2347 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2348 }
13954cce 2349
b466dc33 2350 /* get the new ifindex in case of physical netdev */
40790553 2351 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2352 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2353 ERROR("failed to get ifindex for %s",
2354 netdev->link);
2355 return -1;
2356 }
40790553 2357 }
b466dc33 2358
82d5ae15
DL
2359 /* retrieve the name of the interface */
2360 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2361 ERROR("no interface corresponding to index '%d'",
82d5ae15 2362 netdev->ifindex);
0ad19a3f 2363 return -1;
2364 }
13954cce 2365
018ef520 2366 /* default: let the system to choose one interface name */
9d083402 2367 if (!netdev->name)
fb6d9b2f
DL
2368 netdev->name = netdev->type == LXC_NET_PHYS ?
2369 netdev->link : "eth%d";
018ef520 2370
82d5ae15 2371 /* rename the interface name */
40790553
SH
2372 if (strcmp(ifname, netdev->name) != 0) {
2373 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2374 if (err) {
2375 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2376 strerror(-err));
2377 return -1;
2378 }
018ef520
DL
2379 }
2380
2381 /* Re-read the name of the interface because its name has changed
2382 * and would be automatically allocated by the system
2383 */
82d5ae15 2384 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2385 ERROR("no interface corresponding to index '%d'",
82d5ae15 2386 netdev->ifindex);
018ef520 2387 return -1;
0ad19a3f 2388 }
2389
82d5ae15
DL
2390 /* set a mac address */
2391 if (netdev->hwaddr) {
2392 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2393 ERROR("failed to setup hw address for '%s'",
82d5ae15 2394 current_ifname);
0ad19a3f 2395 return -1;
2396 }
2397 }
2398
82d5ae15
DL
2399 /* setup ipv4 addresses on the interface */
2400 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2401 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2402 ifname);
2403 return -1;
2404 }
2405
82d5ae15
DL
2406 /* setup ipv6 addresses on the interface */
2407 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2408 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2409 ifname);
2410 return -1;
2411 }
2412
82d5ae15 2413 /* set the network device up */
b0efbac4 2414 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2415 int err;
2416
d472214b 2417 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2418 if (err) {
2419 ERROR("failed to set '%s' up : %s", current_ifname,
2420 strerror(-err));
0ad19a3f 2421 return -1;
2422 }
2423
2424 /* the network is up, make the loopback up too */
d472214b 2425 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2426 if (err) {
2427 ERROR("failed to set the loopback up : %s",
2428 strerror(-err));
0ad19a3f 2429 return -1;
2430 }
2431 }
2432
f8fee0e2
MK
2433 /* We can only set up the default routes after bringing
2434 * up the interface, sine bringing up the interface adds
2435 * the link-local routes and we can't add a default
2436 * route if the gateway is not reachable. */
2437
2438 /* setup ipv4 gateway on the interface */
2439 if (netdev->ipv4_gateway) {
2440 if (!(netdev->flags & IFF_UP)) {
2441 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2442 return -1;
2443 }
2444
2445 if (lxc_list_empty(&netdev->ipv4)) {
2446 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2447 return -1;
2448 }
2449
2450 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2451 if (err) {
fc739df5
SG
2452 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2453 if (err) {
2454 ERROR("failed to add ipv4 dest for '%s': %s",
2455 ifname, strerror(-err));
2456 }
2457
2458 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2459 if (err) {
2460 ERROR("failed to setup ipv4 gateway for '%s': %s",
2461 ifname, strerror(-err));
2462 if (netdev->ipv4_gateway_auto) {
2463 char buf[INET_ADDRSTRLEN];
2464 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2465 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2466 }
2467 return -1;
19a26f82 2468 }
f8fee0e2
MK
2469 }
2470 }
2471
2472 /* setup ipv6 gateway on the interface */
2473 if (netdev->ipv6_gateway) {
2474 if (!(netdev->flags & IFF_UP)) {
2475 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2476 return -1;
2477 }
2478
2479 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2480 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2481 return -1;
2482 }
2483
2484 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2485 if (err) {
fc739df5
SG
2486 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2487 if (err) {
2488 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2489 ifname, strerror(-err));
19a26f82 2490 }
fc739df5
SG
2491
2492 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2493 if (err) {
2494 ERROR("failed to setup ipv6 gateway for '%s': %s",
2495 ifname, strerror(-err));
2496 if (netdev->ipv6_gateway_auto) {
2497 char buf[INET6_ADDRSTRLEN];
2498 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2499 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2500 }
2501 return -1;
2502 }
f8fee0e2
MK
2503 }
2504 }
2505
cd54d859
DL
2506 DEBUG("'%s' has been setup", current_ifname);
2507
0ad19a3f 2508 return 0;
2509}
2510
5f4535a3 2511static int setup_network(struct lxc_list *network)
0ad19a3f 2512{
82d5ae15 2513 struct lxc_list *iterator;
82d5ae15 2514 struct lxc_netdev *netdev;
0ad19a3f 2515
5f4535a3 2516 lxc_list_for_each(iterator, network) {
cd54d859 2517
5f4535a3 2518 netdev = iterator->elem;
82d5ae15
DL
2519
2520 if (setup_netdev(netdev)) {
2521 ERROR("failed to setup netdev");
2522 return -1;
2523 }
2524 }
cd54d859 2525
5f4535a3
DL
2526 if (!lxc_list_empty(network))
2527 INFO("network has been setup");
cd54d859
DL
2528
2529 return 0;
0ad19a3f 2530}
2531
c6d09e15
WB
2532static int parse_resource(const char *res) {
2533 size_t i;
2534 int resid = -1;
2535
2536 for (i = 0; i < sizeof(limit_opt)/sizeof(limit_opt[0]); ++i) {
2537 if (strcmp(res, limit_opt[i].name) == 0)
2538 return limit_opt[i].value;
2539 }
2540
2541 /* try to see if it's numeric, so the user may specify
2542 * resources that the running kernel knows about but
2543 * we don't */
2544 if (lxc_safe_int(res, &resid) == 0)
2545 return resid;
2546 return -1;
2547}
2548
2549int setup_resource_limits(struct lxc_list *limits, pid_t pid) {
2550 struct lxc_list *it;
2551 struct lxc_limit *lim;
2552 int resid;
2553
2554 lxc_list_for_each(it, limits) {
2555 lim = it->elem;
2556
2557 resid = parse_resource(lim->resource);
2558 if (resid < 0) {
2559 ERROR("unknown resource %s", lim->resource);
2560 return -1;
2561 }
2562
2563 if (prlimit(pid, resid, &lim->limit, NULL) != 0) {
2564 ERROR("failed to set limit %s: %s", lim->resource, strerror(errno));
2565 return -1;
2566 }
2567 }
2568 return 0;
2569}
2570
2af6bd1b 2571/* try to move physical nics to the init netns */
5610055a 2572void lxc_restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2af6bd1b 2573{
64d2fcb5 2574 int i, oldfd;
4ec31c52 2575 char ifname[IFNAMSIZ];
2af6bd1b 2576
5610055a 2577 if (netnsfd < 0 || conf->num_savednics == 0)
2af6bd1b
SH
2578 return;
2579
64d2fcb5 2580 INFO("Running to reset %d nic names.", conf->num_savednics);
5610055a 2581
64d2fcb5
CB
2582 oldfd = lxc_preserve_ns(getpid(), "net");
2583 if (oldfd < 0) {
2584 SYSERROR("Failed to open monitor netns fd.");
2af6bd1b
SH
2585 return;
2586 }
64d2fcb5 2587
2af6bd1b
SH
2588 if (setns(netnsfd, 0) != 0) {
2589 SYSERROR("Failed to enter container netns to reset nics");
2590 close(oldfd);
2591 return;
2592 }
2593 for (i=0; i<conf->num_savednics; i++) {
2594 struct saved_nic *s = &conf->saved_nics[i];
f2e206ff 2595 /* retrieve the name of the interface */
2596 if (!if_indextoname(s->ifindex, ifname)) {
2597 WARN("no interface corresponding to index '%d'", s->ifindex);
2598 continue;
2599 }
5610055a 2600 if (lxc_netdev_move_by_name(ifname, 1, s->orig_name))
f2e206ff 2601 WARN("Error moving nic name:%s back to host netns", ifname);
5610055a 2602 free(s->orig_name);
2af6bd1b 2603 }
5610055a
WB
2604 conf->num_savednics = 0;
2605
2af6bd1b
SH
2606 if (setns(oldfd, 0) != 0)
2607 SYSERROR("Failed to re-enter monitor's netns");
2608 close(oldfd);
2609}
2610
ae9242c8
SH
2611static char *default_rootfs_mount = LXCROOTFSMOUNT;
2612
7b379ab3 2613struct lxc_conf *lxc_conf_init(void)
089cd8b8 2614{
7b379ab3 2615 struct lxc_conf *new;
26ddeedd 2616 int i;
7b379ab3
MN
2617
2618 new = malloc(sizeof(*new));
2619 if (!new) {
2620 ERROR("lxc_conf_init : %m");
2621 return NULL;
2622 }
2623 memset(new, 0, sizeof(*new));
2624
b40a606e 2625 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2626 new->personality = -1;
124fa0a8 2627 new->autodev = 1;
596a818d
DE
2628 new->console.log_path = NULL;
2629 new->console.log_fd = -1;
28a4b0e5 2630 new->console.path = NULL;
63376d7d 2631 new->console.peer = -1;
b5159817
DE
2632 new->console.peerpty.busy = -1;
2633 new->console.peerpty.master = -1;
2634 new->console.peerpty.slave = -1;
63376d7d
DL
2635 new->console.master = -1;
2636 new->console.slave = -1;
2637 new->console.name[0] = '\0';
d2e30e99 2638 new->maincmd_fd = -1;
76a26f55 2639 new->nbd_idx = -1;
54c30e29 2640 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2641 if (!new->rootfs.mount) {
2642 ERROR("lxc_conf_init : %m");
2643 free(new);
2644 return NULL;
2645 }
d89de239 2646 new->kmsg = 0;
858377e4 2647 new->logfd = -1;
7b379ab3
MN
2648 lxc_list_init(&new->cgroup);
2649 lxc_list_init(&new->network);
2650 lxc_list_init(&new->mount_list);
81810dd1 2651 lxc_list_init(&new->caps);
1fb86a7c 2652 lxc_list_init(&new->keepcaps);
f6d3e3e4 2653 lxc_list_init(&new->id_map);
f979ac15 2654 lxc_list_init(&new->includes);
4184c3e1 2655 lxc_list_init(&new->aliens);
7c661726 2656 lxc_list_init(&new->environment);
c6d09e15 2657 lxc_list_init(&new->limits);
26ddeedd
SH
2658 for (i=0; i<NUM_LXC_HOOKS; i++)
2659 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2660 lxc_list_init(&new->groups);
fe4de9a6
DE
2661 new->lsm_aa_profile = NULL;
2662 new->lsm_se_context = NULL;
5112cd70 2663 new->tmp_umount_proc = 0;
7b379ab3 2664
9f30a190
MM
2665 for (i = 0; i < LXC_NS_MAX; i++)
2666 new->inherit_ns_fd[i] = -1;
2667
72bb04e4
PT
2668 /* if running in a new user namespace, init and COMMAND
2669 * default to running as UID/GID 0 when using lxc-execute */
2670 new->init_uid = 0;
2671 new->init_gid = 0;
2672
7b379ab3 2673 return new;
089cd8b8
DL
2674}
2675
a589434e 2676static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2677{
8634bc19 2678 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2679 char veth2buf[IFNAMSIZ], *veth2;
b7b2fde4
CB
2680 int bridge_index, err;
2681 unsigned int mtu = 0;
13954cce 2682
8bee8851 2683 if (netdev->priv.veth_attr.pair) {
e892973e 2684 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2685 if (handler->conf->reboot)
2686 lxc_netdev_delete_by_name(veth1);
2687 } else {
9ba8130c
SH
2688 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2689 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2690 ERROR("veth1 name too long");
2691 return -1;
2692 }
a0265685 2693 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2694 if (!veth1) {
2695 ERROR("failed to allocate a temporary name");
2696 return -1;
2697 }
74a2b586
JK
2698 /* store away for deconf */
2699 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2700 }
82d5ae15 2701
0e391e57 2702 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2703 veth2 = lxc_mkifname(veth2buf);
ad40563e 2704 if (!veth2) {
82d5ae15 2705 ERROR("failed to allocate a temporary name");
ad40563e 2706 goto out_delete;
0ad19a3f 2707 }
2708
3cfc0f3a
MN
2709 err = lxc_veth_create(veth1, veth2);
2710 if (err) {
2e2d6a7b 2711 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2712 strerror(-err));
ad40563e 2713 goto out_delete;
0ad19a3f 2714 }
13954cce 2715
49684c0b
CS
2716 /* changing the high byte of the mac address to 0xfe, the bridge interface
2717 * will always keep the host's mac address and not take the mac address
2718 * of a container */
2719 err = setup_private_host_hw_addr(veth1);
2720 if (err) {
2e2d6a7b 2721 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2722 veth1, strerror(-err));
2723 goto out_delete;
2724 }
2725
af651aa9
SN
2726 netdev->ifindex = if_nametoindex(veth2);
2727 if (!netdev->ifindex) {
2728 ERROR("failed to retrieve the index for %s", veth2);
2729 goto out_delete;
2730 }
2731
82d5ae15 2732 if (netdev->mtu) {
b7b2fde4
CB
2733 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
2734 WARN("Failed to parse mtu from.");
2735 else
2736 INFO("Retrieved mtu %d", mtu);
e54864d3 2737 } else if (netdev->link) {
e9280f65 2738 bridge_index = if_nametoindex(netdev->link);
729e8bf6
CB
2739 if (bridge_index) {
2740 mtu = netdev_get_mtu(bridge_index);
2741 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
2742 } else {
2743 mtu = netdev_get_mtu(netdev->ifindex);
2744 INFO("Retrieved mtu %d from %s", mtu, veth2);
2745 }
e54864d3
NC
2746 }
2747
2748 if (mtu) {
2749 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2750 if (!err)
e54864d3 2751 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2752 if (err) {
e54864d3
NC
2753 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2754 mtu, veth1, veth2, strerror(-err));
eb14c10a 2755 goto out_delete;
75d09f83
DL
2756 }
2757 }
2758
3cfc0f3a 2759 if (netdev->link) {
c43cbc04 2760 err = lxc_bridge_attach(handler->lxcpath, handler->name, netdev->link, veth1);
3cfc0f3a 2761 if (err) {
2e2d6a7b 2762 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2763 veth1, netdev->link, strerror(-err));
2764 goto out_delete;
2765 }
738d0deb 2766 INFO("Attached '%s': to the bridge '%s': ", veth1, netdev->link);
eb14c10a
DL
2767 }
2768
d472214b 2769 err = lxc_netdev_up(veth1);
6e35af2e
DL
2770 if (err) {
2771 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2772 goto out_delete;
0ad19a3f 2773 }
2774
e3b4c4c4 2775 if (netdev->upscript) {
751d9dcd
DL
2776 err = run_script(handler->name, "net", netdev->upscript, "up",
2777 "veth", veth1, (char*) NULL);
2778 if (err)
e3b4c4c4 2779 goto out_delete;
e3b4c4c4
ST
2780 }
2781
a589434e 2782 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2783 veth1, veth2, netdev->ifindex);
2784
6ab9ab6d 2785 return 0;
eb14c10a
DL
2786
2787out_delete:
b84f58b9 2788 lxc_netdev_delete_by_name(veth1);
f10fad2f 2789 if (!netdev->priv.veth_attr.pair)
ad40563e 2790 free(veth1);
f10fad2f 2791 free(veth2);
6ab9ab6d 2792 return -1;
13954cce 2793}
d957ae2d 2794
74a2b586
JK
2795static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2796{
2797 char *veth1;
2798 int err;
2799
2800 if (netdev->priv.veth_attr.pair)
2801 veth1 = netdev->priv.veth_attr.pair;
2802 else
2803 veth1 = netdev->priv.veth_attr.veth1;
2804
2805 if (netdev->downscript) {
2806 err = run_script(handler->name, "net", netdev->downscript,
2807 "down", "veth", veth1, (char*) NULL);
2808 if (err)
2809 return -1;
2810 }
2811 return 0;
2812}
2813
a589434e 2814static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2815{
0e391e57 2816 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2817 int err;
d957ae2d
MT
2818
2819 if (!netdev->link) {
2820 ERROR("no link specified for macvlan netdev");
2821 return -1;
2822 }
13954cce 2823
9ba8130c
SH
2824 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2825 if (err >= sizeof(peerbuf))
2826 return -1;
82d5ae15 2827
a0265685 2828 peer = lxc_mkifname(peerbuf);
ad40563e 2829 if (!peer) {
82d5ae15
DL
2830 ERROR("failed to make a temporary name");
2831 return -1;
0ad19a3f 2832 }
2833
3cfc0f3a
MN
2834 err = lxc_macvlan_create(netdev->link, peer,
2835 netdev->priv.macvlan_attr.mode);
2836 if (err) {
2837 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2838 peer, netdev->link, strerror(-err));
ad40563e 2839 goto out;
0ad19a3f 2840 }
2841
82d5ae15
DL
2842 netdev->ifindex = if_nametoindex(peer);
2843 if (!netdev->ifindex) {
36eb9bde 2844 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2845 goto out;
22ebac19 2846 }
2847
e3b4c4c4 2848 if (netdev->upscript) {
751d9dcd
DL
2849 err = run_script(handler->name, "net", netdev->upscript, "up",
2850 "macvlan", netdev->link, (char*) NULL);
2851 if (err)
ad40563e 2852 goto out;
e3b4c4c4
ST
2853 }
2854
a589434e 2855 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2856 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2857
d957ae2d 2858 return 0;
ad40563e
ÇO
2859out:
2860 lxc_netdev_delete_by_name(peer);
2861 free(peer);
2862 return -1;
0ad19a3f 2863}
2864
74a2b586
JK
2865static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2866{
2867 int err;
2868
2869 if (netdev->downscript) {
2870 err = run_script(handler->name, "net", netdev->downscript,
2871 "down", "macvlan", netdev->link,
2872 (char*) NULL);
2873 if (err)
2874 return -1;
2875 }
2876 return 0;
2877}
2878
a589434e
JN
2879/* XXX: merge with instantiate_macvlan */
2880static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2881{
2882 char peer[IFNAMSIZ];
3cfc0f3a 2883 int err;
82f58d03 2884 static uint16_t vlan_cntr = 0;
b7b2fde4 2885 unsigned int mtu = 0;
26c39028
JHS
2886
2887 if (!netdev->link) {
2888 ERROR("no link specified for vlan netdev");
2889 return -1;
2890 }
2891
82f58d03 2892 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2893 if (err >= sizeof(peer)) {
2894 ERROR("peer name too long");
2895 return -1;
2896 }
26c39028 2897
3cfc0f3a
MN
2898 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2899 if (err) {
2900 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2901 peer, netdev->link, strerror(-err));
26c39028
JHS
2902 return -1;
2903 }
2904
2905 netdev->ifindex = if_nametoindex(peer);
2906 if (!netdev->ifindex) {
2907 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2908 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2909 return -1;
2910 }
2911
a589434e 2912 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e 2913 netdev->ifindex);
b4fb7de1 2914 if (netdev->mtu) {
b7b2fde4
CB
2915 if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
2916 ERROR("Failed to retrieve mtu from: '%d'/'%s'.",
2917 netdev->ifindex, netdev->name);
2918 return -1;
2919 }
2920 err = lxc_netdev_set_mtu(peer, mtu);
b4fb7de1
VL
2921 if (err) {
2922 ERROR("failed to set mtu '%s' for %s : %s",
2923 netdev->mtu, peer, strerror(-err));
2924 lxc_netdev_delete_by_name(peer);
2925 return -1;
2926 }
2927 }
e892973e 2928
26c39028
JHS
2929 return 0;
2930}
2931
/* Shutdown handler for vlan devices: does nothing and always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2936
a589434e 2937static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2938{
6168e99f
DL
2939 if (!netdev->link) {
2940 ERROR("no link specified for the physical interface");
2941 return -1;
2942 }
2943
9d083402 2944 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2945 if (!netdev->ifindex) {
9d083402 2946 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2947 return -1;
2948 }
2949
e3b4c4c4
ST
2950 if (netdev->upscript) {
2951 int err;
751d9dcd
DL
2952 err = run_script(handler->name, "net", netdev->upscript,
2953 "up", "phys", netdev->link, (char*) NULL);
2954 if (err)
e3b4c4c4 2955 return -1;
e3b4c4c4
ST
2956 }
2957
82d5ae15 2958 return 0;
0ad19a3f 2959}
2960
74a2b586
JK
2961static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2962{
2963 int err;
2964
2965 if (netdev->downscript) {
2966 err = run_script(handler->name, "net", netdev->downscript,
2967 "down", "phys", netdev->link, (char*) NULL);
2968 if (err)
2969 return -1;
2970 }
2971 return 0;
2972}
2973
a589434e 2974static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2975{
2976 netdev->ifindex = 0;
2977 return 0;
2978}
2979
a589434e 2980static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2981{
82d5ae15 2982 netdev->ifindex = 0;
e3b4c4c4
ST
2983 if (netdev->upscript) {
2984 int err;
751d9dcd
DL
2985 err = run_script(handler->name, "net", netdev->upscript,
2986 "up", "empty", (char*) NULL);
2987 if (err)
e3b4c4c4 2988 return -1;
e3b4c4c4 2989 }
82d5ae15 2990 return 0;
0ad19a3f 2991}
2992
74a2b586
JK
2993static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2994{
2995 int err;
2996
2997 if (netdev->downscript) {
2998 err = run_script(handler->name, "net", netdev->downscript,
2999 "down", "empty", (char*) NULL);
3000 if (err)
3001 return -1;
3002 }
3003 return 0;
3004}
3005
/* Shutdown handler for "none" devices: does nothing and always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3010
3011int lxc_requests_empty_network(struct lxc_handler *handler)
3012{
3013 struct lxc_list *network = &handler->conf->network;
3014 struct lxc_list *iterator;
3015 struct lxc_netdev *netdev;
3016 bool found_none = false, found_nic = false;
3017
3018 if (lxc_list_empty(network))
3019 return 0;
3020
3021 lxc_list_for_each(iterator, network) {
3022
3023 netdev = iterator->elem;
3024
3025 if (netdev->type == LXC_NET_NONE)
3026 found_none = true;
3027 else
3028 found_nic = true;
3029 }
3030 if (found_none && !found_nic)
3031 return 1;
3032 return 0;
3033}
3034
e3b4c4c4 3035int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 3036{
e3b4c4c4 3037 struct lxc_list *network = &handler->conf->network;
82d5ae15 3038 struct lxc_list *iterator;
82d5ae15 3039 struct lxc_netdev *netdev;
cbef6c52
SH
3040 int am_root = (getuid() == 0);
3041
3042 if (!am_root)
3043 return 0;
0ad19a3f 3044
5f4535a3 3045 lxc_list_for_each(iterator, network) {
0ad19a3f 3046
5f4535a3 3047 netdev = iterator->elem;
13954cce 3048
24654103 3049 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 3050 ERROR("invalid network configuration type '%d'",
5f4535a3 3051 netdev->type);
82d5ae15
DL
3052 return -1;
3053 }
0ad19a3f 3054
e3b4c4c4 3055 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
3056 ERROR("failed to create netdev");
3057 return -1;
3058 }
e3b4c4c4 3059
0ad19a3f 3060 }
3061
3062 return 0;
3063}
3064
358daf49 3065bool lxc_delete_network(struct lxc_handler *handler)
7fef7a06 3066{
e97946ae 3067 int ret;
74a2b586 3068 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
3069 struct lxc_list *iterator;
3070 struct lxc_netdev *netdev;
358daf49 3071 bool deleted_all = true;
7fef7a06
DL
3072
3073 lxc_list_for_each(iterator, network) {
3074 netdev = iterator->elem;
d472214b 3075
74a2b586 3076 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352 3077 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
358daf49
CB
3078 WARN("Failed to rename interface with index %d "
3079 "to its initial name \"%s\".",
3080 netdev->ifindex, netdev->link);
d472214b 3081 continue;
d8f8e352 3082 }
d472214b 3083
74a2b586 3084 if (netdev_deconf[netdev->type](handler, netdev)) {
e97946ae 3085 WARN("Failed to destroy netdev");
74a2b586
JK
3086 }
3087
d8f8e352
DL
3088 /* Recent kernel remove the virtual interfaces when the network
3089 * namespace is destroyed but in case we did not moved the
3090 * interface to the network namespace, we have to destroy it
3091 */
e97946ae
CB
3092 if (netdev->ifindex != 0) {
3093 ret = lxc_netdev_delete_by_index(netdev->ifindex);
358daf49
CB
3094 if (-ret == ENODEV) {
3095 INFO("Interface \"%s\" with index %d already "
3096 "deleted or existing in different network "
3097 "namespace.",
3098 netdev->name ? netdev->name : "(null)",
3099 netdev->ifindex);
3100 } else if (ret < 0) {
3101 deleted_all = false;
3102 WARN("Failed to remove interface \"%s\" with "
3103 "index %d: %s.",
3104 netdev->name ? netdev->name : "(null)",
3105 netdev->ifindex, strerror(-ret));
3106 } else {
3107 INFO("Removed interface \"%s\" with index %d.",
3108 netdev->name ? netdev->name : "(null)",
3109 netdev->ifindex);
3110 }
e97946ae
CB
3111 }
3112
3113 /* Explicitly delete host veth device to prevent lingering
3114 * devices. We had issues in LXD around this.
3115 */
9aaaad30 3116 if (netdev->type == LXC_NET_VETH && !am_unpriv()) {
358daf49
CB
3117 char *hostveth;
3118 if (netdev->priv.veth_attr.pair) {
e97946ae 3119 hostveth = netdev->priv.veth_attr.pair;
358daf49
CB
3120 ret = lxc_netdev_delete_by_name(hostveth);
3121 if (ret < 0) {
3122 WARN("Failed to remove interface \"%s\" from host: %s.", hostveth, strerror(-ret));
3123 } else {
3124 INFO("Removed interface \"%s\" from host.", hostveth);
358daf49
CB
3125 }
3126 } else if (strlen(netdev->priv.veth_attr.veth1) > 0) {
e97946ae 3127 hostveth = netdev->priv.veth_attr.veth1;
e97946ae 3128 ret = lxc_netdev_delete_by_name(hostveth);
358daf49
CB
3129 if (ret < 0) {
3130 WARN("Failed to remove \"%s\" from host: %s.", hostveth, strerror(-ret));
3131 } else {
3132 INFO("Removed interface \"%s\" from host.", hostveth);
3133 memset((void *)&netdev->priv.veth_attr.veth1, 0, sizeof(netdev->priv.veth_attr.veth1));
3134 }
e97946ae
CB
3135 }
3136 }
7fef7a06 3137 }
358daf49
CB
3138
3139 return deleted_all;
7fef7a06
DL
3140}
3141
45e854dc
SG
3142#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3143
fe1f672f 3144/* lxc-user-nic returns "interface_name:interface_name\n" */
eab15c1e 3145#define MAX_BUFFER_SIZE IFNAMSIZ * 2 + 2
c43cbc04
SH
3146static int unpriv_assign_nic(const char *lxcpath, char *lxcname,
3147 struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3148{
3149 pid_t child;
a7242d9a
ÇO
3150 int bytes, pipefd[2];
3151 char *token, *saveptr = NULL;
fe1f672f 3152 char buffer[MAX_BUFFER_SIZE];
091045f8 3153 char netdev_link[IFNAMSIZ + 1];
cbef6c52
SH
3154
3155 if (netdev->type != LXC_NET_VETH) {
3156 ERROR("nic type %d not support for unprivileged use",
091045f8 3157 netdev->type);
cbef6c52
SH
3158 return -1;
3159 }
3160
091045f8 3161 if (pipe(pipefd) < 0) {
a7242d9a
ÇO
3162 SYSERROR("pipe failed");
3163 return -1;
3164 }
3165
091045f8
CB
3166 child = fork();
3167 if (child < 0) {
cbef6c52 3168 SYSERROR("fork");
a7242d9a
ÇO
3169 close(pipefd[0]);
3170 close(pipefd[1]);
3171 return -1;
3172 }
3173
3174 if (child == 0) { // child
091045f8
CB
3175 /* Call lxc-user-nic pid type bridge. */
3176 int ret;
3177 char pidstr[LXC_NUMSTRLEN64];
3178
3179 close(pipefd[0]); /* Close the read-end of the pipe. */
3180
3181 /* Redirect stdout to write-end of the pipe. */
3182 ret = dup2(pipefd[1], STDOUT_FILENO);
3183 close(pipefd[1]); /* Close the write-end of the pipe. */
3184 if (ret < 0) {
3185 SYSERROR("Failed to dup2() to redirect stdout to pipe file descriptor.");
3186 exit(EXIT_FAILURE);
3187 }
a7242d9a 3188
091045f8 3189 if (netdev->link)
cff7b5eb 3190 strncpy(netdev_link, netdev->link, IFNAMSIZ);
091045f8 3191 else
cff7b5eb 3192 strncpy(netdev_link, "none", IFNAMSIZ);
091045f8
CB
3193
3194 ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
3195 if (ret < 0 || ret >= LXC_NUMSTRLEN64)
3196 exit(EXIT_FAILURE);
3197 pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
3198
3199 INFO("Execing lxc-user-nic %s %s %s veth %s %s", lxcpath,
3200 lxcname, pidstr, netdev_link, netdev->name);
c43cbc04 3201 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname,
091045f8
CB
3202 pidstr, "veth", netdev_link, netdev->name, NULL);
3203
3204 SYSERROR("Failed to exec lxc-user-nic.");
3205 exit(EXIT_FAILURE);
a7242d9a
ÇO
3206 }
3207
3208 /* close the write-end of the pipe */
3209 close(pipefd[1]);
3210
fe1f672f 3211 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
091045f8
CB
3212 if (bytes < 0)
3213 SYSERROR("Failed to read from pipe file descriptor.");
a7242d9a
ÇO
3214 buffer[bytes - 1] = '\0';
3215
3216 if (wait_for_pid(child) != 0) {
3217 close(pipefd[0]);
cbef6c52
SH
3218 return -1;
3219 }
3220
a7242d9a
ÇO
3221 /* close the read-end of the pipe */
3222 close(pipefd[0]);
cbef6c52 3223
a7242d9a
ÇO
3224 /* fill netdev->name field */
3225 token = strtok_r(buffer, ":", &saveptr);
3226 if (!token)
3227 return -1;
091045f8
CB
3228
3229 netdev->name = malloc(IFNAMSIZ + 1);
658979c5 3230 if (!netdev->name) {
091045f8 3231 SYSERROR("Failed to allocate memory.");
658979c5
SH
3232 return -1;
3233 }
091045f8 3234 memset(netdev->name, 0, IFNAMSIZ + 1);
658979c5 3235 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3236
3237 /* fill netdev->veth_attr.pair field */
3238 token = strtok_r(NULL, ":", &saveptr);
3239 if (!token)
3240 return -1;
091045f8 3241
a7242d9a 3242 netdev->priv.veth_attr.pair = strdup(token);
658979c5 3243 if (!netdev->priv.veth_attr.pair) {
091045f8 3244 ERROR("Failed to allocate memory.");
658979c5
SH
3245 return -1;
3246 }
45e854dc 3247
a7242d9a 3248 return 0;
cbef6c52
SH
3249}
3250
c43cbc04
SH
3251int lxc_assign_network(const char *lxcpath, char *lxcname,
3252 struct lxc_list *network, pid_t pid)
0ad19a3f 3253{
82d5ae15 3254 struct lxc_list *iterator;
82d5ae15 3255 struct lxc_netdev *netdev;
f2e206ff 3256 char ifname[IFNAMSIZ];
cbef6c52 3257 int am_root = (getuid() == 0);
3cfc0f3a 3258 int err;
0ad19a3f 3259
5f4535a3 3260 lxc_list_for_each(iterator, network) {
82d5ae15 3261
5f4535a3 3262 netdev = iterator->elem;
82d5ae15 3263
fbb16259 3264 if (netdev->type == LXC_NET_VETH && !am_root) {
c43cbc04 3265 if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid))
cbef6c52 3266 return -1;
658979c5
SH
3267 // lxc-user-nic has moved the nic to the new ns.
3268 // unpriv_assign_nic() fills in netdev->name.
3269 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3270 continue;
3271 }
236087a6 3272
fbb16259
SH
3273 /* empty network namespace, nothing to move */
3274 if (!netdev->ifindex)
3275 continue;
3276
f2e206ff 3277 /* retrieve the name of the interface */
3278 if (!if_indextoname(netdev->ifindex, ifname)) {
3279 ERROR("no interface corresponding to index '%d'", netdev->ifindex);
3280 return -1;
3281 }
3282
3283 err = lxc_netdev_move_by_name(ifname, pid, NULL);
3cfc0f3a
MN
3284 if (err) {
3285 ERROR("failed to move '%s' to the container : %s",
3286 netdev->link, strerror(-err));
82d5ae15
DL
3287 return -1;
3288 }
3289
198cbbaa 3290 DEBUG("move '%s'/'%s' to '%d': .", ifname, netdev->name, pid);
0ad19a3f 3291 }
3292
3293 return 0;
3294}
3295
251d0d2a
DE
3296static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3297 size_t buf_size)
f6d3e3e4
SH
3298{
3299 char path[PATH_MAX];
e4ccd113 3300 int ret, closeret;
f6d3e3e4
SH
3301 FILE *f;
3302
3303 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3304 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3305 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3306 return -E2BIG;
3307 }
3308 f = fopen(path, "w");
3309 if (!f) {
3310 perror("open");
3311 return -EINVAL;
3312 }
251d0d2a 3313 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3314 if (ret < 0)
e4ccd113
SH
3315 SYSERROR("writing id mapping");
3316 closeret = fclose(f);
3317 if (closeret)
3318 SYSERROR("writing id mapping");
3319 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3320}
3321
df6a2945
CB
3322/* Check whether a binary exist and has either CAP_SETUID, CAP_SETGID or both. */
3323static int idmaptool_on_path_and_privileged(const char *binary, cap_value_t cap)
3324{
3325 char *path;
3326 int ret;
3327 struct stat st;
3328 int fret = 0;
3329
3330 path = on_path(binary, NULL);
3331 if (!path)
3332 return -ENOENT;
3333
3334 ret = stat(path, &st);
3335 if (ret < 0) {
3336 fret = -errno;
3337 goto cleanup;
3338 }
3339
3340 /* Check if the binary is setuid. */
3341 if (st.st_mode & S_ISUID) {
3342 DEBUG("The binary \"%s\" does have the setuid bit set.", path);
3343 fret = 1;
3344 goto cleanup;
3345 }
3346
3347 #if HAVE_LIBCAP
3348 /* Check if it has the CAP_SETUID capability. */
3349 if ((cap & CAP_SETUID) &&
3350 lxc_file_cap_is_set(path, CAP_SETUID, CAP_EFFECTIVE) &&
3351 lxc_file_cap_is_set(path, CAP_SETUID, CAP_PERMITTED)) {
3352 DEBUG("The binary \"%s\" has CAP_SETUID in its CAP_EFFECTIVE "
3353 "and CAP_PERMITTED sets.", path);
3354 fret = 1;
3355 goto cleanup;
3356 }
3357
3358 /* Check if it has the CAP_SETGID capability. */
3359 if ((cap & CAP_SETGID) &&
3360 lxc_file_cap_is_set(path, CAP_SETGID, CAP_EFFECTIVE) &&
3361 lxc_file_cap_is_set(path, CAP_SETGID, CAP_PERMITTED)) {
3362 DEBUG("The binary \"%s\" has CAP_SETGID in its CAP_EFFECTIVE "
3363 "and CAP_PERMITTED sets.", path);
3364 fret = 1;
3365 goto cleanup;
3366 }
3367 #endif
3368
3369cleanup:
3370 free(path);
3371 return fret;
3372}
3373
f6d3e3e4
SH
3374int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3375{
f6d3e3e4 3376 struct id_map *map;
4bc3b759 3377 struct lxc_list *iterator;
251d0d2a 3378 enum idtype type;
4bc3b759 3379 char *pos;
df6a2945
CB
3380 int euid;
3381 int ret = 0, use_shadow = 0;
3382 int uidmap = 0, gidmap = 0;
3383 char *buf = NULL;
8afb3e61 3384
df6a2945
CB
3385 euid = geteuid();
3386
3387 /* If new{g,u}idmap exists, that is, if shadow is handing out subuid
3388 * ranges, then insist that root also reserve ranges in subuid. This
22038de5
SH
3389 * will protected it by preventing another user from being handed the
3390 * range by shadow.
3391 */
df6a2945
CB
3392 uidmap = idmaptool_on_path_and_privileged("newuidmap", CAP_SETUID);
3393 gidmap = idmaptool_on_path_and_privileged("newgidmap", CAP_SETGID);
3394 if (uidmap > 0 && gidmap > 0) {
3395 DEBUG("Functional newuidmap and newgidmap binary found.");
4bc3b759 3396 use_shadow = true;
df6a2945
CB
3397 } else if (uidmap == -ENOENT && gidmap == -ENOENT && !euid) {
3398 DEBUG("No newuidmap and newgidmap binary found. Trying to "
3399 "write directly with euid 0.");
3400 use_shadow = false;
3401 } else {
3402 DEBUG("Either one or both of the newuidmap and newgidmap "
3403 "binaries do not exist or are missing necessary "
3404 "privilege.");
0e6e3a41
SG
3405 return -1;
3406 }
251d0d2a 3407
4bc3b759 3408 for (type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3409 int left, fill;
4bc3b759 3410 bool had_entry = false;
cf3ef16d 3411 if (!buf) {
4bc3b759 3412 buf = pos = malloc(LXC_IDMAPLEN);
4f7521b4
SH
3413 if (!buf)
3414 return -ENOMEM;
cf3ef16d
SH
3415 }
3416 pos = buf;
0e6e3a41 3417 if (use_shadow)
4bc3b759 3418 pos += sprintf(buf, "new%cidmap %d", type == ID_TYPE_UID ? 'u' : 'g', pid);
4f7521b4 3419
cf3ef16d 3420 lxc_list_for_each(iterator, idmap) {
4bc3b759
CB
3421 /* The kernel only takes <= 4k for writes to
3422 * /proc/<nr>/[ug]id_map
3423 */
251d0d2a 3424 map = iterator->elem;
cf3ef16d
SH
3425 if (map->idtype != type)
3426 continue;
3427
4bc3b759
CB
3428 had_entry = true;
3429
3430 left = LXC_IDMAPLEN - (pos - buf);
d1838f34 3431 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
4bc3b759
CB
3432 use_shadow ? " " : "", map->nsid,
3433 map->hostid, map->range,
0e6e3a41 3434 use_shadow ? "" : "\n");
cf3ef16d 3435 if (fill <= 0 || fill >= left)
4bc3b759
CB
3436 SYSERROR("Too many {g,u}id mappings defined.");
3437
cf3ef16d 3438 pos += fill;
251d0d2a 3439 }
cf3ef16d 3440 if (!had_entry)
4f7521b4 3441 continue;
cf3ef16d 3442
0e6e3a41 3443 if (!use_shadow) {
4bc3b759 3444 ret = write_id_mapping(type, pid, buf, pos - buf);
d1838f34 3445 } else {
4bc3b759 3446 left = LXC_IDMAPLEN - (pos - buf);
d1838f34
MS
3447 fill = snprintf(pos, left, "\n");
3448 if (fill <= 0 || fill >= left)
4bc3b759 3449 SYSERROR("Too many {g,u}id mappings defined.");
d1838f34 3450 pos += fill;
cf3ef16d 3451 ret = system(buf);
d1838f34 3452 }
f6d3e3e4
SH
3453 if (ret)
3454 break;
3455 }
251d0d2a 3456
f10fad2f 3457 free(buf);
f6d3e3e4
SH
3458 return ret;
3459}
3460
cf3ef16d 3461/*
7b50c609
TS
3462 * return the host uid/gid to which the container root is mapped in
3463 * *val.
0b3a6504 3464 * Return true if id was found, false otherwise.
cf3ef16d 3465 */
2a9a80cb 3466bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3467 unsigned long *val)
cf3ef16d
SH
3468{
3469 struct lxc_list *it;
3470 struct id_map *map;
3471
3472 lxc_list_for_each(it, &conf->id_map) {
3473 map = it->elem;
7b50c609 3474 if (map->idtype != idtype)
cf3ef16d
SH
3475 continue;
3476 if (map->nsid != 0)
3477 continue;
2a9a80cb
SH
3478 *val = map->hostid;
3479 return true;
cf3ef16d 3480 }
2a9a80cb 3481 return false;
cf3ef16d
SH
3482}
3483
2133f58c 3484int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3485{
3486 struct lxc_list *it;
3487 struct id_map *map;
3488 lxc_list_for_each(it, &conf->id_map) {
3489 map = it->elem;
2133f58c 3490 if (map->idtype != idtype)
cf3ef16d
SH
3491 continue;
3492 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3493 return (id - map->hostid) + map->nsid;
cf3ef16d 3494 }
57d116ab 3495 return -1;
cf3ef16d
SH
3496}
3497
2133f58c 3498int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3499{
3500 struct lxc_list *it;
3501 struct id_map *map;
2133f58c 3502 unsigned int freeid = 0;
cf3ef16d
SH
3503again:
3504 lxc_list_for_each(it, &conf->id_map) {
3505 map = it->elem;
2133f58c 3506 if (map->idtype != idtype)
cf3ef16d
SH
3507 continue;
3508 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3509 freeid = map->nsid + map->range;
3510 goto again;
3511 }
3512 }
3513 return freeid;
3514}
3515
19a26f82
MK
3516int lxc_find_gateway_addresses(struct lxc_handler *handler)
3517{
3518 struct lxc_list *network = &handler->conf->network;
3519 struct lxc_list *iterator;
3520 struct lxc_netdev *netdev;
3521 int link_index;
3522
3523 lxc_list_for_each(iterator, network) {
3524 netdev = iterator->elem;
3525
3526 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3527 continue;
3528
3529 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3530 ERROR("gateway = auto only supported for "
3531 "veth and macvlan");
3532 return -1;
3533 }
3534
3535 if (!netdev->link) {
3536 ERROR("gateway = auto needs a link interface");
3537 return -1;
3538 }
3539
3540 link_index = if_nametoindex(netdev->link);
3541 if (!link_index)
3542 return -EINVAL;
3543
3544 if (netdev->ipv4_gateway_auto) {
3545 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3546 ERROR("failed to automatically find ipv4 gateway "
3547 "address from link interface '%s'", netdev->link);
3548 return -1;
3549 }
3550 }
3551
3552 if (netdev->ipv6_gateway_auto) {
3553 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3554 ERROR("failed to automatically find ipv6 gateway "
3555 "address from link interface '%s'", netdev->link);
3556 return -1;
3557 }
3558 }
3559 }
3560
3561 return 0;
3562}
3563
5e4a62bf 3564int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3565{
5e4a62bf 3566 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3567 int i, ret;
b0a33c1e 3568
5e4a62bf
DL
3569 /* no tty in the configuration */
3570 if (!conf->tty)
b0a33c1e 3571 return 0;
3572
13954cce 3573 tty_info->pty_info =
e4e7d59d 3574 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3575 if (!tty_info->pty_info) {
36eb9bde 3576 SYSERROR("failed to allocate pty_info");
985d15b1 3577 return -1;
b0a33c1e 3578 }
3579
985d15b1 3580 for (i = 0; i < conf->tty; i++) {
13954cce 3581
b0a33c1e 3582 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3583
025ed0f3
SH
3584 process_lock();
3585 ret = openpty(&pty_info->master, &pty_info->slave,
3586 pty_info->name, NULL, NULL);
3587 process_unlock();
3588 if (ret) {
36eb9bde 3589 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3590 tty_info->nbtty = i;
3591 lxc_delete_tty(tty_info);
3592 return -1;
b0a33c1e 3593 }
3594
5332bb84
DL
3595 DEBUG("allocated pty '%s' (%d/%d)",
3596 pty_info->name, pty_info->master, pty_info->slave);
3597
3ec1648d 3598 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3599 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3600 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3601
b0a33c1e 3602 pty_info->busy = 0;
3603 }
3604
985d15b1 3605 tty_info->nbtty = conf->tty;
1ac470c0
DL
3606
3607 INFO("tty's configured");
3608
985d15b1 3609 return 0;
b0a33c1e 3610}
3611
3612void lxc_delete_tty(struct lxc_tty_info *tty_info)
3613{
3614 int i;
3615
3616 for (i = 0; i < tty_info->nbtty; i++) {
3617 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3618
3619 close(pty_info->master);
3620 close(pty_info->slave);
3621 }
3622
3623 free(tty_info->pty_info);
e00c0242 3624 tty_info->pty_info = NULL;
b0a33c1e 3625 tty_info->nbtty = 0;
3626}
3627
f6d3e3e4 3628/*
7b50c609
TS
3629 * chown_mapped_root: for an unprivileged user with uid/gid X to
3630 * chown a dir to subuid/subgid Y, he needs to run chown as root
3631 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3632 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3633 * root is privileged with respect to hostuid/hostgid X, allowing
3634 * him to do the chown.
f6d3e3e4 3635 */
c4d10a05 3636int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3637{
7b50c609
TS
3638 uid_t rootuid;
3639 gid_t rootgid;
c4d10a05 3640 pid_t pid;
2a9a80cb 3641 unsigned long val;
a7ef8753 3642 char *chownpath = path;
f6d3e3e4 3643
2a9a80cb 3644 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3645 ERROR("No mapping for container root");
3646 return -1;
f6d3e3e4 3647 }
7b50c609
TS
3648 rootuid = (uid_t) val;
3649 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3650 ERROR("No mapping for container root");
3651 return -1;
3652 }
3653 rootgid = (gid_t) val;
2a9a80cb 3654
a7ef8753
SH
3655 /*
3656 * In case of overlay, we want only the writeable layer
3657 * to be chowned
3658 */
1f92162d 3659 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3660 chownpath = strchr(path, ':');
3661 if (!chownpath) {
3662 ERROR("Bad overlay path: %s", path);
3663 return -1;
3664 }
3665 chownpath = strchr(chownpath+1, ':');
3666 if (!chownpath) {
3667 ERROR("Bad overlay path: %s", path);
3668 return -1;
3669 }
3670 chownpath++;
3671 }
3672 path = chownpath;
c4d10a05 3673 if (geteuid() == 0) {
7b50c609 3674 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3675 ERROR("Error chowning %s", path);
3676 return -1;
3677 }
3678 return 0;
3679 }
f3d7e4ca 3680
7b50c609 3681 if (rootuid == geteuid()) {
f3d7e4ca
SH
3682 // nothing to do
3683 INFO("%s: container root is our uid; no need to chown" ,__func__);
3684 return 0;
3685 }
3686
c4d10a05
SH
3687 pid = fork();
3688 if (pid < 0) {
3689 SYSERROR("Failed forking");
f6d3e3e4
SH
3690 return -1;
3691 }
c4d10a05 3692 if (!pid) {
7b50c609
TS
3693 int hostuid = geteuid(), hostgid = getegid(), ret;
3694 struct stat sb;
3695 char map1[100], map2[100], map3[100], map4[100], map5[100];
3696 char ugid[100];
3697 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3698 "-m", map3, "-m", map5,
3699 "--", "chown", ugid, path, NULL };
3700 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3701 "-m", map3, "-m", map4, "-m", map5,
3702 "--", "chown", ugid, path, NULL };
3703
3704 // save the current gid of "path"
3705 if (stat(path, &sb) < 0) {
3706 ERROR("Error stat %s", path);
3707 return -1;
3708 }
f6d3e3e4 3709
9a7c2aba
SH
3710 /*
3711 * A file has to be group-owned by a gid mapped into the
3712 * container, or the container won't be privileged over it.
3713 */
3714 if (sb.st_uid == geteuid() &&
3715 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3716 chown(path, -1, hostgid) < 0) {
3717 ERROR("Failed chgrping %s", path);
7b50c609
TS
3718 return -1;
3719 }
3720
3721 // "u:0:rootuid:1"
3722 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3723 if (ret < 0 || ret >= 100) {
3724 ERROR("Error uid printing map string");
f6d3e3e4
SH
3725 return -1;
3726 }
c4d10a05 3727
98e5ba51
SH
3728 // "u:hostuid:hostuid:1"
3729 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3730 if (ret < 0 || ret >= 100) {
3731 ERROR("Error uid printing map string");
3732 return -1;
3733 }
3734
7b50c609
TS
3735 // "g:0:rootgid:1"
3736 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3737 if (ret < 0 || ret >= 100) {
7b50c609 3738 ERROR("Error gid printing map string");
c4d10a05
SH
3739 return -1;
3740 }
3741
7b50c609 3742 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3743 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3744 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3745 if (ret < 0 || ret >= 100) {
3746 ERROR("Error gid printing map string");
3747 return -1;
3748 }
3749
3750 // "g:hostgid:hostgid:1"
3751 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3752 if (ret < 0 || ret >= 100) {
3753 ERROR("Error gid printing map string");
3754 return -1;
3755 }
3756
3757 // "0:pathgid" (chown)
b4c1e35d 3758 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3759 if (ret < 0 || ret >= 100) {
3760 ERROR("Error owner printing format string for chown");
3761 return -1;
3762 }
3763
3764 if (hostgid == sb.st_gid)
3765 ret = execvp("lxc-usernsexec", args1);
3766 else
3767 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3768 SYSERROR("Failed executing usernsexec");
3769 exit(1);
f6d3e3e4 3770 }
c4d10a05 3771 return wait_for_pid(pid);
f6d3e3e4
SH
3772}
3773
c4d10a05 3774int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3775{
c4d10a05 3776 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3777 return 0;
c4d10a05 3778
29b10e4f 3779 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3780 ERROR("Failed to chown %s", c->console.name);
3781 return -1;
3782 }
3783
f6d3e3e4
SH
3784 return 0;
3785}
3786
f267d666 3787/* NOTE: not to be called from inside the container namespace! */
5112cd70
SH
3788int tmp_proc_mount(struct lxc_conf *lxc_conf)
3789{
3790 int mounted;
3791
01958b1f 3792 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3793 if (mounted == -1) {
3794 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3795 /* continue only if there is no rootfs */
3796 if (lxc_conf->rootfs.path)
3797 return -1;
5112cd70
SH
3798 } else if (mounted == 1) {
3799 lxc_conf->tmp_umount_proc = 1;
3800 }
3801 return 0;
3802}
3803
3804void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3805{
3806 if (lxc_conf->tmp_umount_proc == 1) {
3807 umount("/proc");
3808 lxc_conf->tmp_umount_proc = 0;
3809 }
3810}
3811
/*
 * Walk /proc/self/mountinfo and remount with MS_SLAVE every entry whose
 * optional-fields column contains "shared". Failures are logged and do
 * not stop the walk.
 */
void remount_all_slave(void)
{
	char *line = NULL;
	size_t cap = 0;
	FILE *mounts;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (!mounts) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	for (;;) {
		char *mntpoint, *optional;

		if (getline(&line, &cap, mounts) == -1)
			break;

		mntpoint = get_field(line, 4);
		if (!mntpoint)
			continue;

		/* Located relative to the mount point field. */
		optional = get_field(mntpoint, 2);
		if (!optional)
			continue;

		null_endofword(optional);
		if (strstr(optional, "shared") == NULL)
			continue;

		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL) != 0) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mounts);
	free(line);
}
3845
2322903b
SH
3846void lxc_execute_bind_init(struct lxc_conf *conf)
3847{
3848 int ret;
9d9c111c
SH
3849 char path[PATH_MAX], destpath[PATH_MAX], *p;
3850
3851 /* If init exists in the container, don't bind mount a static one */
3852 p = choose_init(conf->rootfs.mount);
3853 if (p) {
3854 free(p);
3855 return;
3856 }
2322903b
SH
3857
3858 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3859 if (ret < 0 || ret >= PATH_MAX) {
3860 WARN("Path name too long searching for lxc.init.static");
3861 return;
3862 }
3863
3864 if (!file_exists(path)) {
3865 INFO("%s does not exist on host", path);
3866 return;
3867 }
3868
3869 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3870 if (ret < 0 || ret >= PATH_MAX) {
3871 WARN("Path name too long for container's lxc.init.static");
3872 return;
3873 }
3874
3875 if (!file_exists(destpath)) {
3876 FILE * pathfile = fopen(destpath, "wb");
3877 if (!pathfile) {
3878 SYSERROR("Failed to create mount target '%s'", destpath);
3879 return;
3880 }
3881 fclose(pathfile);
3882 }
3883
592fd47a 3884 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3885 if (ret < 0)
3886 SYSERROR("Failed to bind lxc.init.static into container");
3887 INFO("lxc.init.static bound into container at %s", path);
3888}
3889
35120d9c
SH
3890/*
3891 * This does the work of remounting / if it is shared, calling the
3892 * container pre-mount hooks, and mounting the rootfs.
3893 */
3894int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3895{
35120d9c
SH
3896 if (conf->rootfs_setup) {
3897 /*
3898 * rootfs was set up in another namespace. bind-mount it
3899 * to give us a mount in our own ns so we can pivot_root to it
3900 */
3901 const char *path = conf->rootfs.mount;
3902 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3903 ERROR("Failed to bind-mount container / onto itself");
145832ba 3904 return -1;
35120d9c 3905 }
145832ba 3906 return 0;
35120d9c 3907 }
d4ef7c50 3908
e995d7a2
SH
3909 remount_all_slave();
3910
35120d9c
SH
3911 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3912 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3913 return -1;
3914 }
3915
3916 if (setup_rootfs(conf)) {
3917 ERROR("failed to setup rootfs for '%s'", name);
3918 return -1;
3919 }
3920
3921 conf->rootfs_setup = true;
3922 return 0;
3923}
3924
1c1c7051
SH
3925static bool verify_start_hooks(struct lxc_conf *conf)
3926{
3927 struct lxc_list *it;
3928 char path[MAXPATHLEN];
3929 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3930 char *hookname = it->elem;
3931 struct stat st;
3932 int ret;
3933
3934 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3935 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3936 if (ret < 0 || ret >= MAXPATHLEN)
3937 return false;
3938 ret = stat(path, &st);
3939 if (ret) {
7b6753e7 3940 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3941 hookname);
3942 return false;
3943 }
6a0c909a 3944 return true;
1c1c7051
SH
3945 }
3946
3947 return true;
3948}
3949
e8bd4e43
SH
3950static int send_fd(int sock, int fd)
3951{
3952 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3953
3954
3955 if (ret < 0) {
3956 SYSERROR("Error sending tty fd to parent");
3957 return -1;
3958 }
3959
3960 return 0;
3961}
3962
3963static int send_ttys_to_parent(struct lxc_handler *handler)
3964{
3965 struct lxc_conf *conf = handler->conf;
3966 const struct lxc_tty_info *tty_info = &conf->tty_info;
3967 int i;
3968 int sock = handler->ttysock[0];
3969
3970 for (i = 0; i < tty_info->nbtty; i++) {
3971 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3972 if (send_fd(sock, pty_info->slave) < 0)
3973 goto bad;
3974 close(pty_info->slave);
3975 pty_info->slave = -1;
3976 if (send_fd(sock, pty_info->master) < 0)
3977 goto bad;
3978 close(pty_info->master);
3979 pty_info->master = -1;
3980 }
3981
3982 close(handler->ttysock[0]);
3983 close(handler->ttysock[1]);
3984
3985 return 0;
3986
3987bad:
3988 ERROR("Error writing tty fd to parent");
3989 return -1;
3990}
3991
35120d9c
SH
3992int lxc_setup(struct lxc_handler *handler)
3993{
3994 const char *name = handler->name;
3995 struct lxc_conf *lxc_conf = handler->conf;
3996 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3997
3998 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3999 ERROR("Error setting up rootfs mount after spawn");
4000 return -1;
4001 }
4002
6c544cb3
MM
4003 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
4004 if (setup_utsname(lxc_conf->utsname)) {
4005 ERROR("failed to setup the utsname for '%s'", name);
4006 return -1;
4007 }
0ad19a3f 4008 }
4009
5f4535a3 4010 if (setup_network(&lxc_conf->network)) {
36eb9bde 4011 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 4012 return -1;
0ad19a3f 4013 }
4014
bc6928ff 4015 if (lxc_conf->autodev > 0) {
14221cbb 4016 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 4017 ERROR("failed to mount /dev in the container");
c6883f38
SH
4018 return -1;
4019 }
4020 }
4021
368bbc02
CS
4022 /* do automatic mounts (mainly /proc and /sys), but exclude
4023 * those that need to wait until other stuff has finished
4024 */
4fb3cba5 4025 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4026 ERROR("failed to setup the automatic mounts for '%s'", name);
4027 return -1;
4028 }
4029
0a2dddd4 4030 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath)) {
36eb9bde 4031 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 4032 return -1;
576f946d 4033 }
4034
0a2dddd4 4035 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name, lxcpath)) {
e7938e9e
MN
4036 ERROR("failed to setup the mount entries for '%s'", name);
4037 return -1;
4038 }
4039
7b6753e7 4040 /* Make sure any start hooks are in the container */
1c1c7051
SH
4041 if (!verify_start_hooks(lxc_conf))
4042 return -1;
4043
2322903b
SH
4044 if (lxc_conf->is_execute)
4045 lxc_execute_bind_init(lxc_conf);
4046
368bbc02
CS
4047 /* now mount only cgroup, if wanted;
4048 * before, /sys could not have been mounted
4049 * (is either mounted automatically or via fstab entries)
4050 */
4fb3cba5 4051 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4052 ERROR("failed to setup the automatic mounts for '%s'", name);
4053 return -1;
4054 }
4055
283678ed 4056 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
4057 ERROR("failed to run mount hooks for container '%s'.", name);
4058 return -1;
4059 }
4060
bc6928ff 4061 if (lxc_conf->autodev > 0) {
0728ebf4
TA
4062 bool mount_console = lxc_conf->console.path && !strcmp(lxc_conf->console.path, "none");
4063
283678ed 4064 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
4065 ERROR("failed to run autodev hooks for container '%s'.", name);
4066 return -1;
4067 }
0728ebf4 4068 if (fill_autodev(&lxc_conf->rootfs, mount_console)) {
91c3830e
SH
4069 ERROR("failed to populate /dev in the container");
4070 return -1;
4071 }
4072 }
368bbc02 4073
37903589 4074 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 4075 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 4076 return -1;
6e590161 4077 }
4078
7e0e1d94
AV
4079 if (lxc_conf->kmsg) {
4080 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
4081 ERROR("failed to setup kmsg for '%s'", name);
4082 }
1bd051a6 4083
69aa6655
DE
4084 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
4085 ERROR("failed to setup /dev symlinks for '%s'", name);
4086 return -1;
4087 }
4088
5112cd70
SH
4089 /* mount /proc if it's not already there */
4090 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 4091 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 4092 return -1;
e075f5d9 4093 }
e075f5d9 4094
ac778708 4095 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 4096 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 4097 return -1;
ed502555 4098 }
4099
571e6ec8 4100 if (setup_pts(lxc_conf->pts)) {
36eb9bde 4101 ERROR("failed to setup the new pts instance");
95b5ffaf 4102 return -1;
3c26f34e 4103 }
4104
e8bd4e43
SH
4105 if (lxc_create_tty(name, lxc_conf)) {
4106 ERROR("failed to create the ttys");
4107 return -1;
4108 }
4109
4110 if (send_ttys_to_parent(handler) < 0) {
4111 ERROR("failure sending console info to parent");
4112 return -1;
4113 }
4114
4115
4116 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
4117 ERROR("failed to setup the ttys for '%s'", name);
4118 return -1;
4119 }
4120
4121 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
4122 SYSERROR("failed to set environment variable for container ptys");
4123
4124
cccc74b5
DL
4125 if (setup_personality(lxc_conf->personality)) {
4126 ERROR("failed to setup personality");
4127 return -1;
4128 }
4129
97a8f74f
SG
4130 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
4131 if (!lxc_list_empty(&lxc_conf->caps)) {
7389ca26 4132 ERROR("Container requests lxc.cap.drop and lxc.cap.keep: either use lxc.cap.drop or lxc.cap.keep, not both.");
f6d3e3e4
SH
4133 return -1;
4134 }
97a8f74f
SG
4135 if (dropcaps_except(&lxc_conf->keepcaps)) {
4136 ERROR("failed to keep requested caps");
4137 return -1;
4138 }
4139 } else if (setup_caps(&lxc_conf->caps)) {
4140 ERROR("failed to drop capabilities");
4141 return -1;
81810dd1
DL
4142 }
4143
cd54d859
DL
4144 NOTICE("'%s' is setup.", name);
4145
0ad19a3f 4146 return 0;
4147}
26ddeedd 4148
283678ed
SH
4149int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
4150 const char *lxcpath, char *argv[])
26ddeedd
SH
4151{
4152 int which = -1;
4153 struct lxc_list *it;
4154
4155 if (strcmp(hook, "pre-start") == 0)
4156 which = LXCHOOK_PRESTART;
5ea6163a
SH
4157 else if (strcmp(hook, "pre-mount") == 0)
4158 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4159 else if (strcmp(hook, "mount") == 0)
4160 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4161 else if (strcmp(hook, "autodev") == 0)
4162 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4163 else if (strcmp(hook, "start") == 0)
4164 which = LXCHOOK_START;
52492063
WB
4165 else if (strcmp(hook, "stop") == 0)
4166 which = LXCHOOK_STOP;
26ddeedd
SH
4167 else if (strcmp(hook, "post-stop") == 0)
4168 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4169 else if (strcmp(hook, "clone") == 0)
4170 which = LXCHOOK_CLONE;
37cf711b
SY
4171 else if (strcmp(hook, "destroy") == 0)
4172 which = LXCHOOK_DESTROY;
26ddeedd
SH
4173 else
4174 return -1;
4175 lxc_list_for_each(it, &conf->hooks[which]) {
4176 int ret;
4177 char *hookname = it->elem;
283678ed 4178 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4179 if (ret)
4180 return ret;
4181 }
4182 return 0;
4183}
72d0e1cb 4184
427b3a21 4185static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4186{
4187 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4188 struct lxc_list *it2,*next;
72d0e1cb
SG
4189
4190 lxc_list_del(it);
4191
f10fad2f
ME
4192 free(netdev->link);
4193 free(netdev->name);
4194 if (netdev->type == LXC_NET_VETH)
c9bb9a85 4195 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
4196 free(netdev->upscript);
4197 free(netdev->hwaddr);
4198 free(netdev->mtu);
4199 free(netdev->ipv4_gateway);
4200 free(netdev->ipv6_gateway);
9ebb03ad 4201 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4202 lxc_list_del(it2);
4203 free(it2->elem);
4204 free(it2);
4205 }
9ebb03ad 4206 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4207 lxc_list_del(it2);
4208 free(it2->elem);
4209 free(it2);
4210 }
d95db067 4211 free(netdev);
72d0e1cb
SG
4212 free(it);
4213}
4214
4215/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4216int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4217{
4218 char *p1;
4219 int ret, idx, i;
4220 struct lxc_list *it;
4221 struct lxc_netdev *netdev;
4222
46cd2845 4223 p1 = strchr(key, '.');
72d0e1cb
SG
4224 if (!p1 || *(p1+1) == '\0')
4225 p1 = NULL;
4226
4227 ret = sscanf(key, "%d", &idx);
4228 if (ret != 1) return -1;
4229 if (idx < 0)
4230 return -1;
4231
4232 i = 0;
4233 lxc_list_for_each(it, &c->network) {
4234 if (i == idx)
4235 break;
4236 i++;
4237 }
4238 if (i < idx) // we don't have that many nics defined
4239 return -1;
4240
4241 if (!it || !it->elem)
4242 return -1;
4243
4244 netdev = it->elem;
4245
4246 if (!p1) {
4247 lxc_remove_nic(it);
52d21d40 4248 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4249 struct lxc_list *it2,*next;
4250 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4251 lxc_list_del(it2);
4252 free(it2->elem);
4253 free(it2);
4254 }
52d21d40 4255 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4256 struct lxc_list *it2,*next;
4257 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4258 lxc_list_del(it2);
4259 free(it2->elem);
4260 free(it2);
4261 }
72d0e1cb
SG
4262 }
4263 else return -1;
4264
4265 return 0;
4266}
4267
4268int lxc_clear_config_network(struct lxc_conf *c)
4269{
9ebb03ad
DE
4270 struct lxc_list *it,*next;
4271 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4272 lxc_remove_nic(it);
4273 }
4274 return 0;
4275}
4276
4277int lxc_clear_config_caps(struct lxc_conf *c)
4278{
9ebb03ad 4279 struct lxc_list *it,*next;
72d0e1cb 4280
9ebb03ad 4281 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4282 lxc_list_del(it);
4283 free(it->elem);
4284 free(it);
4285 }
4286 return 0;
4287}
4288
74a3920a 4289static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4290 struct lxc_list *it, *next;
4291
4355ab5f 4292 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4293 lxc_list_del(it);
4294 free(it->elem);
4295 free(it);
4296 }
4297 return 0;
4298}
4299
4355ab5f
SH
4300int lxc_clear_idmaps(struct lxc_conf *c)
4301{
4302 return lxc_free_idmap(&c->id_map);
4303}
4304
1fb86a7c
SH
4305int lxc_clear_config_keepcaps(struct lxc_conf *c)
4306{
4307 struct lxc_list *it,*next;
4308
4309 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4310 lxc_list_del(it);
4311 free(it->elem);
4312 free(it);
4313 }
4314 return 0;
4315}
4316
12a50cc6 4317int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4318{
9ebb03ad 4319 struct lxc_list *it,*next;
72d0e1cb 4320 bool all = false;
a6390f01 4321 const char *k = NULL;
72d0e1cb
SG
4322
4323 if (strcmp(key, "lxc.cgroup") == 0)
4324 all = true;
a6390f01
WB
4325 else if (strncmp(key, "lxc.cgroup.", sizeof("lxc.cgroup.")-1) == 0)
4326 k = key + sizeof("lxc.cgroup.")-1;
4327 else
4328 return -1;
72d0e1cb 4329
9ebb03ad 4330 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4331 struct lxc_cgroup *cg = it->elem;
4332 if (!all && strcmp(cg->subsystem, k) != 0)
4333 continue;
4334 lxc_list_del(it);
4335 free(cg->subsystem);
4336 free(cg->value);
4337 free(cg);
4338 free(it);
4339 }
4340 return 0;
4341}
4342
c6d09e15
WB
4343int lxc_clear_limits(struct lxc_conf *c, const char *key)
4344{
4345 struct lxc_list *it, *next;
4346 bool all = false;
4347 const char *k = NULL;
4348
4349 if (strcmp(key, "lxc.limit") == 0)
4350 all = true;
4351 else if (strncmp(key, "lxc.limit.", sizeof("lxc.limit.")-1) == 0)
4352 k = key + sizeof("lxc.limit.")-1;
4353 else
4354 return -1;
4355
4356 lxc_list_for_each_safe(it, &c->limits, next) {
4357 struct lxc_limit *lim = it->elem;
4358 if (!all && strcmp(lim->resource, k) != 0)
4359 continue;
4360 lxc_list_del(it);
4361 free(lim->resource);
4362 free(lim);
4363 free(it);
4364 }
4365 return 0;
4366}
4367
ee1e7aa0
SG
4368int lxc_clear_groups(struct lxc_conf *c)
4369{
4370 struct lxc_list *it,*next;
4371
4372 lxc_list_for_each_safe(it, &c->groups, next) {
4373 lxc_list_del(it);
4374 free(it->elem);
4375 free(it);
4376 }
4377 return 0;
4378}
4379
ab799c0b
SG
4380int lxc_clear_environment(struct lxc_conf *c)
4381{
4382 struct lxc_list *it,*next;
4383
4384 lxc_list_for_each_safe(it, &c->environment, next) {
4385 lxc_list_del(it);
4386 free(it->elem);
4387 free(it);
4388 }
4389 return 0;
4390}
4391
4392
72d0e1cb
SG
4393int lxc_clear_mount_entries(struct lxc_conf *c)
4394{
9ebb03ad 4395 struct lxc_list *it,*next;
72d0e1cb 4396
9ebb03ad 4397 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4398 lxc_list_del(it);
4399 free(it->elem);
4400 free(it);
4401 }
4402 return 0;
4403}
4404
b099e9e9
SH
4405int lxc_clear_automounts(struct lxc_conf *c)
4406{
4407 c->auto_mounts = 0;
4408 return 0;
4409}
4410
12a50cc6 4411int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4412{
9ebb03ad 4413 struct lxc_list *it,*next;
17ed13a3 4414 bool all = false, done = false;
a6390f01 4415 const char *k = NULL;
72d0e1cb
SG
4416 int i;
4417
17ed13a3
SH
4418 if (strcmp(key, "lxc.hook") == 0)
4419 all = true;
a6390f01
WB
4420 else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.")-1) == 0)
4421 k = key + sizeof("lxc.hook.")-1;
4422 else
4423 return -1;
17ed13a3 4424
72d0e1cb 4425 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4426 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4427 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4428 lxc_list_del(it);
4429 free(it->elem);
4430 free(it);
4431 }
4432 done = true;
72d0e1cb
SG
4433 }
4434 }
17ed13a3
SH
4435
4436 if (!done) {
4437 ERROR("Invalid hook key: %s", key);
4438 return -1;
4439 }
72d0e1cb
SG
4440 return 0;
4441}
8eb5694b 4442
74a3920a 4443static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4444{
4445 int i;
4446
0cf45501 4447 if (!conf->saved_nics)
7b35f3d6
SH
4448 return;
4449 for (i=0; i < conf->num_savednics; i++)
4450 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4451 free(conf->saved_nics);
4452}
4453
4184c3e1
SH
4454static inline void lxc_clear_aliens(struct lxc_conf *conf)
4455{
4456 struct lxc_list *it,*next;
4457
4458 lxc_list_for_each_safe(it, &conf->aliens, next) {
4459 lxc_list_del(it);
4460 free(it->elem);
4461 free(it);
4462 }
4463}
4464
f979ac15
SH
4465static inline void lxc_clear_includes(struct lxc_conf *conf)
4466{
4467 struct lxc_list *it,*next;
4468
4469 lxc_list_for_each_safe(it, &conf->includes, next) {
4470 lxc_list_del(it);
4471 free(it->elem);
4472 free(it);
4473 }
4474}
4475
8eb5694b
SH
4476void lxc_conf_free(struct lxc_conf *conf)
4477{
4478 if (!conf)
4479 return;
858377e4
SH
4480 if (current_config == conf)
4481 current_config = NULL;
f10fad2f
ME
4482 free(conf->console.log_path);
4483 free(conf->console.path);
4484 free(conf->rootfs.mount);
b3b8c97f 4485 free(conf->rootfs.bdev_type);
f10fad2f
ME
4486 free(conf->rootfs.options);
4487 free(conf->rootfs.path);
f10fad2f 4488 free(conf->logfile);
858377e4
SH
4489 if (conf->logfd != -1)
4490 close(conf->logfd);
f10fad2f
ME
4491 free(conf->utsname);
4492 free(conf->ttydir);
4493 free(conf->fstab);
4494 free(conf->rcfile);
4495 free(conf->init_cmd);
6b0d5538 4496 free(conf->unexpanded_config);
393903d1 4497 free(conf->pty_names);
76d0127f 4498 free(conf->syslog);
8eb5694b 4499 lxc_clear_config_network(conf);
f10fad2f
ME
4500 free(conf->lsm_aa_profile);
4501 free(conf->lsm_se_context);
769872f9 4502 lxc_seccomp_free(conf);
8eb5694b 4503 lxc_clear_config_caps(conf);
1fb86a7c 4504 lxc_clear_config_keepcaps(conf);
8eb5694b 4505 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4506 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4507 lxc_clear_mount_entries(conf);
7b35f3d6 4508 lxc_clear_saved_nics(conf);
27c27d73 4509 lxc_clear_idmaps(conf);
ee1e7aa0 4510 lxc_clear_groups(conf);
f979ac15 4511 lxc_clear_includes(conf);
761d81ca 4512 lxc_clear_aliens(conf);
ab799c0b 4513 lxc_clear_environment(conf);
c6d09e15 4514 lxc_clear_limits(conf, "lxc.limit");
8eb5694b
SH
4515 free(conf);
4516}
4355ab5f
SH
4517
/*
 * Argument bundle handed to run_userns_fn() through lxc_clone() by
 * userns_exec_1().
 */
struct userns_fn_data {
	int (*fn)(void *);	/* callback run once the start byte was read */
	void *arg;		/* passed to fn() unchanged */
	int p[2];		/* pipe: p[0] is read by run_userns_fn(), p[1] is written by userns_exec_1() */
};
4523
4524static int run_userns_fn(void *data)
4525{
4526 struct userns_fn_data *d = data;
4527 char c;
4528 // we're not sharing with the parent any more, if it was a thread
4529
4530 close(d->p[1]);
4531 if (read(d->p[0], &c, 1) != 1)
4532 return -1;
4533 close(d->p[0]);
4534 return d->fn(d->arg);
4535}
4536
4537/*
8b227008
TS
4538 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4539 * if they are not already there.
4355ab5f 4540 */
8b227008
TS
4541static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4542 uid_t uid, gid_t gid)
4355ab5f 4543{
8b227008
TS
4544 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4545 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4546 struct lxc_list *new = NULL, *tmp, *it, *next;
4547 struct id_map *entry;
4548
3ec1648d
SH
4549 new = malloc(sizeof(*new));
4550 if (!new) {
4551 ERROR("Out of memory building id map");
4552 return NULL;
4553 }
4554 lxc_list_init(new);
4555
8b227008
TS
4556 if (hostuid_mapped < 0) {
4557 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4558 if (hostuid_mapped < 0)
3ec1648d
SH
4559 goto err;
4560 tmp = malloc(sizeof(*tmp));
4561 if (!tmp)
4562 goto err;
4355ab5f
SH
4563 entry = malloc(sizeof(*entry));
4564 if (!entry) {
3ec1648d
SH
4565 free(tmp);
4566 goto err;
4355ab5f 4567 }
3ec1648d 4568 tmp->elem = entry;
4355ab5f 4569 entry->idtype = ID_TYPE_UID;
8b227008
TS
4570 entry->nsid = hostuid_mapped;
4571 entry->hostid = (unsigned long) uid;
4572 entry->range = 1;
4573 lxc_list_add_tail(new, tmp);
4574 }
4575 if (hostgid_mapped < 0) {
4576 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4577 if (hostgid_mapped < 0)
4578 goto err;
4579 tmp = malloc(sizeof(*tmp));
4580 if (!tmp)
4581 goto err;
4582 entry = malloc(sizeof(*entry));
4583 if (!entry) {
4584 free(tmp);
4585 goto err;
4586 }
4587 tmp->elem = entry;
4588 entry->idtype = ID_TYPE_GID;
4589 entry->nsid = hostgid_mapped;
4590 entry->hostid = (unsigned long) gid;
4355ab5f 4591 entry->range = 1;
3ec1648d 4592 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4593 }
4594 lxc_list_for_each_safe(it, &conf->id_map, next) {
4595 tmp = malloc(sizeof(*tmp));
4596 if (!tmp)
4597 goto err;
4598 entry = malloc(sizeof(*entry));
4599 if (!entry) {
4600 free(tmp);
4601 goto err;
4602 }
4603 memset(entry, 0, sizeof(*entry));
4604 memcpy(entry, it->elem, sizeof(*entry));
4605 tmp->elem = entry;
3ec1648d 4606 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4607 }
4608
4609 return new;
4610
4611err:
8b227008 4612 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4613 if (new)
4614 lxc_free_idmap(new);
c30ac545 4615 free(new);
4355ab5f
SH
4616 return NULL;
4617}
4618
4619/*
4620 * Run a function in a new user namespace.
8b227008 4621 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4622 */
4623int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4624{
4625 int ret, pid;
4626 struct userns_fn_data d;
4627 char c = '1';
4628 int p[2];
4629 struct lxc_list *idmap;
4630
4355ab5f 4631 ret = pipe(p);
4355ab5f
SH
4632 if (ret < 0) {
4633 SYSERROR("opening pipe");
4634 return -1;
4635 }
4636 d.fn = fn;
4637 d.arg = data;
4638 d.p[0] = p[0];
4639 d.p[1] = p[1];
4640 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4641 if (pid < 0)
4642 goto err;
4355ab5f 4643 close(p[0]);
4355ab5f
SH
4644 p[0] = -1;
4645
8b227008
TS
4646 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4647 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4648 goto err;
4649 }
4650
4651 ret = lxc_map_ids(idmap, pid);
4652 lxc_free_idmap(idmap);
88dd66fc 4653 free(idmap);
565e571c 4654 if (ret) {
4355ab5f
SH
4655 ERROR("Error setting up child mappings");
4656 goto err;
4657 }
4658
4659 // kick the child
4660 if (write(p[1], &c, 1) != 1) {
4661 SYSERROR("writing to pipe to child");
4662 goto err;
4663 }
4664
3139aead
SG
4665 ret = wait_for_pid(pid);
4666
4667 close(p[1]);
4668 return ret;
4669
4355ab5f 4670err:
4355ab5f
SH
4671 if (p[0] != -1)
4672 close(p[0]);
4673 close(p[1]);
4355ab5f
SH
4674 return -1;
4675}
97e9cfa0 4676
/*
 * Return a malloc()ed copy of the login name getpwuid() reports for the
 * effective uid, or NULL if the lookup (or the copy) fails. The caller frees
 * the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4688
/*
 * Return a malloc()ed copy of the group name getgrgid() reports for the
 * effective gid, or NULL if the lookup (or the copy) fails. The caller frees
 * the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4700
a96a8e8c 4701/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4702void suggest_default_idmap(void)
4703{
4704 FILE *f;
4705 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4706 char *line = NULL;
4707 char *uname, *gname;
4708 size_t len = 0;
4709
4710 if (!(uname = getuname()))
4711 return;
4712
4713 if (!(gname = getgname())) {
4714 free(uname);
4715 return;
4716 }
4717
4718 f = fopen(subuidfile, "r");
4719 if (!f) {
4720 ERROR("Your system is not configured with subuids");
4721 free(gname);
4722 free(uname);
4723 return;
4724 }
4725 while (getline(&line, &len, f) != -1) {
b7930180 4726 size_t no_newline = 0;
97e9cfa0
SH
4727 char *p = strchr(line, ':'), *p2;
4728 if (*line == '#')
4729 continue;
4730 if (!p)
4731 continue;
4732 *p = '\0';
4733 p++;
4734 if (strcmp(line, uname))
4735 continue;
4736 p2 = strchr(p, ':');
4737 if (!p2)
4738 continue;
4739 *p2 = '\0';
4740 p2++;
4741 if (!*p2)
4742 continue;
b7930180
CB
4743 no_newline = strcspn(p2, "\n");
4744 p2[no_newline] = '\0';
4745
b7b2fde4
CB
4746 if (lxc_safe_uint(p, &uid) < 0)
4747 WARN("Could not parse UID.");
4748 if (lxc_safe_uint(p2, &urange) < 0)
4749 WARN("Could not parse UID range.");
97e9cfa0
SH
4750 }
4751 fclose(f);
4752
6be7389a 4753 f = fopen(subgidfile, "r");
97e9cfa0
SH
4754 if (!f) {
4755 ERROR("Your system is not configured with subgids");
4756 free(gname);
4757 free(uname);
4758 return;
4759 }
4760 while (getline(&line, &len, f) != -1) {
b7930180 4761 size_t no_newline = 0;
97e9cfa0
SH
4762 char *p = strchr(line, ':'), *p2;
4763 if (*line == '#')
4764 continue;
4765 if (!p)
4766 continue;
4767 *p = '\0';
4768 p++;
4769 if (strcmp(line, uname))
4770 continue;
4771 p2 = strchr(p, ':');
4772 if (!p2)
4773 continue;
4774 *p2 = '\0';
4775 p2++;
4776 if (!*p2)
4777 continue;
b7930180
CB
4778 no_newline = strcspn(p2, "\n");
4779 p2[no_newline] = '\0';
4780
b7b2fde4
CB
4781 if (lxc_safe_uint(p, &gid) < 0)
4782 WARN("Could not parse GID.");
4783 if (lxc_safe_uint(p2, &grange) < 0)
4784 WARN("Could not parse GID range.");
97e9cfa0
SH
4785 }
4786 fclose(f);
4787
f10fad2f 4788 free(line);
97e9cfa0
SH
4789
4790 if (!urange || !grange) {
4791 ERROR("You do not have subuids or subgids allocated");
4792 ERROR("Unprivileged containers require subuids and subgids");
4793 return;
4794 }
4795
4796 ERROR("You must either run as root, or define uid mappings");
4797 ERROR("To pass uid mappings to lxc-create, you could create");
4798 ERROR("~/.config/lxc/default.conf:");
4799 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4800 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4801 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4802
4803 free(gname);
4804 free(uname);
4805}
aaf26830 4806
a7307747
SH
4807static void free_cgroup_settings(struct lxc_list *result)
4808{
4809 struct lxc_list *iterator, *next;
4810
4811 lxc_list_for_each_safe(iterator, result, next) {
4812 lxc_list_del(iterator);
4813 free(iterator);
4814 }
4815 free(result);
4816}
4817
aaf26830
KT
4818/*
4819 * Return the list of cgroup_settings sorted according to the following rules
4820 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4821 */
4822struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4823{
4824 struct lxc_list *result;
4825 struct lxc_list *memsw_limit = NULL;
4826 struct lxc_list *it = NULL;
4827 struct lxc_cgroup *cg = NULL;
4828 struct lxc_list *item = NULL;
4829
4830 result = malloc(sizeof(*result));
fac7c663
KT
4831 if (!result) {
4832 ERROR("failed to allocate memory to sort cgroup settings");
4833 return NULL;
4834 }
aaf26830
KT
4835 lxc_list_init(result);
4836
4837 /*Iterate over the cgroup settings and copy them to the output list*/
4838 lxc_list_for_each(it, cgroup_settings) {
4839 item = malloc(sizeof(*item));
fac7c663
KT
4840 if (!item) {
4841 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4842 free_cgroup_settings(result);
fac7c663
KT
4843 return NULL;
4844 }
aaf26830
KT
4845 item->elem = it->elem;
4846 cg = it->elem;
4847 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4848 /* Store the memsw_limit location */
4849 memsw_limit = item;
4850 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4851 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4852 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4853 item->elem = memsw_limit->elem;
4854 memsw_limit->elem = it->elem;
4855 }
4856 lxc_list_add_tail(result, item);
4857 }
4858
4859 return result;
a7307747 4860}