]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
coverity: #1436916
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
1d52bdf7
CB
23
24#define _GNU_SOURCE
d06245b8
NC
25#include "config.h"
26
9d257a2a 27#include <arpa/inet.h>
8f3e280e
CB
28#include <dirent.h>
29#include <errno.h>
30#include <fcntl.h>
31#include <grp.h>
32#include <inttypes.h>
33#include <libgen.h>
9d257a2a
CB
34#include <linux/loop.h>
35#include <net/if.h>
36#include <netinet/in.h>
8f3e280e
CB
37#include <pwd.h>
38#include <stdarg.h>
0ad19a3f 39#include <stdio.h>
0ad19a3f 40#include <stdlib.h>
0ad19a3f 41#include <string.h>
8f3e280e
CB
42#include <sys/mman.h>
43#include <sys/mount.h>
44#include <sys/param.h>
45#include <sys/prctl.h>
6a49f05e 46#include <sys/sendfile.h>
8f3e280e 47#include <sys/socket.h>
9d257a2a 48#include <sys/stat.h>
2d76d1d7 49#include <sys/syscall.h>
9d257a2a 50#include <sys/sysmacros.h>
97e9cfa0 51#include <sys/types.h>
8f3e280e
CB
52#include <sys/utsname.h>
53#include <sys/wait.h>
9d257a2a
CB
54#include <time.h>
55#include <unistd.h>
1d52bdf7 56
af6824fc 57#ifdef MAJOR_IN_MKDEV
9d257a2a 58#include <sys/mkdev.h>
af6824fc 59#endif
af6824fc 60
614305f3 61#ifdef HAVE_STATVFS
2938f7c8 62#include <sys/statvfs.h>
614305f3 63#endif
e827ff7e
SG
64
65#if HAVE_PTY_H
b0a33c1e 66#include <pty.h>
e827ff7e
SG
67#else
68#include <../include/openpty.h>
69#endif
0ad19a3f 70
9d257a2a
CB
71#if HAVE_LIBCAP
72#include <sys/capability.h>
73#endif
74
75#if HAVE_SYS_PERSONALITY_H
76#include <sys/personality.h>
77#endif
78
79#if IS_BIONIC
80#include <../include/lxcmntent.h>
81#else
82#include <mntent.h>
83#endif
84
85#if !defined(HAVE_PRLIMIT) && defined(HAVE_PRLIMIT64)
86#include <../include/prlimit.h>
87#endif
88
e8bd4e43 89#include "af_unix.h"
9d257a2a 90#include "caps.h"
8f3e280e 91#include "cgroup.h"
1b09f2c0 92#include "conf.h"
1ed6ba91 93#include "confile_utils.h"
8f3e280e 94#include "error.h"
1b09f2c0 95#include "log.h"
0ed9b1bc 96#include "lsm/lsm.h"
025ed0f3 97#include "lxclock.h"
8f3e280e 98#include "lxcseccomp.h"
4355ab5f 99#include "namespace.h"
8f3e280e
CB
100#include "network.h"
101#include "parse.h"
732375f5 102#include "ringbuf.h"
794248d0 103#include "start.h"
28d832c4 104#include "storage.h"
28d832c4 105#include "storage/overlay.h"
0ed9b1bc 106#include "terminal.h"
8f3e280e 107#include "utils.h"
d0a36f2c 108
/* Provide MS_PRIVATE and MS_LAZYTIME when none of the headers included
 * above defined them.
 */
#if !defined(MS_PRIVATE)
#define MS_PRIVATE (1 << 18)
#endif

#if !defined(MS_LAZYTIME)
#define MS_LAZYTIME (1 << 25)
#endif
116
36eb9bde 117lxc_log_define(lxc_conf, lxc);
e5bda9ee 118
/* The lxc_conf of the container an API call is currently working on; the
 * error reporting calls read it. It is thread-local when the build defines
 * HAVE_TLS and a plain global otherwise.
 */
#ifndef HAVE_TLS
struct lxc_conf *current_config;
#else
__thread struct lxc_conf *current_config;
#endif
127
/* pivot_root(): use the C library's version when the build found one
 * (HAVE_PIVOT_ROOT); otherwise supply a local wrapper around the raw system
 * call, or fail with ENOSYS when the syscall number is unknown as well.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
142
0fd73091
CB
143char *lxchook_names[NUM_LXC_HOOKS] = {
144 "pre-start",
145 "pre-mount",
146 "mount",
147 "autodev",
148 "start",
149 "stop",
150 "post-stop",
151 "clone",
152 "destroy",
153 "start-host"
154};
72d0e1cb 155
/* One row of a mount-option lookup table. */
struct mount_opt {
	char *name; /* option keyword, e.g. "ro"; NULL in the terminating row */
	int clear;  /* 1 on rows whose keyword is the inverse of `flag` (e.g. "rw" vs MS_RDONLY) */
	int flag;   /* MS_* bit(s) associated with the keyword */
};
161
/* Maps a capability keyword to its numeric CAP_* value. */
struct caps_opt {
	char *name; /* keyword without the "CAP_" prefix, lower case */
	int value;  /* matching CAP_* constant */
};
166
/* Maps a resource-limit keyword to its numeric RLIMIT_* value. */
struct limit_opt {
	char *name; /* keyword without the "RLIMIT_" prefix, lower case */
	int value;  /* matching RLIMIT_* constant */
};
171
998ac676 172static struct mount_opt mount_opt[] = {
470b359b
CB
173 { "async", 1, MS_SYNCHRONOUS },
174 { "atime", 1, MS_NOATIME },
175 { "bind", 0, MS_BIND },
88d413d5 176 { "defaults", 0, 0 },
88d413d5 177 { "dev", 1, MS_NODEV },
470b359b 178 { "diratime", 1, MS_NODIRATIME },
88d413d5 179 { "dirsync", 0, MS_DIRSYNC },
470b359b 180 { "exec", 1, MS_NOEXEC },
8912711c 181 { "lazytime", 0, MS_LAZYTIME },
88d413d5 182 { "mand", 0, MS_MANDLOCK },
88d413d5 183 { "noatime", 0, MS_NOATIME },
470b359b 184 { "nodev", 0, MS_NODEV },
88d413d5 185 { "nodiratime", 0, MS_NODIRATIME },
470b359b
CB
186 { "noexec", 0, MS_NOEXEC },
187 { "nomand", 1, MS_MANDLOCK },
188 { "norelatime", 1, MS_RELATIME },
189 { "nostrictatime", 1, MS_STRICTATIME },
190 { "nosuid", 0, MS_NOSUID },
88d413d5
SW
191 { "rbind", 0, MS_BIND|MS_REC },
192 { "relatime", 0, MS_RELATIME },
470b359b
CB
193 { "remount", 0, MS_REMOUNT },
194 { "ro", 0, MS_RDONLY },
195 { "rw", 1, MS_RDONLY },
88d413d5 196 { "strictatime", 0, MS_STRICTATIME },
470b359b
CB
197 { "suid", 1, MS_NOSUID },
198 { "sync", 0, MS_SYNCHRONOUS },
88d413d5 199 { NULL, 0, 0 },
998ac676
RT
200};
201
d840039e 202static struct mount_opt propagation_opt[] = {
0fd73091
CB
203 { "private", 0, MS_PRIVATE },
204 { "shared", 0, MS_SHARED },
205 { "slave", 0, MS_SLAVE },
206 { "unbindable", 0, MS_UNBINDABLE },
207 { "rprivate", 0, MS_PRIVATE|MS_REC },
208 { "rshared", 0, MS_SHARED|MS_REC },
209 { "rslave", 0, MS_SLAVE|MS_REC },
210 { "runbindable", 0, MS_UNBINDABLE|MS_REC },
211 { NULL, 0, 0 },
d840039e
YT
212};
213
81810dd1 214static struct caps_opt caps_opt[] = {
8560cd36 215#if HAVE_LIBCAP
0fd73091
CB
216 { "chown", CAP_CHOWN },
217 { "dac_override", CAP_DAC_OVERRIDE },
218 { "dac_read_search", CAP_DAC_READ_SEARCH },
219 { "fowner", CAP_FOWNER },
220 { "fsetid", CAP_FSETID },
221 { "kill", CAP_KILL },
222 { "setgid", CAP_SETGID },
223 { "setuid", CAP_SETUID },
224 { "setpcap", CAP_SETPCAP },
225 { "linux_immutable", CAP_LINUX_IMMUTABLE },
226 { "net_bind_service", CAP_NET_BIND_SERVICE },
227 { "net_broadcast", CAP_NET_BROADCAST },
228 { "net_admin", CAP_NET_ADMIN },
229 { "net_raw", CAP_NET_RAW },
230 { "ipc_lock", CAP_IPC_LOCK },
231 { "ipc_owner", CAP_IPC_OWNER },
232 { "sys_module", CAP_SYS_MODULE },
233 { "sys_rawio", CAP_SYS_RAWIO },
234 { "sys_chroot", CAP_SYS_CHROOT },
235 { "sys_ptrace", CAP_SYS_PTRACE },
236 { "sys_pacct", CAP_SYS_PACCT },
237 { "sys_admin", CAP_SYS_ADMIN },
238 { "sys_boot", CAP_SYS_BOOT },
239 { "sys_nice", CAP_SYS_NICE },
240 { "sys_resource", CAP_SYS_RESOURCE },
241 { "sys_time", CAP_SYS_TIME },
242 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
243 { "mknod", CAP_MKNOD },
244 { "lease", CAP_LEASE },
57b837e2 245#ifdef CAP_AUDIT_READ
0fd73091 246 { "audit_read", CAP_AUDIT_READ },
57b837e2 247#endif
9527e566 248#ifdef CAP_AUDIT_WRITE
0fd73091 249 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
250#endif
251#ifdef CAP_AUDIT_CONTROL
0fd73091 252 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 253#endif
0fd73091
CB
254 { "setfcap", CAP_SETFCAP },
255 { "mac_override", CAP_MAC_OVERRIDE },
256 { "mac_admin", CAP_MAC_ADMIN },
5170c716 257#ifdef CAP_SYSLOG
0fd73091 258 { "syslog", CAP_SYSLOG },
5170c716
CS
259#endif
260#ifdef CAP_WAKE_ALARM
0fd73091 261 { "wake_alarm", CAP_WAKE_ALARM },
5170c716 262#endif
2b54359b 263#ifdef CAP_BLOCK_SUSPEND
0fd73091 264 { "block_suspend", CAP_BLOCK_SUSPEND },
2b54359b 265#endif
495d2046 266#endif
8560cd36 267};
81810dd1 268
c6d09e15
WB
269static struct limit_opt limit_opt[] = {
270#ifdef RLIMIT_AS
271 { "as", RLIMIT_AS },
272#endif
273#ifdef RLIMIT_CORE
274 { "core", RLIMIT_CORE },
275#endif
276#ifdef RLIMIT_CPU
277 { "cpu", RLIMIT_CPU },
278#endif
279#ifdef RLIMIT_DATA
280 { "data", RLIMIT_DATA },
281#endif
282#ifdef RLIMIT_FSIZE
283 { "fsize", RLIMIT_FSIZE },
284#endif
285#ifdef RLIMIT_LOCKS
286 { "locks", RLIMIT_LOCKS },
287#endif
288#ifdef RLIMIT_MEMLOCK
289 { "memlock", RLIMIT_MEMLOCK },
290#endif
291#ifdef RLIMIT_MSGQUEUE
292 { "msgqueue", RLIMIT_MSGQUEUE },
293#endif
294#ifdef RLIMIT_NICE
295 { "nice", RLIMIT_NICE },
296#endif
297#ifdef RLIMIT_NOFILE
298 { "nofile", RLIMIT_NOFILE },
299#endif
300#ifdef RLIMIT_NPROC
301 { "nproc", RLIMIT_NPROC },
302#endif
303#ifdef RLIMIT_RSS
304 { "rss", RLIMIT_RSS },
305#endif
306#ifdef RLIMIT_RTPRIO
307 { "rtprio", RLIMIT_RTPRIO },
308#endif
309#ifdef RLIMIT_RTTIME
310 { "rttime", RLIMIT_RTTIME },
311#endif
312#ifdef RLIMIT_SIGPENDING
313 { "sigpending", RLIMIT_SIGPENDING },
314#endif
315#ifdef RLIMIT_STACK
316 { "stack", RLIMIT_STACK },
317#endif
318};
319
91c3830e
SH
320static int run_buffer(char *buffer)
321{
8e7da691 322 int ret;
0fd73091
CB
323 char *output;
324 struct lxc_popen_FILE *f;
91c3830e 325
ebec9176 326 f = lxc_popen(buffer);
91c3830e 327 if (!f) {
3f60c2f7 328 SYSERROR("Failed to popen() %s", buffer);
91c3830e
SH
329 return -1;
330 }
331
332 output = malloc(LXC_LOG_BUFFER_SIZE);
333 if (!output) {
3f60c2f7 334 ERROR("Failed to allocate memory for %s", buffer);
ebec9176 335 lxc_pclose(f);
91c3830e
SH
336 return -1;
337 }
338
062b72c6 339 while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
3f60c2f7 340 DEBUG("Script %s with output: %s", buffer, output);
91c3830e
SH
341
342 free(output);
343
ebec9176 344 ret = lxc_pclose(f);
8e7da691 345 if (ret == -1) {
3f60c2f7 346 SYSERROR("Script exited with error");
91c3830e 347 return -1;
8e7da691 348 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
3f60c2f7 349 ERROR("Script exited with status %d", WEXITSTATUS(ret));
8e7da691
DE
350 return -1;
351 } else if (WIFSIGNALED(ret)) {
3f60c2f7 352 ERROR("Script terminated by signal %d", WTERMSIG(ret));
8e7da691 353 return -1;
91c3830e
SH
354 }
355
356 return 0;
357}
358
/* Export one hook environment variable in the calling process and log the
 * outcome. Returns 0 on success, -1 when setenv() failed.
 */
static int run_script_setenv(const char *key, const char *value)
{
	if (setenv(key, value, 1) < 0) {
		SYSERROR("Failed to set environment variable: %s=%s", key, value);
		return -1;
	}

	TRACE("Set environment variable: %s=%s", key, value);
	return 0;
}

/* Build the command line for a hook script and run it via run_buffer().
 *
 * @name         container name
 * @hook_version 0: name, section and hook name are passed as positional
 *               arguments ("exec script name section hookname argv...").
 *               Otherwise the command line is "exec script argv...";
 *               for version 1 the information is additionally exported
 *               through the environment (LXC_HOOK_TYPE, LXC_HOOK_SECTION
 *               and, for section "net", LXC_NET_TYPE / LXC_NET_PARENT /
 *               LXC_NET_PEER taken from argv[0..2]).
 * @section      config section the hook belongs to
 * @script       path of the script to execute
 * @hookname     hook type
 * @argv         NULL-terminated vector of extra arguments, may be NULL
 *
 * Note that the environment variables are set in the calling process.
 *
 * Returns the result of run_buffer() (0 on success, -1 on failure),
 * -EFBIG when the command line would exceed INT_MAX bytes, -ENOMEM when
 * the buffer cannot be allocated, and -1 on any other failure.
 */
int run_script_argv(const char *name, unsigned int hook_version,
		    const char *section, const char *script,
		    const char *hookname, char **argv)
{
	int buf_pos, i, ret;
	char *buffer;
	int fret = -1;
	size_t size = 0;

	if (hook_version == 0)
		INFO("Executing script \"%s\" for container \"%s\", config "
		     "section \"%s\"", script, name, section);
	else
		INFO("Executing script \"%s\" for container \"%s\"", script, name);

	/* Each extra argument is emitted as " <arg>". */
	for (i = 0; argv && argv[i]; i++)
		size += strlen(argv[i]) + 1;

	/* sizeof() accounts for the terminating NUL byte, the increment for
	 * the space between "exec" and the script.
	 */
	size += sizeof("exec");
	size += strlen(script);
	size++;

	if (size > INT_MAX)
		return -EFBIG;

	if (hook_version == 0) {
		size += strlen(hookname);
		size++;

		size += strlen(name);
		size++;

		size += strlen(section);
		size++;

		if (size > INT_MAX)
			return -EFBIG;
	}

	buffer = malloc(size);
	if (!buffer)
		return -ENOMEM;

	if (hook_version == 0)
		buf_pos = snprintf(buffer, size, "exec %s %s %s %s", script, name, section, hookname);
	else
		buf_pos = snprintf(buffer, size, "exec %s", script);
	if (buf_pos < 0 || (size_t)buf_pos >= size) {
		ERROR("Failed to create command line for script \"%s\"", script);
		goto on_error;
	}

	if (hook_version == 1) {
		if (run_script_setenv("LXC_HOOK_TYPE", hookname) < 0)
			goto on_error;

		if (run_script_setenv("LXC_HOOK_SECTION", section) < 0)
			goto on_error;

		if (strcmp(section, "net") == 0) {
			const char *net_type, *parent, *peer;
			bool is_veth;

			if (!argv || !argv[0]) {
				ERROR("Missing network type argument for script \"%s\"", script);
				goto on_error;
			}
			net_type = argv[0];

			if (run_script_setenv("LXC_NET_TYPE", net_type) < 0)
				goto on_error;

			parent = argv[1] ? argv[1] : "";
			/* argv[2] only exists when argv[1] is not the
			 * terminating NULL; never read past the terminator.
			 */
			peer = (argv[1] && argv[2]) ? argv[2] : "";

			is_veth = strcmp(net_type, "veth") == 0;
			if (is_veth && run_script_setenv("LXC_NET_PEER", peer) < 0)
				goto on_error;

			if (is_veth || strcmp(net_type, "macvlan") == 0 ||
			    strcmp(net_type, "phys") == 0) {
				if (run_script_setenv("LXC_NET_PARENT", parent) < 0)
					goto on_error;
			}
		}
	}

	for (i = 0; argv && argv[i]; i++) {
		size_t len = size - buf_pos;

		ret = snprintf(buffer + buf_pos, len, " %s", argv[i]);
		if (ret < 0 || (size_t)ret >= len) {
			ERROR("Failed to create command line for script \"%s\"", script);
			goto on_error;
		}
		buf_pos += ret;
	}

	fret = run_buffer(buffer);

on_error:
	free(buffer);
	return fret;
}
499
/* Run a hook script with a variadic argument list.
 *
 * The command line is "exec <script> <name> <section>" followed by every
 * additional string argument, each separated by a single space. The
 * variadic list must consist of char pointers and must be terminated by a
 * NULL pointer.
 *
 * The command line is built in a heap buffer: its size depends on
 * caller-supplied strings and is only bounded by INT_MAX, so it must not
 * live on the stack.
 *
 * Returns the result of run_buffer() (0 on success, -1 on failure) or -1
 * when the command line is too long, cannot be allocated or cannot be
 * formatted.
 */
int run_script(const char *name, const char *section, const char *script, ...)
{
	int ret;
	int fret = -1;
	char *buffer, *p;
	va_list ap;
	size_t pos;
	size_t size = 0;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\"",
	     script, name, section);

	/* Each extra argument is emitted as " <arg>". */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen("exec");
	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* Three separating spaces plus the terminating NUL byte. */
	size += 4;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer)
		return -1;

	ret = snprintf(buffer, size, "exec %s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size)
		goto out;
	pos = (size_t)ret;

	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - pos;

		ret = snprintf(buffer + pos, len, " %s", p);
		if (ret < 0 || (size_t)ret >= len) {
			va_end(ap);
			goto out;
		}
		pos += (size_t)ret;
	}
	va_end(ap);

	fret = run_buffer(buffer);

out:
	free(buffer);
	return fret;
}
544
0fd73091 545/* pin_rootfs
63fc76c3 546 * if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for
b7ed4bf0
CS
547 * the duration of the container run, to prevent the container from marking
548 * the underlying fs readonly on shutdown. unlink the file immediately so
63fc76c3
GJ
549 * no name pollution is happens.
550 * don't unlink on NFS to avoid random named stale handles.
0c547523
SH
551 * return -1 on error.
552 * return -2 if nothing needed to be pinned.
553 * return an open fd (>=0) if we pinned it.
554 */
555int pin_rootfs(const char *rootfs)
556{
0fd73091
CB
557 int fd, ret;
558 char absrootfs[MAXPATHLEN], absrootfspin[MAXPATHLEN];
0c547523 559 struct stat s;
63fc76c3 560 struct statfs sfs;
0c547523 561
e99ee0de 562 if (rootfs == NULL || strlen(rootfs) == 0)
0d03360a 563 return -2;
e99ee0de 564
00ec333b 565 if (!realpath(rootfs, absrootfs))
9be53773 566 return -2;
0c547523 567
0fd73091
CB
568 ret = stat(absrootfs, &s);
569 if (ret < 0)
0c547523 570 return -1;
0c547523 571
72f919c4 572 if (!S_ISDIR(s.st_mode))
0c547523
SH
573 return -2;
574
63fc76c3 575 ret = snprintf(absrootfspin, MAXPATHLEN, "%s/.lxc-keep", absrootfs);
00ec333b 576 if (ret >= MAXPATHLEN)
0c547523 577 return -1;
0c547523 578
0fd73091 579 fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR);
b7ed4bf0
CS
580 if (fd < 0)
581 return fd;
0fd73091 582
205fc010
CB
583 ret = fstatfs (fd, &sfs);
584 if (ret < 0)
585 return fd;
63fc76c3
GJ
586
587 if (sfs.f_type == NFS_SUPER_MAGIC) {
205fc010 588 DEBUG("Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin);
63fc76c3
GJ
589 return fd;
590 }
591
b7ed4bf0 592 (void)unlink(absrootfspin);
0fd73091 593
0c547523
SH
594 return fd;
595}
596
/* When a remount is requested, carry over the restrictive flags (nosuid,
 * nodev, read-only, noexec) that statvfs() reports for the existing mount,
 * so that they are honored.
 *
 * @s     path to inspect; when NULL, @d is inspected instead
 * @d     fallback path
 * @flags requested mount flags
 *
 * Returns @flags unchanged when MS_REMOUNT is not set, when both paths are
 * NULL, when statvfs() fails or when the build lacks HAVE_STATVFS;
 * otherwise @flags with the restrictive flags of the inspected path added.
 */
unsigned long add_required_remount_flags(const char *s, const char *d,
					 unsigned long flags)
{
#ifdef HAVE_STATVFS
	const unsigned long sticky = MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC;
	const char *target = s ? s : d;
	struct statvfs sb;

	if ((flags & MS_REMOUNT) == 0)
		return flags;

	if (target == NULL)
		return flags;

	if (statvfs(target, &sb) < 0)
		return flags;

	return flags | (sb.f_flag & sticky);
#else
	return flags;
#endif
}
635
4fb3cba5 636static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 637{
0fd73091 638 int i, r;
b06b8511
CS
639 static struct {
640 int match_mask;
641 int match_flag;
642 const char *source;
643 const char *destination;
644 const char *fstype;
645 unsigned long flags;
646 const char *options;
647 } default_mounts[] = {
0fd73091
CB
648 /* Read-only bind-mounting... In older kernels, doing that
649 * required to do one MS_BIND mount and then
650 * MS_REMOUNT|MS_RDONLY the same one. According to mount(2)
651 * manpage, MS_BIND honors MS_RDONLY from kernel 2.6.26
652 * onwards. However, this apparently does not work on kernel
653 * 3.8. Unfortunately, on that very same kernel, doing the same
654 * trick as above doesn't seem to work either, there one needs
655 * to ALSO specify MS_BIND for the remount, otherwise the
656 * entire fs is remounted read-only or the mount fails because
657 * it's busy... MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for
658 * kernels as low as 2.6.32...
368bbc02 659 */
0fd73091 660 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a 661 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
0fd73091
CB
662 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
663 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
664 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
665 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
666 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
667 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
668 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
669 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
670 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
671 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
672 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
673 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
674 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
675 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
676 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
677 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 678 };
368bbc02 679
b06b8511 680 for (i = 0; default_mounts[i].match_mask; i++) {
0fd73091
CB
681 int saved_errno;
682 unsigned long mflags;
683 char *destination = NULL;
684 char *source = NULL;
685 if ((flags & default_mounts[i].match_mask) != default_mounts[i].match_flag)
686 continue;
687
688 if (default_mounts[i].source) {
cc4fd506 689 /* will act like strdup if %r is not present */
0fd73091
CB
690 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
691 if (!source)
cc4fd506 692 return -1;
0fd73091 693 }
f24a52d5 694
0fd73091
CB
695 if (!default_mounts[i].destination) {
696 ERROR("BUG: auto mounts destination %d was NULL", i);
b06b8511 697 free(source);
0fd73091
CB
698 return -1;
699 }
700
701 /* will act like strdup if %r is not present */
702 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
703 if (!destination) {
704 saved_errno = errno;
705 free(source);
706 errno = saved_errno;
707 return -1;
708 }
709
710 mflags = add_required_remount_flags(source, destination,
711 default_mounts[i].flags);
712 r = safe_mount(source, destination, default_mounts[i].fstype,
713 mflags, default_mounts[i].options,
714 conf->rootfs.path ? conf->rootfs.mount : NULL);
715 saved_errno = errno;
716 if (r < 0 && errno == ENOENT) {
717 INFO("Mount source or target for \"%s\" on \"%s\" does "
718 "not exist. Skipping", source, destination);
719 r = 0;
720 } else if (r < 0) {
721 SYSERROR("Failed to mount \"%s\" on \"%s\" with flags %lu", source, destination, mflags);
722 }
723
724 free(source);
725 free(destination);
726 if (r < 0) {
727 errno = saved_errno;
728 return -1;
368bbc02 729 }
368bbc02
CS
730 }
731
b06b8511 732 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
733 int cg_flags;
734
3f69fb12 735 cg_flags = flags & (LXC_AUTO_CGROUP_MASK & ~LXC_AUTO_CGROUP_FORCE);
0fd73091
CB
736 /* If the type of cgroup mount was not specified, it depends on
737 * the container's capabilities as to what makes sense: if we
738 * have CAP_SYS_ADMIN, the read-only part can be remounted
739 * read-write anyway, so we may as well default to read-write;
740 * then the admin will not be given a false sense of security.
741 * (And if they really want mixed r/o r/w, then they can
742 * explicitly specify :mixed.) OTOH, if the container lacks
743 * CAP_SYS_ADMIN, do only default to :mixed, because then the
744 * container can't remount it read-write.
745 */
0769b82a
CS
746 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
747 int has_sys_admin = 0;
b0ee5983
CB
748
749 if (!lxc_list_empty(&conf->keepcaps))
0769b82a 750 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
b0ee5983 751 else
0769b82a 752 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
b0ee5983
CB
753
754 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC)
0769b82a 755 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
b0ee5983 756 else
0769b82a 757 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
0769b82a 758 }
0fd73091 759
3f69fb12 760 if (flags & LXC_AUTO_CGROUP_FORCE)
0fd73091
CB
761 cg_flags |= LXC_AUTO_CGROUP_FORCE;
762
2202afc9
CB
763 if (!handler->cgroup_ops->mount(handler->cgroup_ops,
764 handler,
765 conf->rootfs.path ? conf->rootfs.mount : "",
766 cg_flags)) {
0fd73091 767 SYSERROR("Failed to mount \"/sys/fs/cgroup\"");
b06b8511 768 return -1;
368bbc02
CS
769 }
770 }
771
368bbc02 772 return 0;
368bbc02
CS
773}
774
/* Set the hostname to utsname->nodename.
 *
 * A NULL @utsname means there is nothing to do and counts as success.
 * Returns 0 on success, -1 when sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;
	if (sethostname(hostname, strlen(hostname)) < 0) {
		SYSERROR("Failed to set the hostname to \"%s\"", hostname);
		return -1;
	}

	INFO("Set hostname to \"%s\"", hostname);
	return 0;
}
792
/* A symlink to create below the container's /dev directory. */
struct dev_symlinks {
	const char *oldpath; /* link target */
	const char *name;    /* link name, relative to /dev */
};

/* The standard fd/stdin/stdout/stderr links pointing into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
804
ed8704d0 805static int lxc_setup_dev_symlinks(const struct lxc_rootfs *rootfs)
69aa6655 806{
0fd73091 807 int i, ret;
69aa6655 808 char path[MAXPATHLEN];
09227be2 809 struct stat s;
69aa6655 810
69aa6655
DE
811 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
812 const struct dev_symlinks *d = &dev_symlinks[i];
0fd73091
CB
813
814 ret = snprintf(path, sizeof(path), "%s/dev/%s",
815 rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
816 if (ret < 0 || ret >= MAXPATHLEN)
817 return -1;
09227be2 818
0fd73091
CB
819 /* Stat the path first. If we don't get an error accept it as
820 * is and don't try to create it
09227be2 821 */
0fd73091
CB
822 ret = stat(path, &s);
823 if (ret == 0)
09227be2 824 continue;
09227be2 825
69aa6655
DE
826 ret = symlink(d->oldpath, path);
827 if (ret && errno != EEXIST) {
0fd73091
CB
828 if (errno == EROFS) {
829 WARN("Failed to create \"%s\". Read-only filesystem", path);
09227be2 830 } else {
0fd73091 831 SYSERROR("Failed to create \"%s\"", path);
09227be2
MW
832 return -1;
833 }
69aa6655
DE
834 }
835 }
0fd73091 836
69aa6655
DE
837 return 0;
838}
839
/* Build a space-separated list of ptys to pass to systemd.
 *
 * When *pp is NULL a new string "container_ttys=<name>" is allocated;
 * otherwise " <name>" is appended to the existing string, which is
 * reallocated as needed. Returns false on allocation failure, in which
 * case *pp is left as it was.
 */
static bool append_ttyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t cur_len;
	char *list;

	if (*pp == NULL) {
		list = malloc(prefix_len + name_len + 1);
		if (list == NULL)
			return false;

		memcpy(list, prefix, prefix_len);
		/* Copies the terminating NUL byte of name as well. */
		memcpy(list + prefix_len, name, name_len + 1);
		*pp = list;
		return true;
	}

	cur_len = strlen(*pp);
	/* Room for the separating space and the terminating NUL byte. */
	list = realloc(*pp, cur_len + name_len + 2);
	if (list == NULL)
		return false;

	list[cur_len] = ' ';
	memcpy(list + cur_len + 1, name, name_len + 1);
	*pp = list;

	return true;
}
864
2187efd3 865static int lxc_setup_ttys(struct lxc_conf *conf)
393903d1 866{
9e1045e3 867 int i, ret;
0e4be3cf 868 const struct lxc_tty_info *ttys = &conf->ttys;
885766f5 869 char *ttydir = ttys->dir;
7c6ef2a2 870 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
b0a33c1e 871
e8bd4e43 872 if (!conf->rootfs.path)
bc9bd0e3
DL
873 return 0;
874
885766f5 875 for (i = 0; i < ttys->max; i++) {
0e4be3cf 876 struct lxc_terminal_info *tty = &ttys->tty[i];
b0a33c1e 877
e8bd4e43 878 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
73363c61 879 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 880 return -1;
9e1045e3 881
7c6ef2a2
SH
882 if (ttydir) {
883 /* create dev/lxc/tty%d" */
9e1045e3
CB
884 ret = snprintf(lxcpath, sizeof(lxcpath),
885 "/dev/%s/tty%d", ttydir, i + 1);
73363c61 886 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
7c6ef2a2 887 return -1;
9e1045e3 888
7c6ef2a2 889 ret = creat(lxcpath, 0660);
9e1045e3 890 if (ret < 0 && errno != EEXIST) {
73363c61 891 SYSERROR("Failed to create \"%s\"", lxcpath);
7c6ef2a2
SH
892 return -1;
893 }
4d44e274
SH
894 if (ret >= 0)
895 close(ret);
9e1045e3 896
7c6ef2a2 897 ret = unlink(path);
9e1045e3 898 if (ret < 0 && errno != ENOENT) {
73363c61 899 SYSERROR("Failed to unlink \"%s\"", path);
7c6ef2a2
SH
900 return -1;
901 }
b0a33c1e 902
2520facd 903 ret = mount(tty->name, lxcpath, "none", MS_BIND, 0);
9e1045e3 904 if (ret < 0) {
73363c61 905 WARN("Failed to bind mount \"%s\" onto \"%s\"",
2520facd 906 tty->name, path);
7c6ef2a2
SH
907 continue;
908 }
0fd73091 909 DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name,
9e1045e3 910 path);
13954cce 911
9e1045e3
CB
912 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d",
913 ttydir, i + 1);
73363c61 914 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
9ba8130c 915 return -1;
9e1045e3 916
7c6ef2a2 917 ret = symlink(lxcpath, path);
9e1045e3 918 if (ret < 0) {
73363c61 919 SYSERROR("Failed to create symlink \"%s\" -> \"%s\"",
9e1045e3 920 path, lxcpath);
7c6ef2a2
SH
921 return -1;
922 }
923 } else {
9e1045e3
CB
924 /* If we populated /dev, then we need to create
925 * /dev/ttyN
926 */
927 ret = access(path, F_OK);
928 if (ret < 0) {
c6883f38 929 ret = creat(path, 0660);
9e1045e3 930 if (ret < 0) {
73363c61 931 SYSERROR("Failed to create \"%s\"", path);
c6883f38 932 /* this isn't fatal, continue */
025ed0f3 933 } else {
c6883f38 934 close(ret);
025ed0f3 935 }
c6883f38 936 }
9e1045e3 937
2520facd 938 ret = mount(tty->name, path, "none", MS_BIND, 0);
9e1045e3 939 if (ret < 0) {
2520facd 940 SYSERROR("Failed to mount '%s'->'%s'", tty->name, path);
7c6ef2a2
SH
941 continue;
942 }
9e1045e3 943
2520facd 944 DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name,
9e1045e3 945 path);
393903d1 946 }
9e1045e3 947
885766f5 948 if (!append_ttyname(&conf->ttys.tty_names, tty->name)) {
393903d1
SH
949 ERROR("Error setting up container_ttys string");
950 return -1;
b0a33c1e 951 }
952 }
953
885766f5 954 INFO("Finished setting up %zu /dev/tty<N> device(s)", ttys->max);
b0a33c1e 955 return 0;
956}
957
2187efd3
CB
958int lxc_allocate_ttys(const char *name, struct lxc_conf *conf)
959{
2187efd3 960 int i, ret;
0fd73091 961 struct lxc_tty_info *ttys = &conf->ttys;
2187efd3
CB
962
963 /* no tty in the configuration */
885766f5 964 if (ttys->max == 0)
2187efd3
CB
965 return 0;
966
885766f5 967 ttys->tty = malloc(sizeof(*ttys->tty) * ttys->max);
0e4be3cf 968 if (!ttys->tty)
2187efd3 969 return -ENOMEM;
2187efd3 970
885766f5 971 for (i = 0; i < ttys->max; i++) {
0e4be3cf 972 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 973
386e6768
CB
974 tty->master = -EBADF;
975 tty->slave = -EBADF;
2520facd
CB
976 ret = openpty(&tty->master, &tty->slave,
977 tty->name, NULL, NULL);
2187efd3 978 if (ret) {
0fd73091 979 SYSERROR("Failed to create tty %d", i);
885766f5 980 ttys->max = i;
0e4be3cf 981 lxc_delete_tty(ttys);
2187efd3
CB
982 return -ENOTTY;
983 }
984
0fd73091 985 DEBUG("Created tty \"%s\" with master fd %d and slave fd %d",
2520facd 986 tty->name, tty->master, tty->slave);
2187efd3
CB
987
988 /* Prevent leaking the file descriptors to the container */
2520facd 989 ret = fcntl(tty->master, F_SETFD, FD_CLOEXEC);
2187efd3 990 if (ret < 0)
0fd73091
CB
991 WARN("Failed to set FD_CLOEXEC flag on master fd %d of "
992 "tty device \"%s\": %s",
2520facd 993 tty->master, tty->name, strerror(errno));
2187efd3 994
2520facd 995 ret = fcntl(tty->slave, F_SETFD, FD_CLOEXEC);
2187efd3 996 if (ret < 0)
0fd73091
CB
997 WARN("Failed to set FD_CLOEXEC flag on slave fd %d of "
998 "tty device \"%s\": %s",
2520facd 999 tty->slave, tty->name, strerror(errno));
2187efd3 1000
2520facd 1001 tty->busy = 0;
2187efd3
CB
1002 }
1003
885766f5 1004 INFO("Finished creating %zu tty devices", ttys->max);
2187efd3
CB
1005 return 0;
1006}
1007
0e4be3cf 1008void lxc_delete_tty(struct lxc_tty_info *ttys)
2187efd3
CB
1009{
1010 int i;
1011
386e6768
CB
1012 if (!ttys->tty)
1013 return;
1014
885766f5 1015 for (i = 0; i < ttys->max; i++) {
0e4be3cf 1016 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 1017
386e6768
CB
1018 if (tty->master >= 0) {
1019 close(tty->master);
1020 tty->master = -EBADF;
1021 }
1022
1023 if (tty->slave >= 0) {
1024 close(tty->slave);
1025 tty->slave = -EBADF;
1026 }
2187efd3
CB
1027 }
1028
0e4be3cf
CB
1029 free(ttys->tty);
1030 ttys->tty = NULL;
2187efd3
CB
1031}
1032
1033static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
1034{
1035 int i;
0fd73091 1036 int ret = -1;
2187efd3 1037 struct lxc_conf *conf = handler->conf;
0e4be3cf 1038 struct lxc_tty_info *ttys = &conf->ttys;
2187efd3 1039 int sock = handler->data_sock[0];
2187efd3 1040
885766f5 1041 if (ttys->max == 0)
2187efd3
CB
1042 return 0;
1043
885766f5 1044 for (i = 0; i < ttys->max; i++) {
2187efd3 1045 int ttyfds[2];
0e4be3cf 1046 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 1047
2520facd
CB
1048 ttyfds[0] = tty->master;
1049 ttyfds[1] = tty->slave;
2187efd3
CB
1050
1051 ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0);
1052 if (ret < 0)
1053 break;
1054
0fd73091 1055 TRACE("Sent ty \"%s\" with master fd %d and slave fd %d to "
2520facd 1056 "parent", tty->name, tty->master, tty->slave);
2187efd3
CB
1057 }
1058
1059 if (ret < 0)
885766f5 1060 ERROR("Failed to send %zu ttys to parent: %s", ttys->max,
2187efd3
CB
1061 strerror(errno));
1062 else
885766f5 1063 TRACE("Sent %zu ttys to parent", ttys->max);
2187efd3
CB
1064
1065 return ret;
1066}
1067
1068static int lxc_create_ttys(struct lxc_handler *handler)
1069{
1070 int ret = -1;
1071 struct lxc_conf *conf = handler->conf;
1072
1073 ret = lxc_allocate_ttys(handler->name, conf);
1074 if (ret < 0) {
1075 ERROR("Failed to allocate ttys");
1076 goto on_error;
1077 }
1078
1079 ret = lxc_send_ttys_to_parent(handler);
1080 if (ret < 0) {
1081 ERROR("Failed to send ttys to parent");
1082 goto on_error;
1083 }
1084
1085 if (!conf->is_execute) {
1086 ret = lxc_setup_ttys(conf);
1087 if (ret < 0) {
1088 ERROR("Failed to setup ttys");
1089 goto on_error;
1090 }
1091 }
1092
885766f5
CB
1093 if (conf->ttys.tty_names) {
1094 ret = setenv("container_ttys", conf->ttys.tty_names, 1);
2187efd3 1095 if (ret < 0)
885766f5 1096 SYSERROR("Failed to set \"container_ttys=%s\"", conf->ttys.tty_names);
2187efd3
CB
1097 }
1098
1099 ret = 0;
1100
1101on_error:
0e4be3cf 1102 lxc_delete_tty(&conf->ttys);
2187efd3
CB
1103
1104 return ret;
1105}
1106
59bb8698 1107static int setup_rootfs_pivot_root(const char *rootfs)
bf601689 1108{
0fd73091
CB
1109 int ret;
1110 int newroot = -1, oldroot = -1;
bf601689 1111
2d489f9e
SH
1112 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1113 if (oldroot < 0) {
0fd73091 1114 SYSERROR("Failed to open old root directory");
9ba8130c
SH
1115 return -1;
1116 }
0fd73091 1117
2d489f9e
SH
1118 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1119 if (newroot < 0) {
0fd73091
CB
1120 SYSERROR("Failed to open new root directory");
1121 goto on_error;
c08556c6 1122 }
bf601689 1123
cc6f6dd7 1124 /* change into new root fs */
0fd73091
CB
1125 ret = fchdir(newroot);
1126 if (ret < 0) {
1127 SYSERROR("Failed to change to new rootfs \"%s\"", rootfs);
1128 goto on_error;
cc6f6dd7
DL
1129 }
1130
cc6f6dd7 1131 /* pivot_root into our new root fs */
0fd73091
CB
1132 ret = pivot_root(".", ".");
1133 if (ret < 0) {
1134 SYSERROR("Failed to pivot_root()");
1135 goto on_error;
bf601689 1136 }
cc6f6dd7 1137
e599717b 1138 /* At this point the old-root is mounted on top of our new-root. To
0fd73091
CB
1139 * unmounted it we must not be chdir'd into it, so escape back to
1140 * old-root.
2d489f9e 1141 */
0fd73091
CB
1142 ret = fchdir(oldroot);
1143 if (ret < 0) {
1144 SYSERROR("Failed to enter old root directory");
1145 goto on_error;
2d489f9e 1146 }
0fd73091 1147
e599717b
FW
1148 /* Make oldroot rslave to make sure our umounts don't propagate to the
1149 * host.
1150 */
1151 ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL);
1152 if (ret < 0) {
1153 SYSERROR("Failed to make oldroot rslave");
1154 goto on_error;
1155 }
1156
0fd73091
CB
1157 ret = umount2(".", MNT_DETACH);
1158 if (ret < 0) {
1159 SYSERROR("Failed to detach old root directory");
1160 goto on_error;
cc6f6dd7
DL
1161 }
1162
0fd73091
CB
1163 ret = fchdir(newroot);
1164 if (ret < 0) {
1165 SYSERROR("Failed to re-enter new root directory");
1166 goto on_error;
2d489f9e 1167 }
cc6f6dd7 1168
2d489f9e
SH
1169 close(oldroot);
1170 close(newroot);
bf601689 1171
0fd73091 1172 DEBUG("pivot_root(\"%s\") successful", rootfs);
bf601689 1173
bf601689 1174 return 0;
2d489f9e 1175
0fd73091 1176on_error:
2d489f9e
SH
1177 if (oldroot != -1)
1178 close(oldroot);
1179 if (newroot != -1)
1180 close(newroot);
0fd73091 1181
2d489f9e 1182 return -1;
bf601689
MH
1183}
1184
7133b912
CB
1185/* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an
1186 * error, log it but don't fail yet.
91c3830e 1187 */
7133b912
CB
1188static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
1189 const char *lxcpath)
91c3830e
SH
1190{
1191 int ret;
87da4ec3
SH
1192 size_t clen;
1193 char *path;
91c3830e 1194
7133b912 1195 INFO("Preparing \"/dev\"");
bc6928ff 1196
14221cbb 1197 /* $(rootfs->mount) + "/dev/pts" + '\0' */
ec50007f 1198 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1199 path = alloca(clen);
bc6928ff 1200
ec50007f 1201 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
7133b912 1202 if (ret < 0 || (size_t)ret >= clen)
91c3830e 1203 return -1;
bc6928ff 1204
87da4ec3 1205 if (!dir_exists(path)) {
7133b912
CB
1206 WARN("\"/dev\" directory does not exist. Proceeding without "
1207 "autodev being set up");
87da4ec3 1208 return 0;
bc6928ff 1209 }
87da4ec3 1210
1ec0e8e3 1211 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
7133b912
CB
1212 rootfs->path ? rootfs->mount : NULL);
1213 if (ret < 0) {
1214 SYSERROR("Failed to mount tmpfs on \"%s\"", path);
1ec0e8e3 1215 return -1;
91c3830e 1216 }
7133b912 1217 INFO("Mounted tmpfs on \"%s\"", path);
87da4ec3 1218
ec50007f 1219 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
7133b912 1220 if (ret < 0 || (size_t)ret >= clen)
91c3830e 1221 return -1;
87da4ec3 1222
7133b912 1223 /* If we are running on a devtmpfs mapping, dev/pts may already exist.
bc6928ff
MW
1224 * If not, then create it and exit if that fails...
1225 */
87da4ec3 1226 if (!dir_exists(path)) {
bc6928ff 1227 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
7133b912
CB
1228 if (ret < 0) {
1229 SYSERROR("Failed to create directory \"%s\"", path);
bc6928ff
MW
1230 return -1;
1231 }
91c3830e
SH
1232 }
1233
7133b912 1234 INFO("Prepared \"/dev\"");
91c3830e
SH
1235 return 0;
1236}
1237
/* Description of one device node created below the container's "/dev". */
struct lxc_device_node {
	const char *name;  /* file name relative to "/dev" */
	const mode_t mode; /* file type and permission bits passed to mknod() */
	const int maj;     /* device major number */
	const int min;     /* device minor number */
};

/* Device nodes that lxc_fill_autodev() creates. */
static const struct lxc_device_node lxc_devices[] = {
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
};
1253
27245ff7 1254static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38 1255{
5e73416f 1256 int i, ret;
c6883f38 1257 char path[MAXPATHLEN];
3a32201c 1258 mode_t cmask;
5e73416f 1259 bool can_mknod = true;
c6883f38 1260
3999be0a
CB
1261 ret = snprintf(path, MAXPATHLEN, "%s/dev",
1262 rootfs->path ? rootfs->mount : "");
1263 if (ret < 0 || ret >= MAXPATHLEN)
c6883f38 1264 return -1;
91c3830e 1265
0bbf8572
CB
1266 /* ignore, just don't try to fill in */
1267 if (!dir_exists(path))
9cb4d183
SH
1268 return 0;
1269
3999be0a
CB
1270 INFO("Populating \"/dev\"");
1271
3a32201c 1272 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
5e73416f
CB
1273 for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) {
1274 char hostpath[MAXPATHLEN];
1275 const struct lxc_device_node *device = &lxc_devices[i];
0728ebf4 1276
3999be0a 1277 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s",
5e73416f 1278 rootfs->path ? rootfs->mount : "", device->name);
c6883f38
SH
1279 if (ret < 0 || ret >= MAXPATHLEN)
1280 return -1;
0bbf8572 1281
5e73416f
CB
1282 if (can_mknod) {
1283 ret = mknod(path, device->mode, makedev(device->maj, device->min));
1284 if (ret == 0 || (ret < 0 && errno == EEXIST)) {
1285 DEBUG("Created device node \"%s\"", path);
0bbf8572
CB
1286 continue;
1287 }
1288
5e73416f
CB
1289 if (errno != EPERM) {
1290 SYSERROR("Failed to create device node \"%s\"", path);
9cb4d183
SH
1291 return -1;
1292 }
3999be0a 1293
5e73416f
CB
1294 /* This can e.g. happen when the container is
1295 * unprivileged or CAP_MKNOD has been dropped.
1296 */
1297 can_mknod = false;
1298 }
1299
1300 ret = mknod(path, S_IFREG, 0);
1301 if (ret < 0 && errno != EEXIST) {
1302 SYSERROR("Failed to create file \"%s\"", path);
1303 return -1;
1304 }
1305
1306 /* Fallback to bind-mounting the device from the host. */
1307 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", device->name);
1308 if (ret < 0 || ret >= MAXPATHLEN)
1309 return -1;
1310
1311 ret = safe_mount(hostpath, path, 0, MS_BIND, NULL,
1312 rootfs->path ? rootfs->mount : NULL);
1313 if (ret < 0) {
1314 SYSERROR("Failed to bind mount host device node \"%s\" "
1315 "onto \"%s\"", hostpath, path);
1316 return -1;
c6883f38 1317 }
5e73416f
CB
1318 DEBUG("Bind mounted host device node \"%s\" onto \"%s\"",
1319 hostpath, path);
c6883f38 1320 }
5e73416f 1321 (void)umask(cmask);
c6883f38 1322
3999be0a 1323 INFO("Populated \"/dev\"");
c6883f38
SH
1324 return 0;
1325}
1326
9aa76a17 1327static int lxc_setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1328{
9aa76a17 1329 int ret;
10bc1861 1330 struct lxc_storage *bdev;
91c3e281 1331 const struct lxc_rootfs *rootfs;
cc28d0b0 1332
91c3e281 1333 rootfs = &conf->rootfs;
a0f379bf 1334 if (!rootfs->path) {
0fd73091
CB
1335 ret = mount("", "/", NULL, MS_SLAVE | MS_REC, 0);
1336 if (ret < 0) {
1337 SYSERROR("Failed to make / rslave");
a0f379bf
DW
1338 return -1;
1339 }
0fd73091 1340
c69bd12f 1341 return 0;
a0f379bf 1342 }
0ad19a3f 1343
0fd73091
CB
1344 ret = access(rootfs->mount, F_OK);
1345 if (ret != 0) {
1346 SYSERROR("Failed to access to \"%s\". Check it is present",
12297168 1347 rootfs->mount);
b1789442
DL
1348 return -1;
1349 }
1350
8a388ed4 1351 bdev = storage_init(conf);
9aa76a17 1352 if (!bdev) {
0fd73091 1353 ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1354 rootfs->path, rootfs->mount,
1355 rootfs->options ? rootfs->options : "(null)");
9aa76a17 1356 return -1;
9be53773 1357 }
9aa76a17
CB
1358
1359 ret = bdev->ops->mount(bdev);
10bc1861 1360 storage_put(bdev);
9aa76a17 1361 if (ret < 0) {
0fd73091 1362 ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1363 rootfs->path, rootfs->mount,
1364 rootfs->options ? rootfs->options : "(null)");
c3f0a28c 1365 return -1;
1366 }
0ad19a3f 1367
0fd73091 1368 DEBUG("Mounted rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1369 rootfs->path, rootfs->mount,
1370 rootfs->options ? rootfs->options : "(null)");
9aa76a17 1371
ac778708
DL
1372 return 0;
1373}
1374
91e93c71
AV
1375int prepare_ramfs_root(char *root)
1376{
0fd73091
CB
1377 int i, ret;
1378 char *p, *p2;
1379 char buf[LXC_LINELEN], nroot[PATH_MAX];
91e93c71 1380 FILE *f;
91e93c71 1381
0fd73091
CB
1382 if (!realpath(root, nroot))
1383 return -1;
91e93c71 1384
0fd73091
CB
1385 ret = chdir("/");
1386 if (ret < 0)
1387 return -1;
91e93c71 1388
0fd73091
CB
1389 /* We could use here MS_MOVE, but in userns this mount is locked and
1390 * can't be moved.
91e93c71 1391 */
0fd73091
CB
1392 ret = mount(root, "/", NULL, MS_REC | MS_BIND, NULL);
1393 if (ret < 0) {
1394 SYSERROR("Failed to move \"%s\" into \"/\"", root);
1395 return -1;
91e93c71
AV
1396 }
1397
0fd73091
CB
1398 ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
1399 if (ret < 0) {
1400 SYSERROR("Failed to make \"/\" rprivate");
1401 return -1;
91e93c71
AV
1402 }
1403
0fd73091
CB
1404 /* The following code cleans up inhereted mounts which are not required
1405 * for CT.
91e93c71
AV
1406 *
1407 * The mountinfo file shows not all mounts, if a few points have been
1408 * unmounted between read operations from the mountinfo. So we need to
1409 * read mountinfo a few times.
1410 *
1411 * This loop can be skipped if a container uses unserns, because all
1412 * inherited mounts are locked and we should live with all this trash.
1413 */
0fd73091 1414 for (;;) {
91e93c71
AV
1415 int progress = 0;
1416
1417 f = fopen("./proc/self/mountinfo", "r");
1418 if (!f) {
1419 SYSERROR("Unable to open /proc/self/mountinfo");
1420 return -1;
1421 }
0fd73091 1422
eab15c1e 1423 while (fgets(buf, LXC_LINELEN, f)) {
91e93c71
AV
1424 for (p = buf, i=0; p && i < 4; i++)
1425 p = strchr(p+1, ' ');
0fd73091 1426
91e93c71
AV
1427 if (!p)
1428 continue;
0fd73091 1429
91e93c71
AV
1430 p2 = strchr(p+1, ' ');
1431 if (!p2)
1432 continue;
1433
1434 *p2 = '\0';
1435 *p = '.';
1436
1437 if (strcmp(p + 1, "/") == 0)
1438 continue;
0fd73091 1439
91e93c71
AV
1440 if (strcmp(p + 1, "/proc") == 0)
1441 continue;
1442
0fd73091
CB
1443 ret = umount2(p, MNT_DETACH);
1444 if (ret == 0)
91e93c71
AV
1445 progress++;
1446 }
0fd73091 1447
91e93c71 1448 fclose(f);
0fd73091 1449
91e93c71
AV
1450 if (!progress)
1451 break;
1452 }
1453
0fd73091
CB
1454 /* This also can be skipped if a container uses unserns. */
1455 (void)umount2("./proc", MNT_DETACH);
91e93c71
AV
1456
1457 /* It is weird, but chdir("..") moves us in a new root */
0fd73091
CB
1458 ret = chdir("..");
1459 if (ret < 0) {
91e93c71
AV
1460 SYSERROR("Unable to change working directory");
1461 return -1;
1462 }
1463
0fd73091
CB
1464 ret = chroot(".");
1465 if (ret < 0) {
91e93c71
AV
1466 SYSERROR("Unable to chroot");
1467 return -1;
1468 }
1469
1470 return 0;
1471}
1472
74a3920a 1473static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1474{
0fd73091
CB
1475 int ret;
1476
39c7b795 1477 if (!rootfs->path) {
0fd73091 1478 DEBUG("Container does not have a rootfs");
ac778708 1479 return 0;
39c7b795 1480 }
ac778708 1481
91e93c71 1482 if (detect_ramfs_rootfs()) {
0fd73091
CB
1483 DEBUG("Detected that container is on ramfs");
1484
1485 ret = prepare_ramfs_root(rootfs->mount);
1486 if (ret < 0) {
1487 ERROR("Failed to prepare minimal ramfs root");
91e93c71 1488 return -1;
39c7b795
CB
1489 }
1490
0fd73091 1491 DEBUG("Prepared ramfs root for container");
39c7b795
CB
1492 return 0;
1493 }
1494
0fd73091
CB
1495 ret = setup_rootfs_pivot_root(rootfs->mount);
1496 if (ret < 0) {
1497 ERROR("Failed to pivot_root()");
25368b52 1498 return -1;
c69bd12f
DL
1499 }
1500
0fd73091 1501 DEBUG("Finished pivot_root()");
25368b52 1502 return 0;
0ad19a3f 1503}
1504
5173b710 1505static const struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf, unsigned id,
f4900711
CB
1506 enum idtype idtype)
1507{
1508 struct lxc_list *it;
1509 struct id_map *map;
1510 struct id_map *retmap = NULL;
1511
dcf0ffdf
CB
1512 /* Shortcut for container's root mappings. */
1513 if (id == 0) {
1514 if (idtype == ID_TYPE_UID)
1515 return conf->root_nsuid_map;
1516
1517 if (idtype == ID_TYPE_GID)
1518 return conf->root_nsgid_map;
1519 }
1520
f4900711
CB
1521 lxc_list_for_each(it, &conf->id_map) {
1522 map = it->elem;
1523 if (map->idtype != idtype)
1524 continue;
1525
1526 if (id >= map->nsid && id < map->nsid + map->range) {
1527 retmap = map;
1528 break;
1529 }
1530 }
1531
1532 return retmap;
1533}
1534
1535static int lxc_setup_devpts(struct lxc_conf *conf)
3c26f34e 1536{
70761e5e 1537 int ret;
11293068 1538 const char *default_devpts_mntopts = "gid=5,newinstance,ptmxmode=0666,mode=0620";
9d28c4f9 1539 char devpts_mntopts[256];
77890c6d 1540
e528c735 1541 if (conf->pty_max <= 0) {
0fd73091 1542 DEBUG("No new devpts instance will be mounted since no pts "
70761e5e 1543 "devices are requested");
d852c78c 1544 return 0;
3c26f34e 1545 }
1546
e528c735
CB
1547 ret = snprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu",
1548 default_devpts_mntopts, conf->pty_max);
9d28c4f9
CB
1549 if (ret < 0 || (size_t)ret >= sizeof(devpts_mntopts))
1550 return -1;
1551
d5cb35d6 1552 /* Unmount old devpts instance. */
70761e5e
CB
1553 ret = access("/dev/pts/ptmx", F_OK);
1554 if (!ret) {
70761e5e
CB
1555 ret = umount("/dev/pts");
1556 if (ret < 0) {
0fd73091 1557 SYSERROR("Failed to unmount old devpts instance");
70761e5e 1558 return -1;
7e40254a 1559 }
0fd73091 1560 DEBUG("Unmounted old devpts instance");
7e40254a
JTLB
1561 }
1562
70761e5e
CB
1563 /* Create mountpoint for devpts instance. */
1564 ret = mkdir("/dev/pts", 0755);
1565 if (ret < 0 && errno != EEXIST) {
0fd73091 1566 SYSERROR("Failed to create \"/dev/pts\" directory");
3c26f34e 1567 return -1;
1568 }
1569
11293068 1570 /* mount new devpts instance */
f4900711 1571 ret = mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, devpts_mntopts);
70761e5e 1572 if (ret < 0) {
11293068
CB
1573 /* try mounting without gid=5 */
1574 ret = mount("devpts", "/dev/pts", "devpts",
1575 MS_NOSUID | MS_NOEXEC, devpts_mntopts + sizeof("gid=5"));
1576 if (ret < 0) {
1577 SYSERROR("Failed to mount new devpts instance");
1578 return -1;
1579 }
70761e5e 1580 }
0fd73091 1581 DEBUG("Mount new devpts instance with options \"%s\"", devpts_mntopts);
70761e5e 1582
d5cb35d6 1583 /* Remove any pre-existing /dev/ptmx file. */
70761e5e 1584 ret = access("/dev/ptmx", F_OK);
d5cb35d6
CB
1585 if (!ret) {
1586 ret = remove("/dev/ptmx");
1587 if (ret < 0) {
0fd73091 1588 SYSERROR("Failed to remove existing \"/dev/ptmx\" file");
d5cb35d6 1589 return -1;
70761e5e 1590 }
0fd73091 1591 DEBUG("Removed existing \"/dev/ptmx\" file");
3c26f34e 1592 }
1593
d5cb35d6
CB
1594 /* Create dummy /dev/ptmx file as bind mountpoint for /dev/pts/ptmx. */
1595 ret = open("/dev/ptmx", O_CREAT, 0666);
1596 if (ret < 0) {
0fd73091 1597 SYSERROR("Failed to create dummy \"/dev/ptmx\" file as bind mount target");
d5cb35d6
CB
1598 return -1;
1599 }
e87bd19c 1600 close(ret);
0fd73091 1601 DEBUG("Created dummy \"/dev/ptmx\" file as bind mount target");
77890c6d 1602
d5cb35d6 1603 /* Fallback option: create symlink /dev/ptmx -> /dev/pts/ptmx */
e87bd19c 1604 ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL);
d5cb35d6 1605 if (!ret) {
0fd73091 1606 DEBUG("Bind mounted \"/dev/pts/ptmx\" to \"/dev/ptmx\"");
d5cb35d6
CB
1607 return 0;
1608 } else {
1609 /* Fallthrough and try to create a symlink. */
0fd73091 1610 ERROR("Failed to bind mount \"/dev/pts/ptmx\" to \"/dev/ptmx\"");
d5cb35d6
CB
1611 }
1612
1613 /* Remove the dummy /dev/ptmx file we created above. */
1614 ret = remove("/dev/ptmx");
70761e5e 1615 if (ret < 0) {
0fd73091 1616 SYSERROR("Failed to remove existing \"/dev/ptmx\"");
d5cb35d6
CB
1617 return -1;
1618 }
1619
1620 /* Fallback option: Create symlink /dev/ptmx -> /dev/pts/ptmx. */
1621 ret = symlink("/dev/pts/ptmx", "/dev/ptmx");
1622 if (ret < 0) {
0fd73091 1623 SYSERROR("Failed to create symlink from \"/dev/ptmx\" to \"/dev/pts/ptmx\"");
3c26f34e 1624 return -1;
1625 }
0fd73091 1626 DEBUG("Created symlink from \"/dev/ptmx\" to \"/dev/pts/ptmx\"");
cd54d859 1627
3c26f34e 1628 return 0;
1629}
1630
cccc74b5
DL
/* Apply the execution domain @persona to the calling process.
 *
 * A value of -1 means that no personality was requested. When the build has
 * no <sys/personality.h> support the function does nothing.
 *
 * Returns 0 on success and -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	int ret;

	if (persona == -1)
		return 0;

	ret = personality(persona);
	if (ret < 0) {
		SYSERROR("Failed to set personality to \"0x%x\"", persona);
		return -1;
	}

	INFO("Set personality to \"0x%x\"", persona);
#else
	/* Nothing to apply without personality support. */
	(void)persona;
#endif

	return 0;
}
1650
3d7d929a 1651static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs,
dcad02f8 1652 const struct lxc_terminal *console)
6e590161 1653{
0fd73091 1654 int fd, ret;
63376d7d 1655 char path[MAXPATHLEN];
86530b0a 1656 char *rootfs_path = rootfs->path ? rootfs->mount : "";
52e35957 1657
8b1b1210
CB
1658 if (console->path && !strcmp(console->path, "none"))
1659 return 0;
1660
86530b0a 1661 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
3d7d929a 1662 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1663 return -1;
52e35957 1664
8b1b1210
CB
1665 /* When we are asked to setup a console we remove any previous
1666 * /dev/console bind-mounts.
1667 */
a7ba3c7f
CB
1668 if (file_exists(path)) {
1669 ret = lxc_unstack_mountpoint(path, false);
1670 if (ret < 0) {
86530b0a 1671 ERROR("Failed to unmount \"%s\": %s", path, strerror(errno));
a7ba3c7f
CB
1672 return -ret;
1673 } else {
86530b0a 1674 DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
a7ba3c7f 1675 }
8b1b1210
CB
1676 }
1677
1678 /* For unprivileged containers autodev or automounts will already have
1679 * taken care of creating /dev/console.
1680 */
0728ebf4
TA
1681 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1682 if (fd < 0) {
1683 if (errno != EEXIST) {
86530b0a 1684 SYSERROR("Failed to create console");
3d7d929a 1685 return -errno;
0728ebf4
TA
1686 }
1687 } else {
1688 close(fd);
52e35957
DL
1689 }
1690
86530b0a
L
1691 ret = chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH);
1692 if (ret < 0) {
0fd73091
CB
1693 SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
1694 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
3d7d929a 1695 return -errno;
63376d7d 1696 }
13954cce 1697
86530b0a
L
1698 ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path);
1699 if (ret < 0) {
0fd73091 1700 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, path);
6e590161 1701 return -1;
1702 }
1703
86530b0a 1704 DEBUG("Mounted pts device \"%s\" onto \"%s\"", console->name, path);
7c6ef2a2
SH
1705 return 0;
1706}
1707
3d7d929a 1708static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs,
dcad02f8 1709 const struct lxc_terminal *console,
3d7d929a 1710 char *ttydir)
7c6ef2a2 1711{
3dc035f1 1712 int ret, fd;
3d7d929a 1713 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
86530b0a 1714 char *rootfs_path = rootfs->path ? rootfs->mount : "";
7c6ef2a2 1715
3dc035f1
L
1716 if (console->path && !strcmp(console->path, "none"))
1717 return 0;
1718
7c6ef2a2 1719 /* create rootfs/dev/<ttydir> directory */
86530b0a 1720 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs_path, ttydir);
3d7d929a 1721 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1722 return -1;
3d7d929a 1723
7c6ef2a2
SH
1724 ret = mkdir(path, 0755);
1725 if (ret && errno != EEXIST) {
0fd73091 1726 SYSERROR("Failed to create \"%s\"", path);
3d7d929a 1727 return -errno;
7c6ef2a2 1728 }
4742cd9a 1729 DEBUG("Created directory for console and tty devices at \"%s\"", path);
7c6ef2a2 1730
86530b0a 1731 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console", rootfs_path, ttydir);
3d7d929a
CB
1732 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
1733 return -1;
1734
7c6ef2a2 1735 ret = creat(lxcpath, 0660);
3d7d929a 1736 if (ret == -1 && errno != EEXIST) {
0fd73091 1737 SYSERROR("Failed to create \"%s\"", lxcpath);
3d7d929a 1738 return -errno;
7c6ef2a2 1739 }
4d44e274
SH
1740 if (ret >= 0)
1741 close(ret);
7c6ef2a2 1742
86530b0a 1743 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
3dc035f1 1744 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1745 return -1;
2a12fefd 1746
3dc035f1 1747 if (file_exists(path)) {
a7ba3c7f 1748 ret = lxc_unstack_mountpoint(path, false);
2a12fefd 1749 if (ret < 0) {
0fd73091 1750 ERROR("%s - Failed to unmount \"%s\"", strerror(errno), path);
a7ba3c7f
CB
1751 return -ret;
1752 } else {
86530b0a 1753 DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
a7ba3c7f 1754 }
3dc035f1 1755 }
2a12fefd 1756
3dc035f1
L
1757 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1758 if (fd < 0) {
1759 if (errno != EEXIST) {
86530b0a 1760 SYSERROR("Failed to create console");
3dc035f1 1761 return -errno;
2a12fefd 1762 }
3dc035f1
L
1763 } else {
1764 close(fd);
7c6ef2a2
SH
1765 }
1766
86530b0a
L
1767 ret = chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH);
1768 if (ret < 0) {
0fd73091
CB
1769 SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
1770 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
2a12fefd
CB
1771 return -errno;
1772 }
1773
3dc035f1 1774 /* bind mount console->name to '/dev/<ttydir>/console' */
86530b0a
L
1775 ret = safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs_path);
1776 if (ret < 0) {
0fd73091 1777 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, lxcpath);
7c6ef2a2
SH
1778 return -1;
1779 }
86530b0a 1780 DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath);
3dc035f1
L
1781
1782 /* bind mount '/dev/<ttydir>/console' to '/dev/console' */
86530b0a
L
1783 ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path);
1784 if (ret < 0) {
0fd73091 1785 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, lxcpath);
3dc035f1
L
1786 return -1;
1787 }
86530b0a 1788 DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath);
3dc035f1 1789
86530b0a 1790 DEBUG("Console has been setup under \"%s\" and mounted to \"%s\"", lxcpath, path);
6e590161 1791 return 0;
1792}
1793
/* Set up the container console: below "/dev/<ttydir>" when @ttydir is given,
 * directly on "/dev/console" otherwise. Returns the result of the helper
 * that was called.
 */
static int lxc_setup_console(const struct lxc_rootfs *rootfs,
			     const struct lxc_terminal *console, char *ttydir)
{
	if (ttydir)
		return lxc_setup_ttydir_console(rootfs, console, ttydir);

	return lxc_setup_dev_console(rootfs, console);
}
1803
998ac676
RT
1804static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1805{
1806 struct mount_opt *mo;
1807
1808 /* If opt is found in mount_opt, set or clear flags.
1809 * Otherwise append it to data. */
1810
1811 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
0fd73091 1812 if (strncmp(opt, mo->name, strlen(mo->name)) == 0) {
998ac676
RT
1813 if (mo->clear)
1814 *flags &= ~mo->flag;
1815 else
1816 *flags |= mo->flag;
1817 return;
1818 }
1819 }
1820
1821 if (strlen(*data))
1822 strcat(*data, ",");
1823 strcat(*data, opt);
1824}
1825
/* Split the comma separated option string @mntopts into mount flags and a
 * filesystem specific data string.
 *
 * @mntopts:  option string, may be NULL
 * @mntflags: receives the accumulated mount flags (0 if none)
 * @mntdata:  receives a newly allocated string with the options that are not
 *            flags, or NULL if there are none; the caller must free it
 *
 * Returns 0 on success and -1 if memory allocation fails.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata)
{
	char *copy, *cursor, *data, *p;
	char *saveptr = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	copy = strdup(mntopts);
	if (!copy)
		return -1;

	/* The data string can never be longer than the option string. */
	data = malloc(strlen(copy) + 1);
	if (!data) {
		free(copy);
		return -1;
	}
	*data = 0;

	/* Iterate with a separate cursor: strtok_r() needs NULL after the
	 * first call, and the original pointer must survive for free().
	 */
	for (cursor = copy; (p = strtok_r(cursor, ",", &saveptr)); cursor = NULL)
		parse_mntopt(p, mntflags, &data);

	if (*data)
		*mntdata = data;
	else
		free(data);
	free(copy);

	return 0;
}
1859
d840039e
YT
1860static void parse_propagationopt(char *opt, unsigned long *flags)
1861{
1862 struct mount_opt *mo;
1863
1864 /* If opt is found in propagation_opt, set or clear flags. */
d840039e 1865 for (mo = &propagation_opt[0]; mo->name != NULL; mo++) {
0fd73091
CB
1866 if (strncmp(opt, mo->name, strlen(mo->name)) != 0)
1867 continue;
1868
1869 if (mo->clear)
1870 *flags &= ~mo->flag;
1871 else
1872 *flags |= mo->flag;
1873
1874 return;
d840039e
YT
1875 }
1876}
1877
/* Collect the mount propagation flags named in the comma separated option
 * string @mntopts.
 *
 * @mntopts: option string; when NULL, *@pflags is left untouched
 * @pflags:  receives the propagation flags (0 if none are named)
 *
 * Returns 0 on success and -ENOMEM if the string cannot be duplicated.
 */
static int parse_propagationopts(const char *mntopts, unsigned long *pflags)
{
	char *copy, *cursor, *p;
	char *saveptr = NULL;

	if (!mntopts)
		return 0;

	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("Failed to allocate memory");
		return -ENOMEM;
	}

	*pflags = 0L;

	/* Iterate with a separate cursor: strtok_r() needs NULL after the
	 * first call, and the original pointer must survive for free().
	 */
	for (cursor = copy; (p = strtok_r(cursor, ",", &saveptr)); cursor = NULL)
		parse_propagationopt(p, pflags);
	free(copy);

	return 0;
}
1899
6fd5e769
SH
/* Terminate @word at its first space or tab. A word without either is left
 * unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1906
/* Skip @nfields fields in @src, where each field is ended by a single space
 * or tab, and return a pointer to what follows. If the string ends first, a
 * pointer to its terminating NUL byte is returned.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped = 0;

	while (skipped < nfields) {
		/* Move to the separator that ends the current field. */
		cur += strcspn(cur, " \t");

		if (*cur == '\0')
			break;

		/* Step over exactly one separator. */
		cur++;
		skipped++;
	}

	return cur;
}
1925
911324ef
DL
/* Perform one mount described by a mount entry.
 *
 * @fsname:     mount source, may be NULL
 * @target:     mount target
 * @fstype:     filesystem type, may be NULL
 * @mountflags: mount flags; MS_REMOUNT is stripped for the initial mount
 * @pflags:     propagation flags applied to @target afterwards, 0 for none
 * @data:       filesystem specific mount data
 * @optional:   when true, failures are logged and reported as success
 * @dev:        when true, MS_NODEV is not carried over from the source
 * @relative:   when true, @fsname is taken relative to @rootfs
 * @rootfs:     rootfs path handed to safe_mount(), may be NULL
 *
 * For bind mounts and remounts a second mount call with MS_REMOUNT applies
 * the requested flags; with statvfs() support the flags found on the source
 * are carried over, and the remount is skipped when nothing would change.
 *
 * Returns 0 on success (or on a failed optional mount) and -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       unsigned long pflags, const char *data, bool optional,
		       bool dev, bool relative, const char *rootfs)
{
	int ret;
	char srcbuf[MAXPATHLEN];
	const char *srcpath = fsname;
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (relative) {
		ret = snprintf(srcbuf, MAXPATHLEN, "%s/%s", rootfs ? rootfs : "/", fsname ? fsname : "");
		if (ret < 0 || ret >= MAXPATHLEN) {
			ERROR("source path is too long");
			return -1;
		}
		srcpath = srcbuf;
	}

	ret = safe_mount(srcpath, target, fstype, mountflags & ~MS_REMOUNT, data,
			 rootfs);
	if (ret < 0) {
		if (optional) {
			INFO("%s - Failed to mount \"%s\" on \"%s\" "
			     "(optional)", strerror(errno),
			     srcpath ? srcpath : "(null)", target);
			return 0;
		}

		SYSERROR("Failed to mount \"%s\" on \"%s\"",
			 srcpath ? srcpath : "(null)", target);
		return -1;
	}

	if ((mountflags & MS_REMOUNT) || (mountflags & MS_BIND)) {
		unsigned long rqd_flags = 0;

		DEBUG("Remounting \"%s\" on \"%s\" to respect bind or remount "
		      "options", srcpath ? srcpath : "(none)", target ? target : "(none)");

		if (mountflags & MS_RDONLY)
			rqd_flags |= MS_RDONLY;
#ifdef HAVE_STATVFS
		if (srcpath && statvfs(srcpath, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			if (sb.f_flag & MS_NOSUID)
				required_flags |= MS_NOSUID;

			if (sb.f_flag & MS_NODEV && !dev)
				required_flags |= MS_NODEV;

			if (sb.f_flag & MS_RDONLY)
				required_flags |= MS_RDONLY;

			if (sb.f_flag & MS_NOEXEC)
				required_flags |= MS_NOEXEC;

			DEBUG("Flags for \"%s\" were %lu, required extra flags "
			      "are %lu", srcpath, sb.f_flag, required_flags);

			/* If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount.
			 */
			if (!(mountflags & MS_REMOUNT)) {
				if (!(required_flags & ~mountflags) &&
				    rqd_flags == 0) {
					DEBUG("Mountflags already were %lu, "
					      "skipping remount", mountflags);
					goto skipremount;
				}
			}

			mountflags |= required_flags;
		}
#endif

		ret = mount(srcpath, target, fstype, mountflags | MS_REMOUNT, data);
		if (ret < 0) {
			if (optional) {
				INFO("Failed to mount \"%s\" on \"%s\" "
				     "(optional): %s",
				     srcpath ? srcpath : "(null)", target,
				     strerror(errno));
				return 0;
			}

			SYSERROR("Failed to mount \"%s\" on \"%s\"",
				 srcpath ? srcpath : "(null)", target);
			return -1;
		}
	}

	/* Skipping the remount must not skip the propagation change, so the
	 * label sits in front of it.
	 */
#ifdef HAVE_STATVFS
skipremount:
#endif
	if (pflags) {
		ret = mount(NULL, target, NULL, pflags, NULL);
		if (ret < 0) {
			if (optional) {
				INFO("%s - Failed to change mount propagation "
				     "for \"%s\" (optional)", strerror(errno), target);
				return 0;
			}

			SYSERROR("Failed to change mount propagation "
				 "for \"%s\"", target);
			return -1;
		}
		DEBUG("Changed mount propagation for \"%s\"", target);
	}

	DEBUG("Mounted \"%s\" on \"%s\" with filesystem type \"%s\"",
	      srcpath ? srcpath : "(null)", target, fstype ? fstype : "(null)");

	return 0;
}
2047
/* Remove the LXC-only options "create=dir", "create=file", "optional" and
 * "relative" from mntent->mnt_opts.
 *
 * The option string is rewritten in place. Options are compared as whole
 * comma-separated tokens, so an option that merely contains one of the
 * keywords (e.g. "lowerdir=/srv/optional") is left untouched. Every occurrence
 * of a listed option is removed and no dangling separator is left behind when
 * the removed option was the last one.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
		"relative",
	};
	char *rd, *wr;
	bool first = true;

	if (!mntent->mnt_opts)
		return;

	/* rd walks the original tokens, wr trails behind it and receives the
	 * tokens that are kept. wr never overtakes rd, so compacting in place
	 * is safe.
	 */
	rd = wr = mntent->mnt_opts;
	while (*rd) {
		size_t i;
		bool drop = false;
		size_t len = strcspn(rd, ",");
		/* Step over the token and its separator, if there is one. */
		char *next = rd + len + (rd[len] == ',' ? 1 : 0);

		for (i = 0; i < sizeof(culled) / sizeof(culled[0]); i++) {
			if (strlen(culled[i]) == len &&
			    strncmp(rd, culled[i], len) == 0) {
				drop = true;
				break;
			}
		}

		if (!drop) {
			if (!first)
				*wr++ = ',';
			memmove(wr, rd, len);
			wr += len;
			first = false;
		}

		rd = next;
	}

	*wr = '\0';
}
2077
/* Create the mount target requested through the "create=dir" and
 * "create=file" mount options before the entry is mounted.
 *
 * @mntent   : mount entry whose type and options are inspected
 * @path     : mount target to create
 * @rootfs, @lxc_name, @lxc_path : only handed on to ovl_mkdir() for mount
 *             types starting with "overlay"
 *
 * With "create=dir" @path is created as a directory (including parents). With
 * "create=file" the parent directory of @path is created and, unless @path
 * already exists, an empty file is created at @path.
 *
 * Returns 0 on success and -1 on failure.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char *path,
				       const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int fd, ret;
	char *p1, *p2;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		ret = ovl_mkdir(mntent, rootfs, lxc_name, lxc_path);
		if (ret < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		ret = mkdir_p(path, 0755);
		if (ret < 0 && errno != EEXIST) {
			SYSERROR("Failed to create directory \"%s\"", path);
			return -1;
		}
	}

	if (!hasmntopt(mntent, "create=file"))
		return 0;

	/* Nothing to do when the target already exists. */
	ret = access(path, F_OK);
	if (ret == 0)
		return 0;

	/* dirname() may modify its argument, so work on a copy. */
	p1 = strdup(path);
	if (!p1)
		return -1;

	p2 = dirname(p1);

	ret = mkdir_p(p2, 0755);
	if (ret < 0 && errno != EEXIST) {
		/* Report the directory that actually failed and do so before
		 * free() gets a chance to touch errno or invalidate p2.
		 */
		SYSERROR("Failed to create directory \"%s\"", p2);
		free(p1);
		return -1;
	}
	free(p1);

	fd = open(path, O_CREAT, 0644);
	if (fd < 0) {
		SYSERROR("Failed to create file \"%s\"", path);
		return -1;
	}
	close(fd);

	return 0;
}
2127
ec50007f
CB
2128/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
2129 * without a rootfs. */
db4aba38 2130static inline int mount_entry_on_generic(struct mntent *mntent,
d8b712bc
CB
2131 const char *path,
2132 const struct lxc_rootfs *rootfs,
2133 const char *lxc_name,
2134 const char *lxc_path)
4d5b72a1 2135{
d8b712bc 2136 int ret;
949d0338 2137 unsigned long mntflags;
4d5b72a1 2138 char *mntdata;
181437fd 2139 bool dev, optional, relative;
949d0338 2140 unsigned long pflags = 0;
ec50007f 2141 char *rootfs_path = NULL;
d8b712bc
CB
2142
2143 optional = hasmntopt(mntent, "optional") != NULL;
2144 dev = hasmntopt(mntent, "dev") != NULL;
181437fd 2145 relative = hasmntopt(mntent, "relative") != NULL;
d8b712bc 2146
ec50007f
CB
2147 if (rootfs && rootfs->path)
2148 rootfs_path = rootfs->mount;
2149
d8b712bc
CB
2150 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name,
2151 lxc_path);
2152 if (ret < 0) {
2153 if (optional)
2154 return 0;
608e3567 2155
d8b712bc
CB
2156 return -1;
2157 }
4e4ca161
SH
2158 cull_mntent_opt(mntent);
2159
d840039e
YT
2160 ret = parse_propagationopts(mntent->mnt_opts, &pflags);
2161 if (ret < 0)
2162 return -1;
2163
d8b712bc
CB
2164 ret = parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata);
2165 if (ret < 0)
a17b1e65 2166 return -1;
a17b1e65 2167
6e46cc0d 2168 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
d840039e 2169 pflags, mntdata, optional, dev, relative, rootfs_path);
68c152ef 2170
911324ef 2171 free(mntdata);
911324ef
DL
2172 return ret;
2173}
2174
db4aba38
NC
/* Mount an entry for a container that has no rootfs.
 *
 * For containers created without a rootfs all mounts are treated as absolute
 * paths starting at / on the host, so a leading '/' is added to the mount
 * directory when it is missing.
 *
 * Returns -1 if the resulting path does not fit into MAXPATHLEN, otherwise
 * the result of mount_entry_on_generic().
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char target[MAXPATHLEN];
	const char *dir = mntent->mnt_dir;
	int len;

	len = snprintf(target, sizeof(target), "%s%s",
		       dir[0] == '/' ? "" : "/", dir);
	if (len < 0 || (size_t)len >= sizeof(target))
		return -1;

	return mount_entry_on_generic(mntent, target, NULL, NULL, NULL);
}
2192
4e4ca161 2193static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 2194 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
2195 const char *lxc_name,
2196 const char *lxc_path)
911324ef 2197{
bdd2b34c 2198 int offset;
013bd428 2199 char *aux;
67e571de 2200 const char *lxcpath;
bdd2b34c
CB
2201 char path[MAXPATHLEN];
2202 int ret = 0;
0ad19a3f 2203
593e8478 2204 lxcpath = lxc_global_config_value("lxc.lxcpath");
bdd2b34c 2205 if (!lxcpath)
2a59a681 2206 return -1;
2a59a681 2207
bdd2b34c
CB
2208 /* If rootfs->path is a blockdev path, allow container fstab to use
2209 * <lxcpath>/<name>/rootfs" as the target prefix.
2210 */
2211 ret = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
2212 if (ret < 0 || ret >= MAXPATHLEN)
80a881b2
SH
2213 goto skipvarlib;
2214
2215 aux = strstr(mntent->mnt_dir, path);
2216 if (aux) {
2217 offset = strlen(path);
2218 goto skipabs;
2219 }
2220
2221skipvarlib:
013bd428
DL
2222 aux = strstr(mntent->mnt_dir, rootfs->path);
2223 if (!aux) {
bdd2b34c 2224 WARN("Ignoring mount point \"%s\"", mntent->mnt_dir);
db4aba38 2225 return ret;
013bd428 2226 }
80a881b2
SH
2227 offset = strlen(rootfs->path);
2228
2229skipabs:
bdd2b34c
CB
2230 ret = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount, aux + offset);
2231 if (ret < 0 || ret >= MAXPATHLEN)
a17b1e65 2232 return -1;
a17b1e65 2233
0a2dddd4 2234 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 2235}
d330fe7b 2236
4e4ca161 2237static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
2238 const struct lxc_rootfs *rootfs,
2239 const char *lxc_name,
2240 const char *lxc_path)
911324ef 2241{
911324ef 2242 int ret;
0fd73091 2243 char path[MAXPATHLEN];
d330fe7b 2244
34cfffb3 2245 /* relative to root mount point */
6e46cc0d 2246 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
0fd73091 2247 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2248 return -1;
911324ef 2249
0a2dddd4 2250 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
2251}
2252
06749971
CB
2253static int mount_file_entries(const struct lxc_conf *conf,
2254 const struct lxc_rootfs *rootfs, FILE *file,
1ae3c19f 2255 const char *lxc_name, const char *lxc_path)
911324ef 2256{
aaf901be 2257 char buf[4096];
0fd73091 2258 struct mntent mntent;
911324ef 2259 int ret = -1;
e76b8764 2260
aaf901be 2261 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
1ae3c19f
CB
2262 if (!rootfs->path)
2263 ret = mount_entry_on_systemfs(&mntent);
2264 else if (mntent.mnt_dir[0] != '/')
2265 ret = mount_entry_on_relative_rootfs(&mntent, rootfs,
2266 lxc_name, lxc_path);
2267 else
2268 ret = mount_entry_on_absolute_rootfs(&mntent, rootfs,
2269 lxc_name, lxc_path);
2270 if (ret < 0)
2271 return -1;
0ad19a3f 2272 }
2273 ret = 0;
cd54d859 2274
0fd73091 2275 INFO("Finished setting up mounts");
e7938e9e
MN
2276 return ret;
2277}
2278
06749971
CB
/* Mount all entries listed in the fstab file @fstab.
 *
 * Returns 0 when @fstab is NULL (nothing to do), -1 when the file cannot be
 * opened, otherwise the result of mount_file_entries().
 */
static int setup_mount(const struct lxc_conf *conf,
		       const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	int rc;
	FILE *fp;

	if (fstab == NULL)
		return 0;

	fp = setmntent(fstab, "r");
	if (fp == NULL) {
		SYSERROR("Failed to open \"%s\"", fstab);
		return -1;
	}

	rc = mount_file_entries(conf, rootfs, fp, lxc_name, lxc_path);
	if (rc < 0)
		ERROR("Failed to set up mount entries");

	/* The stream is released on success and on failure alike. */
	endmntent(fp);
	return rc;
}
2302
5ef5c9a3 2303FILE *make_anonymous_mount_file(struct lxc_list *mount)
e7938e9e 2304{
5ef5c9a3 2305 int ret;
e7938e9e 2306 char *mount_entry;
5ef5c9a3 2307 struct lxc_list *iterator;
5ef5c9a3
CB
2308 int fd = -1;
2309
0fd73091 2310 fd = memfd_create(".lxc_mount_file", MFD_CLOEXEC);
5ef5c9a3 2311 if (fd < 0) {
a324e7eb
CB
2312 char template[] = P_tmpdir "/.lxc_mount_file_XXXXXX";
2313
5ef5c9a3
CB
2314 if (errno != ENOSYS)
2315 return NULL;
a324e7eb
CB
2316
2317 fd = lxc_make_tmpfile(template, true);
0fd73091
CB
2318 if (fd < 0) {
2319 SYSERROR("Could not create temporary mount file");
2320 return NULL;
2321 }
2322
6bd04140 2323 TRACE("Created temporary mount file");
5ef5c9a3 2324 }
0fd73091
CB
2325 if (fd < 0) {
2326 SYSERROR("Could not create temporary mount file");
9fc7f8c0 2327 return NULL;
e7938e9e
MN
2328 }
2329
0fd73091
CB
2330 lxc_list_for_each (iterator, mount) {
2331 size_t len;
2332
e7938e9e 2333 mount_entry = iterator->elem;
0fd73091 2334 len = strlen(mount_entry);
5ef5c9a3 2335
489f39be 2336 ret = lxc_write_nointr(fd, mount_entry, len);
0fd73091
CB
2337 if (ret != len)
2338 goto on_error;
2339
489f39be 2340 ret = lxc_write_nointr(fd, "\n", 1);
0fd73091
CB
2341 if (ret != 1)
2342 goto on_error;
e7938e9e
MN
2343 }
2344
0fd73091
CB
2345 ret = lseek(fd, 0, SEEK_SET);
2346 if (ret < 0)
2347 goto on_error;
2348
2349 return fdopen(fd, "r+");
2350
2351on_error:
2352 SYSERROR("Failed to write mount entry to temporary mount file");
2353 close(fd);
2354 return NULL;
9fc7f8c0
TA
2355}
2356
06749971
CB
/* Mount all entries contained in the list @mount.
 *
 * The entries are first written to an anonymous mount file which is then
 * processed by mount_file_entries().
 *
 * Returns -1 if the anonymous file cannot be created, otherwise the result of
 * mount_file_entries().
 */
static int setup_mount_entries(const struct lxc_conf *conf,
			       const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	int rc;
	FILE *fp = make_anonymous_mount_file(mount);

	if (fp == NULL)
		return -1;

	rc = mount_file_entries(conf, rootfs, fp, lxc_name, lxc_path);

	fclose(fp);
	return rc;
}
2374
bab88e68
CS
2375static int parse_cap(const char *cap)
2376{
84760c11 2377 size_t i;
2378 int capid = -1;
0fd73091
CB
2379 size_t end = sizeof(caps_opt) / sizeof(caps_opt[0]);
2380 char *ptr = NULL;
bab88e68 2381
0fd73091 2382 if (strcmp(cap, "none") == 0)
7035407c
DE
2383 return -2;
2384
8560cd36 2385 for (i = 0; i < end; i++) {
bab88e68
CS
2386 if (strcmp(cap, caps_opt[i].name))
2387 continue;
2388
2389 capid = caps_opt[i].value;
2390 break;
2391 }
2392
2393 if (capid < 0) {
0fd73091
CB
2394 /* Try to see if it's numeric, so the user may specify
2395 * capabilities that the running kernel knows about but we
2396 * don't
2397 */
bab88e68
CS
2398 errno = 0;
2399 capid = strtol(cap, &ptr, 10);
2400 if (!ptr || *ptr != '\0' || errno != 0)
2401 /* not a valid number */
2402 capid = -1;
2403 else if (capid > lxc_caps_last_cap())
2404 /* we have a number but it's not a valid
2405 * capability */
2406 capid = -1;
2407 }
2408
2409 return capid;
2410}
2411
0769b82a
CS
2412int in_caplist(int cap, struct lxc_list *caps)
2413{
0769b82a 2414 int capid;
0fd73091 2415 struct lxc_list *iterator;
0769b82a 2416
0fd73091 2417 lxc_list_for_each (iterator, caps) {
0769b82a
CS
2418 capid = parse_cap(iterator->elem);
2419 if (capid == cap)
2420 return 1;
2421 }
2422
2423 return 0;
2424}
2425
81810dd1
DL
2426static int setup_caps(struct lxc_list *caps)
2427{
bab88e68 2428 int capid;
0fd73091
CB
2429 char *drop_entry;
2430 struct lxc_list *iterator;
81810dd1 2431
0fd73091
CB
2432 lxc_list_for_each (iterator, caps) {
2433 int ret;
81810dd1
DL
2434
2435 drop_entry = iterator->elem;
2436
bab88e68 2437 capid = parse_cap(drop_entry);
0fd73091 2438 if (capid < 0) {
1e11be34
DL
2439 ERROR("unknown capability %s", drop_entry);
2440 return -1;
81810dd1
DL
2441 }
2442
0fd73091
CB
2443 ret = prctl(PR_CAPBSET_DROP, capid, 0, 0, 0);
2444 if (ret < 0) {
2445 SYSERROR("Failed to remove %s capability", drop_entry);
3ec1648d
SH
2446 return -1;
2447 }
0fd73091 2448 DEBUG("Dropped %s (%d) capability", drop_entry, capid);
81810dd1
DL
2449 }
2450
0fd73091 2451 DEBUG("Capabilities have been setup");
1fb86a7c
SH
2452 return 0;
2453}
2454
2455static int dropcaps_except(struct lxc_list *caps)
2456{
0fd73091 2457 int i, capid, numcaps;
1fb86a7c 2458 char *keep_entry;
0fd73091 2459 struct lxc_list *iterator;
1fb86a7c 2460
0fd73091 2461 numcaps = lxc_caps_last_cap() + 1;
2caf9a97
SH
2462 if (numcaps <= 0 || numcaps > 200)
2463 return -1;
0fd73091 2464 TRACE("Found %d capabilities", numcaps);
2caf9a97 2465
1a0e70ac 2466 /* caplist[i] is 1 if we keep capability i */
1fb86a7c
SH
2467 int *caplist = alloca(numcaps * sizeof(int));
2468 memset(caplist, 0, numcaps * sizeof(int));
2469
0fd73091 2470 lxc_list_for_each (iterator, caps) {
1fb86a7c
SH
2471 keep_entry = iterator->elem;
2472
bab88e68 2473 capid = parse_cap(keep_entry);
7035407c
DE
2474 if (capid == -2)
2475 continue;
2476
0fd73091
CB
2477 if (capid < 0) {
2478 ERROR("Unknown capability %s", keep_entry);
1fb86a7c
SH
2479 return -1;
2480 }
2481
0fd73091 2482 DEBUG("Keep capability %s (%d)", keep_entry, capid);
1fb86a7c
SH
2483 caplist[capid] = 1;
2484 }
0fd73091
CB
2485
2486 for (i = 0; i < numcaps; i++) {
2487 int ret;
2488
1fb86a7c
SH
2489 if (caplist[i])
2490 continue;
0fd73091
CB
2491
2492 ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
2493 if (ret < 0) {
2494 SYSERROR("Failed to remove capability %d", i);
3ec1648d
SH
2495 return -1;
2496 }
1fb86a7c
SH
2497 }
2498
0fd73091 2499 DEBUG("Capabilities have been setup");
81810dd1
DL
2500 return 0;
2501}
2502
0fd73091
CB
2503static int parse_resource(const char *res)
2504{
2505 int ret;
c6d09e15
WB
2506 size_t i;
2507 int resid = -1;
2508
0fd73091 2509 for (i = 0; i < sizeof(limit_opt) / sizeof(limit_opt[0]); ++i)
c6d09e15
WB
2510 if (strcmp(res, limit_opt[i].name) == 0)
2511 return limit_opt[i].value;
c6d09e15 2512
0fd73091 2513 /* Try to see if it's numeric, so the user may specify
c6d09e15 2514 * resources that the running kernel knows about but
0fd73091
CB
2515 * we don't.
2516 */
2517 ret = lxc_safe_int(res, &resid);
2518 if (ret < 0)
2519 return -1;
2520
2521 return resid;
c6d09e15
WB
2522}
2523
0fd73091
CB
2524int setup_resource_limits(struct lxc_list *limits, pid_t pid)
2525{
2526 int resid;
c6d09e15
WB
2527 struct lxc_list *it;
2528 struct lxc_limit *lim;
c6d09e15 2529
0fd73091 2530 lxc_list_for_each (it, limits) {
c6d09e15
WB
2531 lim = it->elem;
2532
2533 resid = parse_resource(lim->resource);
2534 if (resid < 0) {
0fd73091 2535 ERROR("Unknown resource %s", lim->resource);
c6d09e15
WB
2536 return -1;
2537 }
2538
f48b5fd8 2539#if HAVE_PRLIMIT || HAVE_PRLIMIT64
c6d09e15 2540 if (prlimit(pid, resid, &lim->limit, NULL) != 0) {
0fd73091
CB
2541 ERROR("Failed to set limit %s: %s", lim->resource,
2542 strerror(errno));
c6d09e15
WB
2543 return -1;
2544 }
f48b5fd8
FF
2545#else
2546 ERROR("Cannot set limit %s as prlimit is missing", lim->resource);
2547 return -1;
2548#endif
c6d09e15 2549 }
0fd73091 2550
c6d09e15
WB
2551 return 0;
2552}
2553
7edd0540
L
2554int setup_sysctl_parameters(struct lxc_list *sysctls)
2555{
2556 struct lxc_list *it;
2557 struct lxc_sysctl *elem;
0fd73091 2558 int ret = 0;
7edd0540
L
2559 char *tmp = NULL;
2560 char filename[MAXPATHLEN] = {0};
7edd0540 2561
0fd73091 2562 lxc_list_for_each (it, sysctls) {
7edd0540
L
2563 elem = it->elem;
2564 tmp = lxc_string_replace(".", "/", elem->key);
2565 if (!tmp) {
2566 ERROR("Failed to replace key %s", elem->key);
2567 return -1;
2568 }
2569
2570 ret = snprintf(filename, sizeof(filename), "/proc/sys/%s", tmp);
2571 free(tmp);
2572 if (ret < 0 || (size_t)ret >= sizeof(filename)) {
2573 ERROR("Error setting up sysctl parameters path");
2574 return -1;
2575 }
2576
0fd73091 2577 ret = lxc_write_to_file(filename, elem->value,
7cea5905 2578 strlen(elem->value), false, 0666);
7edd0540 2579 if (ret < 0) {
0fd73091
CB
2580 ERROR("Failed to setup sysctl parameters %s to %s",
2581 elem->key, elem->value);
7edd0540
L
2582 return -1;
2583 }
2584 }
0fd73091 2585
7edd0540
L
2586 return 0;
2587}
2588
61d7a733
YT
2589int setup_proc_filesystem(struct lxc_list *procs, pid_t pid)
2590{
2591 struct lxc_list *it;
2592 struct lxc_proc *elem;
0fd73091 2593 int ret = 0;
61d7a733
YT
2594 char *tmp = NULL;
2595 char filename[MAXPATHLEN] = {0};
61d7a733 2596
0fd73091 2597 lxc_list_for_each (it, procs) {
61d7a733
YT
2598 elem = it->elem;
2599 tmp = lxc_string_replace(".", "/", elem->filename);
2600 if (!tmp) {
2601 ERROR("Failed to replace key %s", elem->filename);
2602 return -1;
2603 }
2604
2605 ret = snprintf(filename, sizeof(filename), "/proc/%d/%s", pid, tmp);
2606 free(tmp);
2607 if (ret < 0 || (size_t)ret >= sizeof(filename)) {
2608 ERROR("Error setting up proc filesystem path");
2609 return -1;
2610 }
2611
0fd73091 2612 ret = lxc_write_to_file(filename, elem->value,
7cea5905 2613 strlen(elem->value), false, 0666);
61d7a733 2614 if (ret < 0) {
0fd73091
CB
2615 ERROR("Failed to setup proc filesystem %s to %s",
2616 elem->filename, elem->value);
61d7a733
YT
2617 return -1;
2618 }
2619 }
0fd73091 2620
61d7a733
YT
2621 return 0;
2622}
2623
ae9242c8
SH
2624static char *default_rootfs_mount = LXCROOTFSMOUNT;
2625
7b379ab3 2626struct lxc_conf *lxc_conf_init(void)
089cd8b8 2627{
26ddeedd 2628 int i;
0fd73091 2629 struct lxc_conf *new;
7b379ab3 2630
13277ec4 2631 new = malloc(sizeof(*new));
0fd73091 2632 if (!new)
7b379ab3 2633 return NULL;
7b379ab3
MN
2634 memset(new, 0, sizeof(*new));
2635
4b73005c 2636 new->loglevel = LXC_LOG_LEVEL_NOTSET;
cccc74b5 2637 new->personality = -1;
124fa0a8 2638 new->autodev = 1;
3a784510 2639 new->console.buffer_size = 0;
596a818d
DE
2640 new->console.log_path = NULL;
2641 new->console.log_fd = -1;
861813e5 2642 new->console.log_size = 0;
28a4b0e5 2643 new->console.path = NULL;
63376d7d 2644 new->console.peer = -1;
fb87aa6a
CB
2645 new->console.proxy.busy = -1;
2646 new->console.proxy.master = -1;
2647 new->console.proxy.slave = -1;
63376d7d
DL
2648 new->console.master = -1;
2649 new->console.slave = -1;
2650 new->console.name[0] = '\0';
732375f5 2651 memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf));
d2e30e99 2652 new->maincmd_fd = -1;
76a26f55 2653 new->nbd_idx = -1;
54c30e29 2654 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048 2655 if (!new->rootfs.mount) {
53f3f048
SH
2656 free(new);
2657 return NULL;
2658 }
858377e4 2659 new->logfd = -1;
7b379ab3 2660 lxc_list_init(&new->cgroup);
54860ed0 2661 lxc_list_init(&new->cgroup2);
7b379ab3
MN
2662 lxc_list_init(&new->network);
2663 lxc_list_init(&new->mount_list);
81810dd1 2664 lxc_list_init(&new->caps);
1fb86a7c 2665 lxc_list_init(&new->keepcaps);
f6d3e3e4 2666 lxc_list_init(&new->id_map);
46ad64ab
CB
2667 new->root_nsuid_map = NULL;
2668 new->root_nsgid_map = NULL;
f979ac15 2669 lxc_list_init(&new->includes);
4184c3e1 2670 lxc_list_init(&new->aliens);
7c661726 2671 lxc_list_init(&new->environment);
c6d09e15 2672 lxc_list_init(&new->limits);
7edd0540 2673 lxc_list_init(&new->sysctls);
61d7a733 2674 lxc_list_init(&new->procs);
44ae0fb6 2675 new->hooks_version = 0;
28d9e29e 2676 for (i = 0; i < NUM_LXC_HOOKS; i++)
26ddeedd 2677 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2678 lxc_list_init(&new->groups);
d39b10eb 2679 lxc_list_init(&new->state_clients);
fe4de9a6
DE
2680 new->lsm_aa_profile = NULL;
2681 new->lsm_se_context = NULL;
7a0bcca3 2682 new->tmp_umount_proc = false;
7b379ab3 2683
72bb04e4
PT
2684 /* if running in a new user namespace, init and COMMAND
2685 * default to running as UID/GID 0 when using lxc-execute */
2686 new->init_uid = 0;
2687 new->init_gid = 0;
43654d34 2688 memset(&new->cgroup_meta, 0, sizeof(struct lxc_cgroup));
b074bbf1 2689 memset(&new->ns_share, 0, sizeof(char *) * LXC_NS_MAX);
72bb04e4 2690
7b379ab3 2691 return new;
089cd8b8
DL
2692}
2693
344c9d81 2694int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
a19b974f 2695 size_t buf_size)
f6d3e3e4 2696{
29053180 2697 int fd, ret;
0fd73091 2698 char path[MAXPATHLEN];
f6d3e3e4 2699
a19b974f
CB
2700 if (geteuid() != 0 && idtype == ID_TYPE_GID) {
2701 size_t buflen;
2702
2703 ret = snprintf(path, MAXPATHLEN, "/proc/%d/setgroups", pid);
0fd73091 2704 if (ret < 0 || ret >= MAXPATHLEN)
a19b974f 2705 return -E2BIG;
a19b974f
CB
2706
2707 fd = open(path, O_WRONLY);
2708 if (fd < 0 && errno != ENOENT) {
2709 SYSERROR("Failed to open \"%s\"", path);
2710 return -1;
2711 }
2712
2388737b
CB
2713 if (fd >= 0) {
2714 buflen = sizeof("deny\n") - 1;
2715 errno = 0;
2716 ret = lxc_write_nointr(fd, "deny\n", buflen);
395b1a3e 2717 close(fd);
2388737b 2718 if (ret != buflen) {
0fd73091
CB
2719 SYSERROR("Failed to write \"deny\" to "
2720 "\"/proc/%d/setgroups\"", pid);
2388737b
CB
2721 return -1;
2722 }
395b1a3e 2723 TRACE("Wrote \"deny\" to \"/proc/%d/setgroups\"", pid);
a19b974f 2724 }
a19b974f
CB
2725 }
2726
29053180
CB
2727 ret = snprintf(path, MAXPATHLEN, "/proc/%d/%cid_map", pid,
2728 idtype == ID_TYPE_UID ? 'u' : 'g');
0fd73091 2729 if (ret < 0 || ret >= MAXPATHLEN)
f6d3e3e4 2730 return -E2BIG;
29053180
CB
2731
2732 fd = open(path, O_WRONLY);
2733 if (fd < 0) {
a19b974f 2734 SYSERROR("Failed to open \"%s\"", path);
29053180 2735 return -1;
f6d3e3e4 2736 }
29053180
CB
2737
2738 errno = 0;
2739 ret = lxc_write_nointr(fd, buf, buf_size);
395b1a3e 2740 close(fd);
29053180 2741 if (ret != buf_size) {
a19b974f 2742 SYSERROR("Failed to write %cid mapping to \"%s\"",
29053180 2743 idtype == ID_TYPE_UID ? 'u' : 'g', path);
29053180
CB
2744 return -1;
2745 }
29053180
CB
2746
2747 return 0;
f6d3e3e4
SH
2748}
2749
6e50e704
CB
2750/* Check whether a binary exist and has either CAP_SETUID, CAP_SETGID or both.
2751 *
2752 * @return 1 if functional binary was found
2753 * @return 0 if binary exists but is lacking privilege
2754 * @return -ENOENT if binary does not exist
2755 * @return -EINVAL if cap to check is neither CAP_SETUID nor CAP_SETGID
6e50e704 2756 */
df6a2945
CB
2757static int idmaptool_on_path_and_privileged(const char *binary, cap_value_t cap)
2758{
2759 char *path;
2760 int ret;
2761 struct stat st;
2762 int fret = 0;
2763
6e50e704
CB
2764 if (cap != CAP_SETUID && cap != CAP_SETGID)
2765 return -EINVAL;
2766
df6a2945
CB
2767 path = on_path(binary, NULL);
2768 if (!path)
2769 return -ENOENT;
2770
2771 ret = stat(path, &st);
2772 if (ret < 0) {
2773 fret = -errno;
2774 goto cleanup;
2775 }
2776
2777 /* Check if the binary is setuid. */
2778 if (st.st_mode & S_ISUID) {
0fd73091 2779 DEBUG("The binary \"%s\" does have the setuid bit set", path);
df6a2945
CB
2780 fret = 1;
2781 goto cleanup;
2782 }
2783
0fd73091 2784#if HAVE_LIBCAP && LIBCAP_SUPPORTS_FILE_CAPABILITIES
df6a2945
CB
2785 /* Check if it has the CAP_SETUID capability. */
2786 if ((cap & CAP_SETUID) &&
2787 lxc_file_cap_is_set(path, CAP_SETUID, CAP_EFFECTIVE) &&
2788 lxc_file_cap_is_set(path, CAP_SETUID, CAP_PERMITTED)) {
2789 DEBUG("The binary \"%s\" has CAP_SETUID in its CAP_EFFECTIVE "
0fd73091 2790 "and CAP_PERMITTED sets", path);
df6a2945
CB
2791 fret = 1;
2792 goto cleanup;
2793 }
2794
2795 /* Check if it has the CAP_SETGID capability. */
2796 if ((cap & CAP_SETGID) &&
2797 lxc_file_cap_is_set(path, CAP_SETGID, CAP_EFFECTIVE) &&
2798 lxc_file_cap_is_set(path, CAP_SETGID, CAP_PERMITTED)) {
2799 DEBUG("The binary \"%s\" has CAP_SETGID in its CAP_EFFECTIVE "
0fd73091 2800 "and CAP_PERMITTED sets", path);
df6a2945
CB
2801 fret = 1;
2802 goto cleanup;
2803 }
0fd73091 2804#else
69924fff
CB
2805 /* If we cannot check for file capabilities we need to give the benefit
2806 * of the doubt. Otherwise we might fail even though all the necessary
2807 * file capabilities are set.
2808 */
d6018f88 2809 DEBUG("Cannot check for file capabilites as full capability support is "
0fd73091 2810 "missing. Manual intervention needed");
d6018f88 2811 fret = 1;
0fd73091 2812#endif
df6a2945
CB
2813
2814cleanup:
2815 free(path);
2816 return fret;
2817}
2818
986ef930
CB
/* Run the command line passed in @args through "/bin/sh -c".
 *
 * @args is used as a NUL-terminated command string. Returns -1 if execl()
 * returns, i.e. if the shell could not be executed.
 */
int lxc_map_ids_exec_wrapper(void *args)
{
	char *cmdline = args;

	execl("/bin/sh", "sh", "-c", cmdline, (char *)NULL);

	/* Only reached when execl() failed. */
	return -1;
}
2824
f6d3e3e4
SH
2825int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
2826{
0fd73091 2827 int fill, left;
986ef930 2828 char u_or_g;
4bc3b759 2829 char *pos;
986ef930 2830 char cmd_output[MAXPATHLEN];
0fd73091
CB
2831 struct id_map *map;
2832 struct lxc_list *iterator;
2833 enum idtype type;
986ef930
CB
2834 /* strlen("new@idmap") = 9
2835 * +
2836 * strlen(" ") = 1
2837 * +
2838 * LXC_NUMSTRLEN64
2839 * +
2840 * strlen(" ") = 1
2841 *
2842 * We add some additional space to make sure that we really have
2843 * LXC_IDMAPLEN bytes available for our the {g,u]id mapping.
2844 */
0fd73091 2845 int ret = 0, gidmap = 0, uidmap = 0;
986ef930 2846 char mapbuf[9 + 1 + LXC_NUMSTRLEN64 + 1 + LXC_IDMAPLEN] = {0};
0fd73091 2847 bool had_entry = false, use_shadow = false;
c724025c
JC
2848 int hostuid, hostgid;
2849
2850 hostuid = geteuid();
2851 hostgid = getegid();
df6a2945
CB
2852
2853 /* If new{g,u}idmap exists, that is, if shadow is handing out subuid
2854 * ranges, then insist that root also reserve ranges in subuid. This
22038de5
SH
2855 * will protected it by preventing another user from being handed the
2856 * range by shadow.
2857 */
df6a2945 2858 uidmap = idmaptool_on_path_and_privileged("newuidmap", CAP_SETUID);
6e50e704
CB
2859 if (uidmap == -ENOENT)
2860 WARN("newuidmap binary is missing");
2861 else if (!uidmap)
2862 WARN("newuidmap is lacking necessary privileges");
2863
df6a2945 2864 gidmap = idmaptool_on_path_and_privileged("newgidmap", CAP_SETGID);
6e50e704
CB
2865 if (gidmap == -ENOENT)
2866 WARN("newgidmap binary is missing");
2867 else if (!gidmap)
2868 WARN("newgidmap is lacking necessary privileges");
2869
df6a2945 2870 if (uidmap > 0 && gidmap > 0) {
0fd73091 2871 DEBUG("Functional newuidmap and newgidmap binary found");
4bc3b759 2872 use_shadow = true;
df6a2945 2873 } else {
99d43365
CB
2874 /* In case unprivileged users run application containers via
2875 * execute() or a start*() there are valid cases where they may
2876 * only want to map their own {g,u}id. Let's not block them from
2877 * doing so by requiring geteuid() == 0.
2878 */
2879 DEBUG("No newuidmap and newgidmap binary found. Trying to "
c724025c
JC
2880 "write directly with euid %d", hostuid);
2881 }
2882
2883 /* Check if we really need to use newuidmap and newgidmap.
2884 * If the user is only remapping his own {g,u}id, we don't need it.
2885 */
2886 if (use_shadow && lxc_list_len(idmap) == 2) {
2887 use_shadow = false;
2888 lxc_list_for_each(iterator, idmap) {
2889 map = iterator->elem;
2890 if (map->idtype == ID_TYPE_UID && map->range == 1 &&
2891 map->nsid == hostuid && map->hostid == hostuid)
2892 continue;
2893 if (map->idtype == ID_TYPE_GID && map->range == 1 &&
2894 map->nsid == hostgid && map->hostid == hostgid)
2895 continue;
2896 use_shadow = true;
2897 break;
2898 }
0e6e3a41 2899 }
251d0d2a 2900
986ef930
CB
2901 for (type = ID_TYPE_UID, u_or_g = 'u'; type <= ID_TYPE_GID;
2902 type++, u_or_g = 'g') {
2903 pos = mapbuf;
2904
0e6e3a41 2905 if (use_shadow)
986ef930 2906 pos += sprintf(mapbuf, "new%cidmap %d", u_or_g, pid);
4f7521b4 2907
cf3ef16d 2908 lxc_list_for_each(iterator, idmap) {
251d0d2a 2909 map = iterator->elem;
cf3ef16d
SH
2910 if (map->idtype != type)
2911 continue;
2912
4bc3b759
CB
2913 had_entry = true;
2914
986ef930 2915 left = LXC_IDMAPLEN - (pos - mapbuf);
d1838f34 2916 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
4bc3b759
CB
2917 use_shadow ? " " : "", map->nsid,
2918 map->hostid, map->range,
0e6e3a41 2919 use_shadow ? "" : "\n");
a427e268
CB
2920 if (fill <= 0 || fill >= left) {
2921 /* The kernel only takes <= 4k for writes to
2922 * /proc/<pid>/{g,u}id_map
2923 */
2924 SYSERROR("Too many %cid mappings defined", u_or_g);
2925 return -1;
2926 }
4bc3b759 2927
cf3ef16d 2928 pos += fill;
251d0d2a 2929 }
cf3ef16d 2930 if (!had_entry)
4f7521b4 2931 continue;
cf3ef16d 2932
986ef930
CB
2933 /* Try to catch the ouput of new{g,u}idmap to make debugging
2934 * easier.
2935 */
2936 if (use_shadow) {
2937 ret = run_command(cmd_output, sizeof(cmd_output),
2938 lxc_map_ids_exec_wrapper,
2939 (void *)mapbuf);
2940 if (ret < 0) {
54fbbeb5
CB
2941 ERROR("new%cidmap failed to write mapping \"%s\": %s",
2942 u_or_g, cmd_output, mapbuf);
986ef930
CB
2943 return -1;
2944 }
54fbbeb5 2945 TRACE("new%cidmap wrote mapping \"%s\"", u_or_g, mapbuf);
d1838f34 2946 } else {
986ef930 2947 ret = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
54fbbeb5 2948 if (ret < 0) {
da0f9977 2949 ERROR("Failed to write mapping: %s", mapbuf);
986ef930 2950 return -1;
54fbbeb5
CB
2951 }
2952 TRACE("Wrote mapping \"%s\"", mapbuf);
d1838f34 2953 }
986ef930
CB
2954
2955 memset(mapbuf, 0, sizeof(mapbuf));
f6d3e3e4 2956 }
251d0d2a 2957
986ef930 2958 return 0;
f6d3e3e4
SH
2959}
2960
0fd73091 2961/* Return the host uid/gid to which the container root is mapped in val.
0b3a6504 2962 * Return true if id was found, false otherwise.
cf3ef16d 2963 */
2a9a80cb 2964bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
4160c3a0 2965 unsigned long *val)
cf3ef16d 2966{
4160c3a0 2967 unsigned nsid;
0fd73091
CB
2968 struct id_map *map;
2969 struct lxc_list *it;
4160c3a0
CB
2970
2971 if (idtype == ID_TYPE_UID)
2972 nsid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
2973 else
2974 nsid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
cf3ef16d 2975
0fd73091 2976 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 2977 map = it->elem;
7b50c609 2978 if (map->idtype != idtype)
cf3ef16d 2979 continue;
4160c3a0 2980 if (map->nsid != nsid)
cf3ef16d 2981 continue;
2a9a80cb
SH
2982 *val = map->hostid;
2983 return true;
cf3ef16d 2984 }
4160c3a0 2985
2a9a80cb 2986 return false;
cf3ef16d
SH
2987}
2988
2133f58c 2989int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d 2990{
cf3ef16d 2991 struct id_map *map;
0fd73091
CB
2992 struct lxc_list *it;
2993
2994 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 2995 map = it->elem;
2133f58c 2996 if (map->idtype != idtype)
cf3ef16d 2997 continue;
0fd73091 2998
cf3ef16d 2999 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3000 return (id - map->hostid) + map->nsid;
cf3ef16d 3001 }
0fd73091 3002
57d116ab 3003 return -1;
cf3ef16d
SH
3004}
3005
339efad9 3006int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d 3007{
cf3ef16d 3008 struct id_map *map;
0fd73091 3009 struct lxc_list *it;
2133f58c 3010 unsigned int freeid = 0;
0fd73091 3011
cf3ef16d 3012again:
0fd73091 3013 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 3014 map = it->elem;
2133f58c 3015 if (map->idtype != idtype)
cf3ef16d 3016 continue;
0fd73091 3017
cf3ef16d
SH
3018 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3019 freeid = map->nsid + map->range;
3020 goto again;
3021 }
3022 }
0fd73091 3023
cf3ef16d
SH
3024 return freeid;
3025}
3026
/* Callback that replaces the current process with "lxc-usernsexec".
 *
 * @args is the NULL-terminated argument vector handed to execvp(). The
 * function only returns (with -1) when the exec itself failed.
 */
int chown_mapped_root_exec_wrapper(void *args)
{
	char *const *argv = args;

	execvp("lxc-usernsexec", argv);

	/* Only reached if execvp() failed. */
	return -1;
}
3032
0fd73091 3033/* chown_mapped_root: for an unprivileged user with uid/gid X to
7b50c609
TS
3034 * chown a dir to subuid/subgid Y, he needs to run chown as root
3035 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3036 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3037 * root is privileged with respect to hostuid/hostgid X, allowing
3038 * him to do the chown.
f6d3e3e4 3039 */
41dc7155 3040int chown_mapped_root(const char *path, struct lxc_conf *conf)
f6d3e3e4 3041{
f4f52cb5 3042 uid_t rootuid, rootgid;
2a9a80cb 3043 unsigned long val;
f4f52cb5
CB
3044 int hostuid, hostgid, ret;
3045 struct stat sb;
3046 char map1[100], map2[100], map3[100], map4[100], map5[100];
3047 char ugid[100];
41dc7155 3048 const char *args1[] = {"lxc-usernsexec",
f4f52cb5
CB
3049 "-m", map1,
3050 "-m", map2,
3051 "-m", map3,
3052 "-m", map5,
3053 "--", "chown", ugid, path,
3054 NULL};
41dc7155 3055 const char *args2[] = {"lxc-usernsexec",
f4f52cb5
CB
3056 "-m", map1,
3057 "-m", map2,
3058 "-m", map3,
3059 "-m", map4,
3060 "-m", map5,
3061 "--", "chown", ugid, path,
3062 NULL};
3063 char cmd_output[MAXPATHLEN];
3064
3065 hostuid = geteuid();
3066 hostgid = getegid();
f6d3e3e4 3067
2a9a80cb 3068 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
bc80f098 3069 ERROR("No uid mapping for container root");
c4d10a05 3070 return -1;
f6d3e3e4 3071 }
f4f52cb5 3072 rootuid = (uid_t)val;
0fd73091 3073
7b50c609 3074 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
bc80f098 3075 ERROR("No gid mapping for container root");
7b50c609
TS
3076 return -1;
3077 }
f4f52cb5 3078 rootgid = (gid_t)val;
2a9a80cb 3079
f4f52cb5 3080 if (hostuid == 0) {
7b50c609 3081 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3082 ERROR("Error chowning %s", path);
3083 return -1;
3084 }
0fd73091 3085
c4d10a05
SH
3086 return 0;
3087 }
f3d7e4ca 3088
f4f52cb5 3089 if (rootuid == hostuid) {
1a0e70ac 3090 /* nothing to do */
b103ceac 3091 INFO("Container root is our uid; no need to chown");
f3d7e4ca
SH
3092 return 0;
3093 }
3094
bbdbf8f0 3095 /* save the current gid of "path" */
f4f52cb5
CB
3096 if (stat(path, &sb) < 0) {
3097 ERROR("Error stat %s", path);
f6d3e3e4
SH
3098 return -1;
3099 }
7b50c609 3100
bbdbf8f0
CB
3101 /* Update the path argument in case this was overlayfs. */
3102 args1[sizeof(args1) / sizeof(args1[0]) - 2] = path;
3103 args2[sizeof(args2) / sizeof(args2[0]) - 2] = path;
3104
f4f52cb5
CB
3105 /*
3106 * A file has to be group-owned by a gid mapped into the
3107 * container, or the container won't be privileged over it.
3108 */
3109 DEBUG("trying to chown \"%s\" to %d", path, hostgid);
3110 if (sb.st_uid == hostuid &&
3111 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3112 chown(path, -1, hostgid) < 0) {
3113 ERROR("Failed chgrping %s", path);
3114 return -1;
3115 }
f6d3e3e4 3116
1a0e70ac 3117 /* "u:0:rootuid:1" */
f4f52cb5
CB
3118 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
3119 if (ret < 0 || ret >= 100) {
3120 ERROR("Error uid printing map string");
3121 return -1;
3122 }
7b50c609 3123
1a0e70ac 3124 /* "u:hostuid:hostuid:1" */
f4f52cb5
CB
3125 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3126 if (ret < 0 || ret >= 100) {
3127 ERROR("Error uid printing map string");
3128 return -1;
3129 }
c4d10a05 3130
1a0e70ac 3131 /* "g:0:rootgid:1" */
f4f52cb5
CB
3132 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
3133 if (ret < 0 || ret >= 100) {
3134 ERROR("Error gid printing map string");
3135 return -1;
3136 }
98e5ba51 3137
1a0e70ac 3138 /* "g:pathgid:rootgid+pathgid:1" */
f4f52cb5
CB
3139 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3140 rootgid + (gid_t)sb.st_gid);
3141 if (ret < 0 || ret >= 100) {
3142 ERROR("Error gid printing map string");
3143 return -1;
3144 }
c4d10a05 3145
1a0e70ac 3146 /* "g:hostgid:hostgid:1" */
f4f52cb5
CB
3147 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3148 if (ret < 0 || ret >= 100) {
3149 ERROR("Error gid printing map string");
3150 return -1;
3151 }
7b50c609 3152
1a0e70ac 3153 /* "0:pathgid" (chown) */
f4f52cb5
CB
3154 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
3155 if (ret < 0 || ret >= 100) {
3156 ERROR("Error owner printing format string for chown");
3157 return -1;
3158 }
7b50c609 3159
f4f52cb5
CB
3160 if (hostgid == sb.st_gid)
3161 ret = run_command(cmd_output, sizeof(cmd_output),
3162 chown_mapped_root_exec_wrapper,
3163 (void *)args1);
3164 else
3165 ret = run_command(cmd_output, sizeof(cmd_output),
3166 chown_mapped_root_exec_wrapper,
3167 (void *)args2);
3168 if (ret < 0)
3169 ERROR("lxc-usernsexec failed: %s", cmd_output);
7b50c609 3170
f4f52cb5 3171 return ret;
f6d3e3e4
SH
3172}
3173
943144d9
CB
3174/* NOTE: Must not be called from inside the container namespace! */
3175int lxc_create_tmp_proc_mount(struct lxc_conf *conf)
5112cd70
SH
3176{
3177 int mounted;
3178
943144d9 3179 mounted = lxc_mount_proc_if_needed(conf->rootfs.path ? conf->rootfs.mount : "");
5112cd70 3180 if (mounted == -1) {
0fd73091 3181 SYSERROR("Failed to mount proc in the container");
01958b1f 3182 /* continue only if there is no rootfs */
943144d9 3183 if (conf->rootfs.path)
01958b1f 3184 return -1;
5112cd70 3185 } else if (mounted == 1) {
7a0bcca3 3186 conf->tmp_umount_proc = true;
5112cd70 3187 }
943144d9 3188
5112cd70
SH
3189 return 0;
3190}
3191
3192void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3193{
7a0bcca3 3194 if (!lxc_conf->tmp_umount_proc)
0fd73091
CB
3195 return;
3196
7a0bcca3
CB
3197 (void)umount2("/proc", MNT_DETACH);
3198 lxc_conf->tmp_umount_proc = false;
5112cd70
SH
3199}
3200
0fd73091 3201/* Walk /proc/mounts and change any shared entries to slave. */
6a0c909a 3202void remount_all_slave(void)
e995d7a2 3203{
6a49f05e
CB
3204 int memfd, mntinfo_fd, ret;
3205 ssize_t copied;
0fd73091 3206 FILE *f;
e995d7a2 3207 size_t len = 0;
0fd73091 3208 char *line = NULL;
e995d7a2 3209
6a49f05e 3210 mntinfo_fd = open("/proc/self/mountinfo", O_RDONLY | O_CLOEXEC);
fea3b91d
DJ
3211 if (mntinfo_fd < 0) {
3212 SYSERROR("Failed to open \"/proc/self/mountinfo\"");
6a49f05e 3213 return;
fea3b91d 3214 }
6a49f05e
CB
3215
3216 memfd = memfd_create(".lxc_mountinfo", MFD_CLOEXEC);
3217 if (memfd < 0) {
3218 char template[] = P_tmpdir "/.lxc_mountinfo_XXXXXX";
3219
3220 if (errno != ENOSYS) {
fea3b91d 3221 SYSERROR("Failed to create temporary in-memory file");
6a49f05e 3222 close(mntinfo_fd);
6a49f05e
CB
3223 return;
3224 }
3225
3226 memfd = lxc_make_tmpfile(template, true);
fea3b91d
DJ
3227 if (memfd < 0) {
3228 close(mntinfo_fd);
3229 WARN("Failed to create temporary file");
3230 return;
3231 }
6a49f05e
CB
3232 }
3233
3234#define __LXC_SENDFILE_MAX 0x7ffff000 /* maximum number of bytes sendfile can handle */
3235again:
3236 copied = sendfile(memfd, mntinfo_fd, NULL, __LXC_SENDFILE_MAX);
3237 if (copied < 0) {
3238 if (errno == EINTR)
3239 goto again;
3240
fea3b91d 3241 SYSERROR("Failed to copy \"/proc/self/mountinfo\"");
6a49f05e
CB
3242 close(mntinfo_fd);
3243 close(memfd);
6a49f05e
CB
3244 return;
3245 }
3246 close(mntinfo_fd);
3247
3248 /* After a successful fdopen() memfd will be closed when calling
3249 * fclose(f). Calling close(memfd) afterwards is undefined.
3250 */
3251 ret = lseek(memfd, 0, SEEK_SET);
3252 if (ret < 0) {
fea3b91d 3253 SYSERROR("Failed to reset file descriptor offset");
6a49f05e 3254 close(memfd);
6a49f05e
CB
3255 return;
3256 }
3257
3258 f = fdopen(memfd, "r");
e995d7a2 3259 if (!f) {
fea3b91d
DJ
3260 SYSERROR("Failed to open copy of \"/proc/self/mountinfo\" to mark "
3261 "all shared. Continuing");
6a49f05e 3262 close(memfd);
e995d7a2
SH
3263 return;
3264 }
3265
3266 while (getline(&line, &len, f) != -1) {
0fd73091
CB
3267 int ret;
3268 char *opts, *target;
3269
e995d7a2
SH
3270 target = get_field(line, 4);
3271 if (!target)
3272 continue;
0fd73091 3273
e995d7a2
SH
3274 opts = get_field(target, 2);
3275 if (!opts)
3276 continue;
0fd73091 3277
e995d7a2
SH
3278 null_endofword(opts);
3279 if (!strstr(opts, "shared"))
3280 continue;
0fd73091 3281
e995d7a2 3282 null_endofword(target);
0fd73091
CB
3283 ret = mount(NULL, target, NULL, MS_SLAVE, NULL);
3284 if (ret < 0) {
3285 SYSERROR("Failed to make \"%s\" MS_SLAVE", target);
e995d7a2 3286 ERROR("Continuing...");
6a49f05e 3287 continue;
e995d7a2 3288 }
6a49f05e 3289 TRACE("Remounted \"%s\" as MS_SLAVE", target);
e995d7a2
SH
3290 }
3291 fclose(f);
f10fad2f 3292 free(line);
6a49f05e 3293 TRACE("Remounted all mount table entries as MS_SLAVE");
e995d7a2
SH
3294}
3295
794248d0 3296static int lxc_execute_bind_init(struct lxc_handler *handler)
2322903b
SH
3297{
3298 int ret;
794248d0
CB
3299 char *p;
3300 char path[PATH_MAX], destpath[PATH_MAX];
3301 struct lxc_conf *conf = handler->conf;
9d9c111c
SH
3302
3303 /* If init exists in the container, don't bind mount a static one */
3304 p = choose_init(conf->rootfs.mount);
3305 if (p) {
41089848
TA
3306 char *old = p;
3307
3308 p = strdup(old + strlen(conf->rootfs.mount));
3309 free(old);
3310 if (!p)
3311 return -ENOMEM;
3312
3313 INFO("Found existing init at \"%s\"", p);
3314 goto out;
9d9c111c 3315 }
2322903b
SH
3316
3317 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
0fd73091 3318 if (ret < 0 || ret >= PATH_MAX)
8353b4c9 3319 return -1;
2322903b
SH
3320
3321 if (!file_exists(path)) {
0fd73091 3322 ERROR("The file \"%s\" does not exist on host", path);
8353b4c9 3323 return -1;
2322903b
SH
3324 }
3325
794248d0 3326 ret = snprintf(destpath, PATH_MAX, "%s" P_tmpdir "%s", conf->rootfs.mount, "/.lxc-init");
0fd73091 3327 if (ret < 0 || ret >= PATH_MAX)
8353b4c9 3328 return -1;
2322903b
SH
3329
3330 if (!file_exists(destpath)) {
794248d0
CB
3331 ret = mknod(destpath, S_IFREG | 0000, 0);
3332 if (ret < 0 && errno != EEXIST) {
3333 SYSERROR("Failed to create dummy \"%s\" file as bind mount target", destpath);
8353b4c9 3334 return -1;
2322903b 3335 }
2322903b
SH
3336 }
3337
592fd47a 3338 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
8353b4c9 3339 if (ret < 0) {
0fd73091 3340 SYSERROR("Failed to bind mount lxc.init.static into container");
8353b4c9
CB
3341 return -1;
3342 }
3343
794248d0
CB
3344 p = strdup(destpath + strlen(conf->rootfs.mount));
3345 if (!p)
3346 return -ENOMEM;
794248d0 3347
8353b4c9 3348 INFO("Bind mounted lxc.init.static into container at \"%s\"", path);
41089848 3349out:
4b5b3a2a 3350 ((struct execute_args *)handler->data)->init_fd = -1;
41089848 3351 ((struct execute_args *)handler->data)->init_path = p;
8353b4c9 3352 return 0;
2322903b
SH
3353}
3354
0fd73091
CB
3355/* This does the work of remounting / if it is shared, calling the container
3356 * pre-mount hooks, and mounting the rootfs.
35120d9c
SH
3357 */
3358int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3359{
0fd73091
CB
3360 int ret;
3361
35120d9c 3362 if (conf->rootfs_setup) {
35120d9c 3363 const char *path = conf->rootfs.mount;
0fd73091
CB
3364
3365 /* The rootfs was set up in another namespace. bind-mount it to
3366 * give us a mount in our own ns so we can pivot_root to it
3367 */
3368 ret = mount(path, path, "rootfs", MS_BIND, NULL);
3369 if (ret < 0) {
3370 ERROR("Failed to bind mount container / onto itself");
145832ba 3371 return -1;
35120d9c 3372 }
0fd73091
CB
3373
3374 TRACE("Bind mounted container / onto itself");
145832ba 3375 return 0;
35120d9c 3376 }
d4ef7c50 3377
e995d7a2
SH
3378 remount_all_slave();
3379
0fd73091
CB
3380 ret = run_lxc_hooks(name, "pre-mount", conf, NULL);
3381 if (ret < 0) {
3382 ERROR("Failed to run pre-mount hooks");
35120d9c
SH
3383 return -1;
3384 }
3385
0fd73091
CB
3386 ret = lxc_setup_rootfs(conf);
3387 if (ret < 0) {
3388 ERROR("Failed to setup rootfs for");
35120d9c
SH
3389 return -1;
3390 }
3391
3392 conf->rootfs_setup = true;
3393 return 0;
3394}
3395
1c1c7051
SH
3396static bool verify_start_hooks(struct lxc_conf *conf)
3397{
1c1c7051 3398 char path[MAXPATHLEN];
0fd73091
CB
3399 struct lxc_list *it;
3400
3401 lxc_list_for_each (it, &conf->hooks[LXCHOOK_START]) {
1c1c7051 3402 int ret;
0fd73091
CB
3403 struct stat st;
3404 char *hookname = it->elem;
1c1c7051
SH
3405
3406 ret = snprintf(path, MAXPATHLEN, "%s%s",
0fd73091
CB
3407 conf->rootfs.path ? conf->rootfs.mount : "",
3408 hookname);
1c1c7051
SH
3409 if (ret < 0 || ret >= MAXPATHLEN)
3410 return false;
0fd73091 3411
1c1c7051 3412 ret = stat(path, &st);
0fd73091 3413 if (ret < 0) {
7b6753e7 3414 SYSERROR("Start hook %s not found in container",
0fd73091 3415 hookname);
1c1c7051
SH
3416 return false;
3417 }
0fd73091 3418
6a0c909a 3419 return true;
1c1c7051
SH
3420 }
3421
3422 return true;
3423}
3424
4b5b3a2a
TA
3425static bool execveat_supported(void)
3426{
3427#ifdef __NR_execveat
3428 /*
3429 * We use the syscall here, because it was introduced in kernel 3.19,
3430 * while glibc got support for using the syscall much later, in 2.27.
3431 * We don't want to use glibc because it falls back to /proc, and the
3432 * container may not have /proc mounted depending on its configuration.
3433 */
3434 syscall(__NR_execveat, -1, "", NULL, NULL, AT_EMPTY_PATH);
3435 if (errno == ENOSYS)
3436 return false;
3437
3438 return true;
3439#else
3440 return false;
3441#endif
3442}
3443
3b988b33 3444int lxc_setup(struct lxc_handler *handler)
35120d9c 3445{
2187efd3 3446 int ret;
0fd73091 3447 const char *lxcpath = handler->lxcpath, *name = handler->name;
35120d9c 3448 struct lxc_conf *lxc_conf = handler->conf;
35120d9c 3449
8353b4c9
CB
3450 ret = do_rootfs_setup(lxc_conf, name, lxcpath);
3451 if (ret < 0) {
3452 ERROR("Failed to setup rootfs");
35120d9c
SH
3453 return -1;
3454 }
3455
28d9e29e 3456 if (handler->nsfd[LXC_NS_UTS] == -1) {
8353b4c9
CB
3457 ret = setup_utsname(lxc_conf->utsname);
3458 if (ret < 0) {
0fd73091 3459 ERROR("Failed to setup the utsname %s", name);
6c544cb3
MM
3460 return -1;
3461 }
0ad19a3f 3462 }
3463
8353b4c9
CB
3464 ret = lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network);
3465 if (ret < 0) {
3466 ERROR("Failed to setup network");
95b5ffaf 3467 return -1;
0ad19a3f 3468 }
3469
8353b4c9
CB
3470 ret = lxc_network_send_name_and_ifindex_to_parent(handler);
3471 if (ret < 0) {
3472 ERROR("Failed to send network device names and ifindices to parent");
790255cf
CB
3473 return -1;
3474 }
3475
bc6928ff 3476 if (lxc_conf->autodev > 0) {
8353b4c9
CB
3477 ret = mount_autodev(name, &lxc_conf->rootfs, lxcpath);
3478 if (ret < 0) {
3479 ERROR("Failed to mount \"/dev\"");
c6883f38
SH
3480 return -1;
3481 }
3482 }
3483
8353b4c9
CB
3484 /* Do automatic mounts (mainly /proc and /sys), but exclude those that
3485 * need to wait until other stuff has finished.
368bbc02 3486 */
8353b4c9
CB
3487 ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler);
3488 if (ret < 0) {
3489 ERROR("Failed to setup first automatic mounts");
368bbc02
CS
3490 return -1;
3491 }
3492
8353b4c9
CB
3493 ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
3494 if (ret < 0) {
3495 ERROR("Failed to setup mounts");
95b5ffaf 3496 return -1;
576f946d 3497 }
3498
7b6753e7 3499 /* Make sure any start hooks are in the container */
1c1c7051
SH
3500 if (!verify_start_hooks(lxc_conf))
3501 return -1;
3502
8353b4c9 3503 if (lxc_conf->is_execute) {
4b5b3a2a
TA
3504 if (execveat_supported()) {
3505 int fd;
3506 char path[PATH_MAX];
3507
3508 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3509 if (ret < 0 || ret >= PATH_MAX) {
3510 ERROR("Path to init.lxc.static too long");
3511 return -1;
3512 }
3513
3514 fd = open(path, O_PATH | O_CLOEXEC);
3515 if (fd < 0) {
3516 SYSERROR("Unable to open lxc.init.static");
3517 return -1;
3518 }
3519
3520 ((struct execute_args *)handler->data)->init_fd = fd;
3521 ((struct execute_args *)handler->data)->init_path = NULL;
3522 } else {
3523 ret = lxc_execute_bind_init(handler);
3524 if (ret < 0) {
3525 ERROR("Failed to bind-mount the lxc init system");
3526 return -1;
3527 }
8353b4c9
CB
3528 }
3529 }
2322903b 3530
8353b4c9
CB
3531 /* Now mount only cgroups, if wanted. Before, /sys could not have been
3532 * mounted. It is guaranteed to be mounted now either through
3533 * automatically or via fstab entries.
368bbc02 3534 */
8353b4c9
CB
3535 ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler);
3536 if (ret < 0) {
3537 ERROR("Failed to setup remaining automatic mounts");
368bbc02
CS
3538 return -1;
3539 }
3540
8353b4c9 3541 ret = run_lxc_hooks(name, "mount", lxc_conf, NULL);
1a2cf89d 3542 if (ret < 0) {
8353b4c9 3543 ERROR("Failed to run mount hooks");
773fb9ca
SH
3544 return -1;
3545 }
3546
bc6928ff 3547 if (lxc_conf->autodev > 0) {
8353b4c9
CB
3548 ret = run_lxc_hooks(name, "autodev", lxc_conf, NULL);
3549 if (ret < 0) {
3550 ERROR("Failed to run autodev hooks");
f7bee6c6
MW
3551 return -1;
3552 }
06749971 3553
8353b4c9
CB
3554 ret = lxc_fill_autodev(&lxc_conf->rootfs);
3555 if (ret < 0) {
3556 ERROR("Failed to populate \"/dev\"");
91c3830e
SH
3557 return -1;
3558 }
3559 }
368bbc02 3560
8353b4c9
CB
3561 if (!lxc_list_empty(&lxc_conf->mount_list)) {
3562 ret = setup_mount_entries(lxc_conf, &lxc_conf->rootfs,
3563 &lxc_conf->mount_list, name, lxcpath);
3564 if (ret < 0) {
3565 ERROR("Failed to setup mount entries");
3566 return -1;
3567 }
181437fd
YT
3568 }
3569
ed8704d0 3570 ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console,
885766f5 3571 lxc_conf->ttys.dir);
ed8704d0
CB
3572 if (ret < 0) {
3573 ERROR("Failed to setup console");
95b5ffaf 3574 return -1;
6e590161 3575 }
3576
ed8704d0
CB
3577 ret = lxc_setup_dev_symlinks(&lxc_conf->rootfs);
3578 if (ret < 0) {
8353b4c9 3579 ERROR("Failed to setup \"/dev\" symlinks");
69aa6655
DE
3580 return -1;
3581 }
3582
8353b4c9
CB
3583 ret = lxc_create_tmp_proc_mount(lxc_conf);
3584 if (ret < 0) {
3585 ERROR("Failed to \"/proc\" LSMs");
e075f5d9 3586 return -1;
e075f5d9 3587 }
e075f5d9 3588
8353b4c9
CB
3589 ret = setup_pivot_root(&lxc_conf->rootfs);
3590 if (ret < 0) {
3591 ERROR("Failed to pivot root into rootfs");
95b5ffaf 3592 return -1;
ed502555 3593 }
3594
8353b4c9
CB
3595 ret = lxc_setup_devpts(lxc_conf);
3596 if (ret < 0) {
3597 ERROR("Failed to setup new devpts instance");
95b5ffaf 3598 return -1;
3c26f34e 3599 }
3600
2187efd3
CB
3601 ret = lxc_create_ttys(handler);
3602 if (ret < 0)
e8bd4e43 3603 return -1;
e8bd4e43 3604
8353b4c9
CB
3605 ret = setup_personality(lxc_conf->personality);
3606 if (ret < 0) {
3607 ERROR("Failed to set personality");
cccc74b5
DL
3608 return -1;
3609 }
3610
8353b4c9
CB
3611 /* Set sysctl value to a path under /proc/sys as determined from the
3612 * key. For e.g. net.ipv4.ip_forward translated to
3613 * /proc/sys/net/ipv4/ip_forward.
7edd0540
L
3614 */
3615 if (!lxc_list_empty(&lxc_conf->sysctls)) {
3616 ret = setup_sysctl_parameters(&lxc_conf->sysctls);
8353b4c9
CB
3617 if (ret < 0) {
3618 ERROR("Failed to setup sysctl parameters");
7edd0540 3619 return -1;
8353b4c9 3620 }
7edd0540
L
3621 }
3622
97a8f74f
SG
3623 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3624 if (!lxc_list_empty(&lxc_conf->caps)) {
8353b4c9
CB
3625 ERROR("Container requests lxc.cap.drop and "
3626 "lxc.cap.keep: either use lxc.cap.drop or "
3627 "lxc.cap.keep, not both");
f6d3e3e4
SH
3628 return -1;
3629 }
8353b4c9 3630
97a8f74f 3631 if (dropcaps_except(&lxc_conf->keepcaps)) {
8353b4c9 3632 ERROR("Failed to keep capabilities");
97a8f74f
SG
3633 return -1;
3634 }
3635 } else if (setup_caps(&lxc_conf->caps)) {
8353b4c9 3636 ERROR("Failed to drop capabilities");
97a8f74f 3637 return -1;
81810dd1
DL
3638 }
3639
8353b4c9 3640 NOTICE("The container \"%s\" is set up", name);
cd54d859 3641
0ad19a3f 3642 return 0;
3643}
26ddeedd 3644
3f60c2f7 3645int run_lxc_hooks(const char *name, char *hookname, struct lxc_conf *conf,
14a7b0f9 3646 char *argv[])
26ddeedd 3647{
26ddeedd 3648 struct lxc_list *it;
3f60c2f7 3649 int which = -1;
26ddeedd 3650
3f60c2f7 3651 if (strcmp(hookname, "pre-start") == 0)
26ddeedd 3652 which = LXCHOOK_PRESTART;
3f60c2f7 3653 else if (strcmp(hookname, "start-host") == 0)
08dd2805 3654 which = LXCHOOK_START_HOST;
3f60c2f7 3655 else if (strcmp(hookname, "pre-mount") == 0)
5ea6163a 3656 which = LXCHOOK_PREMOUNT;
3f60c2f7 3657 else if (strcmp(hookname, "mount") == 0)
26ddeedd 3658 which = LXCHOOK_MOUNT;
3f60c2f7 3659 else if (strcmp(hookname, "autodev") == 0)
f7bee6c6 3660 which = LXCHOOK_AUTODEV;
3f60c2f7 3661 else if (strcmp(hookname, "start") == 0)
26ddeedd 3662 which = LXCHOOK_START;
3f60c2f7 3663 else if (strcmp(hookname, "stop") == 0)
52492063 3664 which = LXCHOOK_STOP;
3f60c2f7 3665 else if (strcmp(hookname, "post-stop") == 0)
26ddeedd 3666 which = LXCHOOK_POSTSTOP;
3f60c2f7 3667 else if (strcmp(hookname, "clone") == 0)
148e91f5 3668 which = LXCHOOK_CLONE;
3f60c2f7 3669 else if (strcmp(hookname, "destroy") == 0)
37cf711b 3670 which = LXCHOOK_DESTROY;
26ddeedd
SH
3671 else
3672 return -1;
3f60c2f7 3673
0fd73091 3674 lxc_list_for_each (it, &conf->hooks[which]) {
26ddeedd 3675 int ret;
3f60c2f7
CB
3676 char *hook = it->elem;
3677
3678 ret = run_script_argv(name, conf->hooks_version, "lxc", hook,
14a7b0f9 3679 hookname, argv);
3f60c2f7
CB
3680 if (ret < 0)
3681 return -1;
26ddeedd 3682 }
3f60c2f7 3683
26ddeedd
SH
3684 return 0;
3685}
72d0e1cb 3686
72d0e1cb
SG
3687int lxc_clear_config_caps(struct lxc_conf *c)
3688{
1a0e70ac 3689 struct lxc_list *it, *next;
72d0e1cb 3690
0fd73091 3691 lxc_list_for_each_safe (it, &c->caps, next) {
72d0e1cb
SG
3692 lxc_list_del(it);
3693 free(it->elem);
3694 free(it);
3695 }
0fd73091 3696
72d0e1cb
SG
3697 return 0;
3698}
3699
c7e345ae
CB
3700static int lxc_free_idmap(struct lxc_list *id_map)
3701{
27c27d73
SH
3702 struct lxc_list *it, *next;
3703
0fd73091 3704 lxc_list_for_each_safe (it, id_map, next) {
27c27d73
SH
3705 lxc_list_del(it);
3706 free(it->elem);
3707 free(it);
3708 }
c7e345ae 3709
27c27d73
SH
3710 return 0;
3711}
3712
4355ab5f
SH
3713int lxc_clear_idmaps(struct lxc_conf *c)
3714{
3715 return lxc_free_idmap(&c->id_map);
3716}
3717
1fb86a7c
SH
3718int lxc_clear_config_keepcaps(struct lxc_conf *c)
3719{
0fd73091 3720 struct lxc_list *it, *next;
1fb86a7c 3721
0fd73091 3722 lxc_list_for_each_safe (it, &c->keepcaps, next) {
1fb86a7c
SH
3723 lxc_list_del(it);
3724 free(it->elem);
3725 free(it);
3726 }
0fd73091 3727
1fb86a7c
SH
3728 return 0;
3729}
3730
54860ed0 3731int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
72d0e1cb 3732{
54860ed0 3733 char *global_token, *namespaced_token;
ab1a6cac 3734 size_t namespaced_token_len;
54860ed0 3735 struct lxc_list *it, *next, *list;
ab1a6cac 3736 const char *k = key;
54860ed0 3737 bool all = false;
72d0e1cb 3738
54860ed0
CB
3739 if (version == CGROUP2_SUPER_MAGIC) {
3740 global_token = "lxc.cgroup2";
3741 namespaced_token = "lxc.cgroup2.";
0fd73091 3742 namespaced_token_len = sizeof("lxc.cgroup2.") - 1;
54860ed0
CB
3743 list = &c->cgroup2;
3744 } else if (version == CGROUP_SUPER_MAGIC) {
3745 global_token = "lxc.cgroup";
3746 namespaced_token = "lxc.cgroup.";
0fd73091 3747 namespaced_token_len = sizeof("lxc.cgroup.") - 1;
54860ed0
CB
3748 list = &c->cgroup;
3749 } else {
ab1a6cac 3750 return -EINVAL;
54860ed0
CB
3751 }
3752
3753 if (strcmp(key, global_token) == 0)
72d0e1cb 3754 all = true;
54860ed0 3755 else if (strncmp(key, namespaced_token, sizeof(namespaced_token) - 1) == 0)
ab1a6cac 3756 k += namespaced_token_len;
a6390f01 3757 else
ab1a6cac 3758 return -EINVAL;
72d0e1cb 3759
0fd73091 3760 lxc_list_for_each_safe (it, list, next) {
72d0e1cb 3761 struct lxc_cgroup *cg = it->elem;
54860ed0 3762
72d0e1cb
SG
3763 if (!all && strcmp(cg->subsystem, k) != 0)
3764 continue;
54860ed0 3765
72d0e1cb
SG
3766 lxc_list_del(it);
3767 free(cg->subsystem);
3768 free(cg->value);
3769 free(cg);
3770 free(it);
3771 }
e409b214 3772
72d0e1cb
SG
3773 return 0;
3774}
3775
c6d09e15
WB
3776int lxc_clear_limits(struct lxc_conf *c, const char *key)
3777{
3778 struct lxc_list *it, *next;
c6d09e15 3779 const char *k = NULL;
0fd73091 3780 bool all = false;
c6d09e15 3781
b668653c 3782 if (strcmp(key, "lxc.limit") == 0 || strcmp(key, "lxc.prlimit") == 0)
c6d09e15 3783 all = true;
b668653c
CB
3784 else if (strncmp(key, "lxc.limit.", sizeof("lxc.limit.") - 1) == 0)
3785 k = key + sizeof("lxc.limit.") - 1;
3786 else if (strncmp(key, "lxc.prlimit.", sizeof("lxc.prlimit.") - 1) == 0)
3787 k = key + sizeof("lxc.prlimit.") - 1;
c6d09e15
WB
3788 else
3789 return -1;
3790
0fd73091 3791 lxc_list_for_each_safe (it, &c->limits, next) {
c6d09e15 3792 struct lxc_limit *lim = it->elem;
0fd73091 3793
c6d09e15
WB
3794 if (!all && strcmp(lim->resource, k) != 0)
3795 continue;
0fd73091 3796
c6d09e15
WB
3797 lxc_list_del(it);
3798 free(lim->resource);
3799 free(lim);
3800 free(it);
3801 }
b668653c 3802
c6d09e15
WB
3803 return 0;
3804}
3805
7edd0540
L
3806int lxc_clear_sysctls(struct lxc_conf *c, const char *key)
3807{
3808 struct lxc_list *it, *next;
7edd0540 3809 const char *k = NULL;
0fd73091 3810 bool all = false;
7edd0540
L
3811
3812 if (strcmp(key, "lxc.sysctl") == 0)
3813 all = true;
3814 else if (strncmp(key, "lxc.sysctl.", sizeof("lxc.sysctl.") - 1) == 0)
3815 k = key + sizeof("lxc.sysctl.") - 1;
3816 else
3817 return -1;
3818
0fd73091 3819 lxc_list_for_each_safe (it, &c->sysctls, next) {
7edd0540 3820 struct lxc_sysctl *elem = it->elem;
0fd73091 3821
7edd0540
L
3822 if (!all && strcmp(elem->key, k) != 0)
3823 continue;
0fd73091 3824
7edd0540
L
3825 lxc_list_del(it);
3826 free(elem->key);
3827 free(elem->value);
3828 free(elem);
3829 free(it);
3830 }
0fd73091 3831
7edd0540
L
3832 return 0;
3833}
3834
61d7a733
YT
3835int lxc_clear_procs(struct lxc_conf *c, const char *key)
3836{
0fd73091 3837 struct lxc_list *it, *next;
61d7a733 3838 const char *k = NULL;
0fd73091 3839 bool all = false;
61d7a733
YT
3840
3841 if (strcmp(key, "lxc.proc") == 0)
3842 all = true;
3843 else if (strncmp(key, "lxc.proc.", sizeof("lxc.proc.") - 1) == 0)
3844 k = key + sizeof("lxc.proc.") - 1;
3845 else
3846 return -1;
3847
0fd73091 3848 lxc_list_for_each_safe (it, &c->procs, next) {
61d7a733 3849 struct lxc_proc *proc = it->elem;
0fd73091 3850
61d7a733
YT
3851 if (!all && strcmp(proc->filename, k) != 0)
3852 continue;
0fd73091 3853
61d7a733
YT
3854 lxc_list_del(it);
3855 free(proc->filename);
3856 free(proc->value);
3857 free(proc);
3858 free(it);
3859 }
3860
3861 return 0;
3862}
3863
ee1e7aa0
SG
3864int lxc_clear_groups(struct lxc_conf *c)
3865{
0fd73091 3866 struct lxc_list *it, *next;
ee1e7aa0 3867
0fd73091 3868 lxc_list_for_each_safe (it, &c->groups, next) {
ee1e7aa0
SG
3869 lxc_list_del(it);
3870 free(it->elem);
3871 free(it);
3872 }
0fd73091 3873
ee1e7aa0
SG
3874 return 0;
3875}
3876
ab799c0b
SG
3877int lxc_clear_environment(struct lxc_conf *c)
3878{
0fd73091 3879 struct lxc_list *it, *next;
ab799c0b 3880
0fd73091 3881 lxc_list_for_each_safe (it, &c->environment, next) {
ab799c0b
SG
3882 lxc_list_del(it);
3883 free(it->elem);
3884 free(it);
3885 }
0fd73091 3886
ab799c0b
SG
3887 return 0;
3888}
3889
72d0e1cb
SG
3890int lxc_clear_mount_entries(struct lxc_conf *c)
3891{
0fd73091 3892 struct lxc_list *it, *next;
72d0e1cb 3893
0fd73091 3894 lxc_list_for_each_safe (it, &c->mount_list, next) {
72d0e1cb
SG
3895 lxc_list_del(it);
3896 free(it->elem);
3897 free(it);
3898 }
0fd73091 3899
72d0e1cb
SG
3900 return 0;
3901}
3902
b099e9e9
SH
3903int lxc_clear_automounts(struct lxc_conf *c)
3904{
3905 c->auto_mounts = 0;
3906 return 0;
3907}
3908
12a50cc6 3909int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3910{
72d0e1cb 3911 int i;
0fd73091
CB
3912 struct lxc_list *it, *next;
3913 const char *k = NULL;
3914 bool all = false, done = false;
72d0e1cb 3915
17ed13a3
SH
3916 if (strcmp(key, "lxc.hook") == 0)
3917 all = true;
0fd73091
CB
3918 else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.") - 1) == 0)
3919 k = key + sizeof("lxc.hook.") - 1;
a6390f01
WB
3920 else
3921 return -1;
17ed13a3 3922
0fd73091 3923 for (i = 0; i < NUM_LXC_HOOKS; i++) {
17ed13a3 3924 if (all || strcmp(k, lxchook_names[i]) == 0) {
0fd73091 3925 lxc_list_for_each_safe (it, &c->hooks[i], next) {
17ed13a3
SH
3926 lxc_list_del(it);
3927 free(it->elem);
3928 free(it);
3929 }
0fd73091 3930
17ed13a3 3931 done = true;
72d0e1cb
SG
3932 }
3933 }
17ed13a3
SH
3934
3935 if (!done) {
3936 ERROR("Invalid hook key: %s", key);
3937 return -1;
3938 }
0fd73091 3939
72d0e1cb
SG
3940 return 0;
3941}
8eb5694b 3942
4184c3e1
SH
3943static inline void lxc_clear_aliens(struct lxc_conf *conf)
3944{
0fd73091 3945 struct lxc_list *it, *next;
4184c3e1 3946
0fd73091 3947 lxc_list_for_each_safe (it, &conf->aliens, next) {
4184c3e1
SH
3948 lxc_list_del(it);
3949 free(it->elem);
3950 free(it);
3951 }
3952}
3953
c7b15d1e 3954void lxc_clear_includes(struct lxc_conf *conf)
f979ac15 3955{
0fd73091 3956 struct lxc_list *it, *next;
f979ac15 3957
0fd73091 3958 lxc_list_for_each_safe (it, &conf->includes, next) {
f979ac15
SH
3959 lxc_list_del(it);
3960 free(it->elem);
3961 free(it);
3962 }
3963}
3964
8eb5694b
SH
3965void lxc_conf_free(struct lxc_conf *conf)
3966{
3967 if (!conf)
3968 return;
0fd73091 3969
858377e4
SH
3970 if (current_config == conf)
3971 current_config = NULL;
aed105d5 3972 lxc_terminal_conf_free(&conf->console);
f10fad2f 3973 free(conf->rootfs.mount);
b3b8c97f 3974 free(conf->rootfs.bdev_type);
f10fad2f
ME
3975 free(conf->rootfs.options);
3976 free(conf->rootfs.path);
f10fad2f 3977 free(conf->logfile);
858377e4
SH
3978 if (conf->logfd != -1)
3979 close(conf->logfd);
f10fad2f 3980 free(conf->utsname);
885766f5
CB
3981 free(conf->ttys.dir);
3982 free(conf->ttys.tty_names);
f10fad2f
ME
3983 free(conf->fstab);
3984 free(conf->rcfile);
5cda27c1 3985 free(conf->execute_cmd);
f10fad2f 3986 free(conf->init_cmd);
3c491553 3987 free(conf->init_cwd);
6b0d5538 3988 free(conf->unexpanded_config);
76d0127f 3989 free(conf->syslog);
c302b476 3990 lxc_free_networks(&conf->network);
f10fad2f
ME
3991 free(conf->lsm_aa_profile);
3992 free(conf->lsm_se_context);
769872f9 3993 lxc_seccomp_free(conf);
8eb5694b 3994 lxc_clear_config_caps(conf);
1fb86a7c 3995 lxc_clear_config_keepcaps(conf);
54860ed0
CB
3996 lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
3997 lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
17ed13a3 3998 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3999 lxc_clear_mount_entries(conf);
27c27d73 4000 lxc_clear_idmaps(conf);
ee1e7aa0 4001 lxc_clear_groups(conf);
f979ac15 4002 lxc_clear_includes(conf);
761d81ca 4003 lxc_clear_aliens(conf);
ab799c0b 4004 lxc_clear_environment(conf);
240d4b74 4005 lxc_clear_limits(conf, "lxc.prlimit");
7edd0540 4006 lxc_clear_sysctls(conf, "lxc.sysctl");
61d7a733 4007 lxc_clear_procs(conf, "lxc.proc");
43654d34
CB
4008 free(conf->cgroup_meta.dir);
4009 free(conf->cgroup_meta.controllers);
8eb5694b
SH
4010 free(conf);
4011}
4355ab5f
SH
4012
/* Arguments handed to run_userns_fn(). */
struct userns_fn_data {
	/* Function to call once the parent has signalled via the pipe. */
	int (*fn)(void *);
	/* Optional name of @fn, only used for trace logging. */
	const char *fn_name;
	/* Opaque argument passed to @fn. */
	void *arg;
	/* Pipe: run_userns_fn() reads from p[0] and closes p[1]. */
	int p[2];
};
4019
4020static int run_userns_fn(void *data)
4021{
4355ab5f 4022 char c;
0fd73091 4023 struct userns_fn_data *d = data;
4355ab5f 4024
f8aa4bf3 4025 /* Close write end of the pipe. */
4355ab5f 4026 close(d->p[1]);
f8aa4bf3
CB
4027
4028 /* Wait for parent to finish establishing a new mapping in the user
4029 * namespace we are executing in.
4030 */
489f39be 4031 if (lxc_read_nointr(d->p[0], &c, 1) != 1)
4355ab5f 4032 return -1;
f8aa4bf3
CB
4033
4034 /* Close read end of the pipe. */
4355ab5f 4035 close(d->p[0]);
f8aa4bf3 4036
c9b7c33e
CB
4037 if (d->fn_name)
4038 TRACE("calling function \"%s\"", d->fn_name);
0fd73091 4039
f8aa4bf3 4040 /* Call function to run. */
4355ab5f
SH
4041 return d->fn(d->arg);
4042}
4043
db7cfe23
CB
4044static struct id_map *mapped_nsid_add(struct lxc_conf *conf, unsigned id,
4045 enum idtype idtype)
4046{
5173b710
CB
4047 const struct id_map *map;
4048 struct id_map *retmap;
db7cfe23
CB
4049
4050 map = find_mapped_nsid_entry(conf, id, idtype);
4051 if (!map)
4052 return NULL;
4053
4054 retmap = malloc(sizeof(*retmap));
4055 if (!retmap)
4056 return NULL;
4057
4058 memcpy(retmap, map, sizeof(*retmap));
4059 return retmap;
4060}
4061
c4333195
CB
4062static struct id_map *find_mapped_hostid_entry(struct lxc_conf *conf,
4063 unsigned id, enum idtype idtype)
f8aa4bf3 4064{
f8aa4bf3 4065 struct id_map *map;
0fd73091 4066 struct lxc_list *it;
f8aa4bf3
CB
4067 struct id_map *retmap = NULL;
4068
0fd73091 4069 lxc_list_for_each (it, &conf->id_map) {
f8aa4bf3
CB
4070 map = it->elem;
4071 if (map->idtype != idtype)
4072 continue;
4073
4074 if (id >= map->hostid && id < map->hostid + map->range) {
4075 retmap = map;
4076 break;
4077 }
4078 }
4079
f8aa4bf3
CB
4080 return retmap;
4081}
4082
0fd73091 4083/* Allocate a new {g,u}id mapping for the given {g,u}id. Re-use an already
f8aa4bf3 4084 * existing one or establish a new one.
4355ab5f 4085 */
0fd73091
CB
4086static struct id_map *mapped_hostid_add(struct lxc_conf *conf, uid_t id,
4087 enum idtype type)
4355ab5f 4088{
28a2d9e7 4089 int hostid_mapped;
c4333195
CB
4090 struct id_map *entry = NULL, *tmp = NULL;
4091
4092 entry = malloc(sizeof(*entry));
4093 if (!entry)
4094 return NULL;
f8aa4bf3 4095
28a2d9e7 4096 /* Reuse existing mapping. */
c4333195
CB
4097 tmp = find_mapped_hostid_entry(conf, id, type);
4098 if (tmp)
4099 return memcpy(entry, tmp, sizeof(*entry));
f8aa4bf3 4100
28a2d9e7
CB
4101 /* Find new mapping. */
4102 hostid_mapped = find_unmapped_nsid(conf, type);
4103 if (hostid_mapped < 0) {
c4333195
CB
4104 DEBUG("Failed to find free mapping for id %d", id);
4105 free(entry);
28a2d9e7 4106 return NULL;
f8aa4bf3 4107 }
f8aa4bf3 4108
28a2d9e7
CB
4109 entry->idtype = type;
4110 entry->nsid = hostid_mapped;
4111 entry->hostid = (unsigned long)id;
4112 entry->range = 1;
4355ab5f 4113
28a2d9e7 4114 return entry;
4355ab5f
SH
4115}
4116
dcf0ffdf 4117struct lxc_list *get_minimal_idmap(struct lxc_conf *conf)
4355ab5f 4118{
f8aa4bf3 4119 uid_t euid, egid;
4160c3a0
CB
4120 uid_t nsuid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
4121 gid_t nsgid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
f8aa4bf3 4122 struct lxc_list *idmap = NULL, *tmplist = NULL;
28a2d9e7
CB
4123 struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
4124 *host_uid_map = NULL, *host_gid_map = NULL;
4355ab5f 4125
db7cfe23 4126 /* Find container root mappings. */
4160c3a0 4127 container_root_uid = mapped_nsid_add(conf, nsuid, ID_TYPE_UID);
db7cfe23 4128 if (!container_root_uid) {
dcf0ffdf 4129 DEBUG("Failed to find mapping for namespace uid %d", 0);
db7cfe23 4130 goto on_error;
f8aa4bf3 4131 }
dcf0ffdf
CB
4132 euid = geteuid();
4133 if (euid >= container_root_uid->hostid &&
4134 euid < (container_root_uid->hostid + container_root_uid->range))
db7cfe23 4135 host_uid_map = container_root_uid;
f8aa4bf3 4136
4160c3a0 4137 container_root_gid = mapped_nsid_add(conf, nsgid, ID_TYPE_GID);
db7cfe23 4138 if (!container_root_gid) {
dcf0ffdf 4139 DEBUG("Failed to find mapping for namespace gid %d", 0);
f8aa4bf3
CB
4140 goto on_error;
4141 }
dcf0ffdf
CB
4142 egid = getegid();
4143 if (egid >= container_root_gid->hostid &&
4144 egid < (container_root_gid->hostid + container_root_gid->range))
db7cfe23 4145 host_gid_map = container_root_gid;
f8aa4bf3
CB
4146
4147 /* Check whether the {g,u}id of the user has a mapping. */
954b7d9b 4148 if (!host_uid_map)
c4333195 4149 host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
28a2d9e7 4150 if (!host_uid_map) {
db7cfe23 4151 DEBUG("Failed to find mapping for uid %d", euid);
f8aa4bf3
CB
4152 goto on_error;
4153 }
4154
dcf0ffdf
CB
4155 if (!host_gid_map)
4156 host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
28a2d9e7 4157 if (!host_gid_map) {
db7cfe23 4158 DEBUG("Failed to find mapping for gid %d", egid);
28a2d9e7
CB
4159 goto on_error;
4160 }
4161
4162 /* Allocate new {g,u}id map list. */
4163 idmap = malloc(sizeof(*idmap));
4164 if (!idmap)
4165 goto on_error;
4166 lxc_list_init(idmap);
4167
f8aa4bf3
CB
4168 /* Add container root to the map. */
4169 tmplist = malloc(sizeof(*tmplist));
4170 if (!tmplist)
4171 goto on_error;
4172 lxc_list_add_elem(tmplist, container_root_uid);
4173 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4174
1d90e064 4175 if (host_uid_map && (host_uid_map != container_root_uid)) {
28a2d9e7
CB
4176 /* idmap will now keep track of that memory. */
4177 container_root_uid = NULL;
4178
4179 /* Add container root to the map. */
4180 tmplist = malloc(sizeof(*tmplist));
4181 if (!tmplist)
4182 goto on_error;
4183 lxc_list_add_elem(tmplist, host_uid_map);
4184 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4185 }
1d90e064
CB
4186 /* idmap will now keep track of that memory. */
4187 container_root_uid = NULL;
4188 /* idmap will now keep track of that memory. */
4189 host_uid_map = NULL;
f8aa4bf3
CB
4190
4191 tmplist = malloc(sizeof(*tmplist));
4192 if (!tmplist)
4193 goto on_error;
4194 lxc_list_add_elem(tmplist, container_root_gid);
4195 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4196
1d90e064 4197 if (host_gid_map && (host_gid_map != container_root_gid)) {
28a2d9e7
CB
4198 /* idmap will now keep track of that memory. */
4199 container_root_gid = NULL;
4200
4201 tmplist = malloc(sizeof(*tmplist));
4202 if (!tmplist)
4203 goto on_error;
4204 lxc_list_add_elem(tmplist, host_gid_map);
4205 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4206 }
1d90e064
CB
4207 /* idmap will now keep track of that memory. */
4208 container_root_gid = NULL;
4209 /* idmap will now keep track of that memory. */
4210 host_gid_map = NULL;
f8aa4bf3 4211
dcf0ffdf
CB
4212 TRACE("Allocated minimal idmapping");
4213 return idmap;
4214
4215on_error:
4dc41f99 4216 if (idmap) {
dcf0ffdf 4217 lxc_free_idmap(idmap);
4dc41f99
SX
4218 free(idmap);
4219 }
dcf0ffdf
CB
4220 if (container_root_uid)
4221 free(container_root_uid);
4222 if (container_root_gid)
4223 free(container_root_gid);
4224 if (host_uid_map && (host_uid_map != container_root_uid))
4225 free(host_uid_map);
4226 if (host_gid_map && (host_gid_map != container_root_gid))
4227 free(host_gid_map);
4228
4229 return NULL;
4230}
4231
4232/* Run a function in a new user namespace.
4233 * The caller's euid/egid will be mapped if it is not already.
4234 * Afaict, userns_exec_1() is only used to operate based on privileges for the
4235 * user's own {g,u}id on the host and for the container root's unmapped {g,u}id.
4236 * This means we require only to establish a mapping from:
4237 * - the container root {g,u}id as seen from the host > user's host {g,u}id
4238 * - the container root -> some sub{g,u}id
4239 * The former we add, if the user did not specifiy a mapping. The latter we
4240 * retrieve from the ontainer's configured {g,u}id mappings as it must have been
4241 * there to start the container in the first place.
4242 */
4243int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
4244 const char *fn_name)
4245{
4246 pid_t pid;
dcf0ffdf 4247 int p[2];
0fd73091 4248 struct userns_fn_data d;
dcf0ffdf 4249 struct lxc_list *idmap;
0fd73091
CB
4250 int ret = -1, status = -1;
4251 char c = '1';
dcf0ffdf 4252
2b2655a8
CB
4253 if (!conf)
4254 return -EINVAL;
4255
dcf0ffdf
CB
4256 idmap = get_minimal_idmap(conf);
4257 if (!idmap)
4258 return -1;
4259
4260 ret = pipe(p);
4261 if (ret < 0) {
4262 SYSERROR("Failed to create pipe");
4263 return -1;
4264 }
4265 d.fn = fn;
4266 d.fn_name = fn_name;
4267 d.arg = data;
4268 d.p[0] = p[0];
4269 d.p[1] = p[1];
4270
4271 /* Clone child in new user namespace. */
4272 pid = lxc_raw_clone_cb(run_userns_fn, &d, CLONE_NEWUSER);
4273 if (pid < 0) {
0fd73091 4274 ERROR("Failed to clone process in new user namespace");
dcf0ffdf
CB
4275 goto on_error;
4276 }
4277
4278 close(p[0]);
4279 p[0] = -1;
4280
4b73005c
CB
4281 if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
4282 conf->loglevel == LXC_LOG_LEVEL_TRACE) {
dcf0ffdf 4283 struct id_map *map;
0fd73091 4284 struct lxc_list *it;
dcf0ffdf 4285
0fd73091 4286 lxc_list_for_each (it, idmap) {
f8aa4bf3 4287 map = it->elem;
dcf0ffdf 4288 TRACE("Establishing %cid mapping for \"%d\" in new "
f8aa4bf3 4289 "user namespace: nsuid %lu - hostid %lu - range "
0fd73091
CB
4290 "%lu",
4291 (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
4292 map->nsid, map->hostid, map->range);
f8aa4bf3 4293 }
4355ab5f
SH
4294 }
4295
f8aa4bf3 4296 /* Set up {g,u}id mapping for user namespace of child process. */
4355ab5f 4297 ret = lxc_map_ids(idmap, pid);
f8aa4bf3 4298 if (ret < 0) {
0fd73091 4299 ERROR("Error setting up {g,u}id mappings for child process \"%d\"", pid);
f8aa4bf3 4300 goto on_error;
4355ab5f
SH
4301 }
4302
f8aa4bf3 4303 /* Tell child to proceed. */
489f39be 4304 if (lxc_write_nointr(p[1], &c, 1) != 1) {
dcf0ffdf 4305 SYSERROR("Failed telling child process \"%d\" to proceed", pid);
f8aa4bf3 4306 goto on_error;
4355ab5f
SH
4307 }
4308
686dd5d1 4309on_error:
4355ab5f
SH
4310 if (p[0] != -1)
4311 close(p[0]);
4312 close(p[1]);
f8aa4bf3 4313
ee1b16bc
TA
4314 /* Wait for child to finish. */
4315 if (pid > 0)
4316 status = wait_for_pid(pid);
4317
686dd5d1
CB
4318 if (status < 0)
4319 ret = -1;
4320
f8aa4bf3 4321 return ret;
4355ab5f 4322}
97e9cfa0 4323
415a8851
CB
4324int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data,
4325 const char *fn_name)
4326{
4327 pid_t pid;
4328 uid_t euid, egid;
415a8851
CB
4329 int p[2];
4330 struct id_map *map;
4331 struct lxc_list *cur;
0fd73091 4332 struct userns_fn_data d;
415a8851 4333 int ret = -1;
0fd73091 4334 char c = '1';
415a8851
CB
4335 struct lxc_list *idmap = NULL, *tmplist = NULL;
4336 struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
4337 *host_uid_map = NULL, *host_gid_map = NULL;
4338
2b2655a8
CB
4339 if (!conf)
4340 return -EINVAL;
4341
415a8851
CB
4342 ret = pipe(p);
4343 if (ret < 0) {
4344 SYSERROR("opening pipe");
4345 return -1;
4346 }
4347 d.fn = fn;
4348 d.fn_name = fn_name;
4349 d.arg = data;
4350 d.p[0] = p[0];
4351 d.p[1] = p[1];
4352
4353 /* Clone child in new user namespace. */
4354 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4355 if (pid < 0) {
0fd73091 4356 ERROR("Failed to clone process in new user namespace");
415a8851
CB
4357 goto on_error;
4358 }
4359
4360 close(p[0]);
4361 p[0] = -1;
4362
4363 euid = geteuid();
4364 egid = getegid();
4365
4366 /* Allocate new {g,u}id map list. */
4367 idmap = malloc(sizeof(*idmap));
4368 if (!idmap)
4369 goto on_error;
4370 lxc_list_init(idmap);
4371
4372 /* Find container root. */
0fd73091 4373 lxc_list_for_each (cur, &conf->id_map) {
415a8851
CB
4374 struct id_map *tmpmap;
4375
4376 tmplist = malloc(sizeof(*tmplist));
4377 if (!tmplist)
4378 goto on_error;
4379
4380 tmpmap = malloc(sizeof(*tmpmap));
4381 if (!tmpmap) {
4382 free(tmplist);
4383 goto on_error;
4384 }
4385
4386 memset(tmpmap, 0, sizeof(*tmpmap));
4387 memcpy(tmpmap, cur->elem, sizeof(*tmpmap));
4388 tmplist->elem = tmpmap;
4389
4390 lxc_list_add_tail(idmap, tmplist);
4391
4392 map = cur->elem;
4393
4394 if (map->idtype == ID_TYPE_UID)
4395 if (euid >= map->hostid && euid < map->hostid + map->range)
4396 host_uid_map = map;
4397
4398 if (map->idtype == ID_TYPE_GID)
4399 if (egid >= map->hostid && egid < map->hostid + map->range)
4400 host_gid_map = map;
4401
4402 if (map->nsid != 0)
4403 continue;
4404
4405 if (map->idtype == ID_TYPE_UID)
4406 if (container_root_uid == NULL)
4407 container_root_uid = map;
4408
4409 if (map->idtype == ID_TYPE_GID)
4410 if (container_root_gid == NULL)
4411 container_root_gid = map;
4412 }
4413
4414 if (!container_root_uid || !container_root_gid) {
4415 ERROR("No mapping for container root found");
4416 goto on_error;
4417 }
4418
4419 /* Check whether the {g,u}id of the user has a mapping. */
4420 if (!host_uid_map)
c4333195 4421 host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
415a8851
CB
4422 else
4423 host_uid_map = container_root_uid;
4424
4425 if (!host_gid_map)
c4333195 4426 host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
415a8851
CB
4427 else
4428 host_gid_map = container_root_gid;
4429
4430 if (!host_uid_map) {
4431 DEBUG("Failed to find mapping for uid %d", euid);
4432 goto on_error;
4433 }
4434
4435 if (!host_gid_map) {
4436 DEBUG("Failed to find mapping for gid %d", egid);
4437 goto on_error;
4438 }
4439
4440 if (host_uid_map && (host_uid_map != container_root_uid)) {
4441 /* Add container root to the map. */
4442 tmplist = malloc(sizeof(*tmplist));
4443 if (!tmplist)
4444 goto on_error;
4445 lxc_list_add_elem(tmplist, host_uid_map);
4446 lxc_list_add_tail(idmap, tmplist);
4447 }
4448 /* idmap will now keep track of that memory. */
4449 host_uid_map = NULL;
4450
4451 if (host_gid_map && (host_gid_map != container_root_gid)) {
4452 tmplist = malloc(sizeof(*tmplist));
4453 if (!tmplist)
4454 goto on_error;
4455 lxc_list_add_elem(tmplist, host_gid_map);
4456 lxc_list_add_tail(idmap, tmplist);
4457 }
4458 /* idmap will now keep track of that memory. */
4459 host_gid_map = NULL;
4460
4461 if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
4462 conf->loglevel == LXC_LOG_LEVEL_TRACE) {
0fd73091 4463 lxc_list_for_each (cur, idmap) {
415a8851
CB
4464 map = cur->elem;
4465 TRACE("establishing %cid mapping for \"%d\" in new "
4466 "user namespace: nsuid %lu - hostid %lu - range "
4467 "%lu",
4468 (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
4469 map->nsid, map->hostid, map->range);
4470 }
4471 }
4472
4473 /* Set up {g,u}id mapping for user namespace of child process. */
4474 ret = lxc_map_ids(idmap, pid);
4475 if (ret < 0) {
0fd73091 4476 ERROR("error setting up {g,u}id mappings for child process \"%d\"", pid);
415a8851
CB
4477 goto on_error;
4478 }
4479
4480 /* Tell child to proceed. */
489f39be 4481 if (lxc_write_nointr(p[1], &c, 1) != 1) {
0fd73091 4482 SYSERROR("Failed telling child process \"%d\" to proceed", pid);
415a8851
CB
4483 goto on_error;
4484 }
4485
686dd5d1 4486on_error:
ee1b16bc
TA
4487 if (p[0] != -1)
4488 close(p[0]);
4489 close(p[1]);
4490
415a8851 4491 /* Wait for child to finish. */
686dd5d1
CB
4492 if (pid > 0)
4493 ret = wait_for_pid(pid);
415a8851 4494
80758b4b 4495 if (idmap) {
415a8851 4496 lxc_free_idmap(idmap);
80758b4b
DJ
4497 free(idmap);
4498 }
4499
415a8851
CB
4500 if (host_uid_map && (host_uid_map != container_root_uid))
4501 free(host_uid_map);
4502 if (host_gid_map && (host_gid_map != container_root_gid))
4503 free(host_gid_map);
4504
415a8851
CB
4505 return ret;
4506}
4507
/* Return a freshly allocated copy of the login name belonging to the
 * effective uid, or NULL if no passwd entry is found (or strdup() fails).
 * The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4519
/* Return a freshly allocated copy of the group name belonging to the
 * effective gid, or NULL if no group entry is found (or strdup() fails).
 * The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4531
a96a8e8c 4532/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4533void suggest_default_idmap(void)
4534{
0fd73091 4535 char *uname, *gname;
97e9cfa0
SH
4536 FILE *f;
4537 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
97e9cfa0 4538 size_t len = 0;
0fd73091 4539 char *line = NULL;
97e9cfa0 4540
0fd73091
CB
4541 uname = getuname();
4542 if (!uname)
97e9cfa0
SH
4543 return;
4544
0fd73091
CB
4545 gname = getgname();
4546 if (!gname) {
97e9cfa0
SH
4547 free(uname);
4548 return;
4549 }
4550
4551 f = fopen(subuidfile, "r");
4552 if (!f) {
4553 ERROR("Your system is not configured with subuids");
4554 free(gname);
4555 free(uname);
4556 return;
4557 }
0fd73091 4558
97e9cfa0 4559 while (getline(&line, &len, f) != -1) {
0fd73091 4560 char *p, *p2;
b7930180 4561 size_t no_newline = 0;
0fd73091
CB
4562
4563 p = strchr(line, ':');
97e9cfa0
SH
4564 if (*line == '#')
4565 continue;
4566 if (!p)
4567 continue;
4568 *p = '\0';
4569 p++;
0fd73091 4570
97e9cfa0
SH
4571 if (strcmp(line, uname))
4572 continue;
0fd73091 4573
97e9cfa0
SH
4574 p2 = strchr(p, ':');
4575 if (!p2)
4576 continue;
4577 *p2 = '\0';
4578 p2++;
4579 if (!*p2)
4580 continue;
b7930180
CB
4581 no_newline = strcspn(p2, "\n");
4582 p2[no_newline] = '\0';
4583
b7b2fde4 4584 if (lxc_safe_uint(p, &uid) < 0)
0fd73091 4585 WARN("Could not parse UID");
b7b2fde4 4586 if (lxc_safe_uint(p2, &urange) < 0)
0fd73091 4587 WARN("Could not parse UID range");
97e9cfa0
SH
4588 }
4589 fclose(f);
4590
6be7389a 4591 f = fopen(subgidfile, "r");
97e9cfa0
SH
4592 if (!f) {
4593 ERROR("Your system is not configured with subgids");
4594 free(gname);
4595 free(uname);
4596 return;
4597 }
0fd73091 4598
97e9cfa0 4599 while (getline(&line, &len, f) != -1) {
0fd73091 4600 char *p, *p2;
b7930180 4601 size_t no_newline = 0;
0fd73091
CB
4602
4603 p = strchr(line, ':');
97e9cfa0
SH
4604 if (*line == '#')
4605 continue;
4606 if (!p)
4607 continue;
4608 *p = '\0';
4609 p++;
0fd73091 4610
97e9cfa0
SH
4611 if (strcmp(line, uname))
4612 continue;
0fd73091 4613
97e9cfa0
SH
4614 p2 = strchr(p, ':');
4615 if (!p2)
4616 continue;
4617 *p2 = '\0';
4618 p2++;
4619 if (!*p2)
4620 continue;
b7930180
CB
4621 no_newline = strcspn(p2, "\n");
4622 p2[no_newline] = '\0';
4623
b7b2fde4 4624 if (lxc_safe_uint(p, &gid) < 0)
0fd73091 4625 WARN("Could not parse GID");
b7b2fde4 4626 if (lxc_safe_uint(p2, &grange) < 0)
0fd73091 4627 WARN("Could not parse GID range");
97e9cfa0
SH
4628 }
4629 fclose(f);
4630
f10fad2f 4631 free(line);
97e9cfa0
SH
4632
4633 if (!urange || !grange) {
4634 ERROR("You do not have subuids or subgids allocated");
4635 ERROR("Unprivileged containers require subuids and subgids");
fbd4a4d1 4636 free(uname);
1e7cd2f7 4637 free(gname);
97e9cfa0
SH
4638 return;
4639 }
4640
4641 ERROR("You must either run as root, or define uid mappings");
4642 ERROR("To pass uid mappings to lxc-create, you could create");
4643 ERROR("~/.config/lxc/default.conf:");
4644 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
bdcbb6b3
CB
4645 ERROR("lxc.idmap = u 0 %u %u", uid, urange);
4646 ERROR("lxc.idmap = g 0 %u %u", gid, grange);
97e9cfa0
SH
4647
4648 free(gname);
4649 free(uname);
4650}
aaf26830 4651
a7307747
SH
4652static void free_cgroup_settings(struct lxc_list *result)
4653{
4654 struct lxc_list *iterator, *next;
4655
0fd73091 4656 lxc_list_for_each_safe (iterator, result, next) {
a7307747
SH
4657 lxc_list_del(iterator);
4658 free(iterator);
4659 }
4660 free(result);
4661}
4662
0fd73091 4663/* Return the list of cgroup_settings sorted according to the following rules
aaf26830
KT
4664 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4665 */
0fd73091 4666struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings)
aaf26830
KT
4667{
4668 struct lxc_list *result;
aaf26830 4669 struct lxc_cgroup *cg = NULL;
0fd73091 4670 struct lxc_list *it = NULL, *item = NULL, *memsw_limit = NULL;
aaf26830
KT
4671
4672 result = malloc(sizeof(*result));
0fd73091 4673 if (!result)
fac7c663 4674 return NULL;
aaf26830
KT
4675 lxc_list_init(result);
4676
0fd73091
CB
4677 /* Iterate over the cgroup settings and copy them to the output list. */
4678 lxc_list_for_each (it, cgroup_settings) {
aaf26830 4679 item = malloc(sizeof(*item));
fac7c663 4680 if (!item) {
a7307747 4681 free_cgroup_settings(result);
fac7c663
KT
4682 return NULL;
4683 }
0fd73091 4684
aaf26830
KT
4685 item->elem = it->elem;
4686 cg = it->elem;
4687 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4688 /* Store the memsw_limit location */
4689 memsw_limit = item;
0fd73091
CB
4690 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 &&
4691 memsw_limit != NULL) {
4692 /* lxc.cgroup.memory.memsw.limit_in_bytes is found
4693 * before lxc.cgroup.memory.limit_in_bytes, swap these
4694 * two items */
aaf26830
KT
4695 item->elem = memsw_limit->elem;
4696 memsw_limit->elem = it->elem;
4697 }
4698 lxc_list_add_tail(result, item);
4699 }
4700
4701 return result;
a7307747 4702}