]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
cgfsng: strncat => strlcat
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
/*
 * lxc: linux Container library
 *
 * (C) Copyright IBM Corp. 2007, 2008
 *
 * Authors:
 * Daniel Lezcano <daniel.lezcano at free.fr>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
1d52bdf7
CB
23
24#define _GNU_SOURCE
d06245b8
NC
25#include "config.h"
26
9d257a2a 27#include <arpa/inet.h>
8f3e280e
CB
28#include <dirent.h>
29#include <errno.h>
30#include <fcntl.h>
31#include <grp.h>
32#include <inttypes.h>
33#include <libgen.h>
9d257a2a
CB
34#include <linux/loop.h>
35#include <net/if.h>
36#include <netinet/in.h>
8f3e280e
CB
37#include <pwd.h>
38#include <stdarg.h>
0ad19a3f 39#include <stdio.h>
0ad19a3f 40#include <stdlib.h>
0ad19a3f 41#include <string.h>
8f3e280e
CB
42#include <sys/mman.h>
43#include <sys/mount.h>
44#include <sys/param.h>
45#include <sys/prctl.h>
6a49f05e 46#include <sys/sendfile.h>
8f3e280e 47#include <sys/socket.h>
9d257a2a 48#include <sys/stat.h>
2d76d1d7 49#include <sys/syscall.h>
9d257a2a 50#include <sys/sysmacros.h>
97e9cfa0 51#include <sys/types.h>
8f3e280e
CB
52#include <sys/utsname.h>
53#include <sys/wait.h>
9d257a2a
CB
54#include <time.h>
55#include <unistd.h>
1d52bdf7 56
af6824fc 57#ifdef MAJOR_IN_MKDEV
9d257a2a 58#include <sys/mkdev.h>
af6824fc 59#endif
af6824fc 60
614305f3 61#ifdef HAVE_STATVFS
2938f7c8 62#include <sys/statvfs.h>
614305f3 63#endif
e827ff7e
SG
64
65#if HAVE_PTY_H
b0a33c1e 66#include <pty.h>
e827ff7e
SG
67#else
68#include <../include/openpty.h>
69#endif
0ad19a3f 70
9d257a2a
CB
71#if HAVE_LIBCAP
72#include <sys/capability.h>
73#endif
74
75#if HAVE_SYS_PERSONALITY_H
76#include <sys/personality.h>
77#endif
78
79#if IS_BIONIC
80#include <../include/lxcmntent.h>
81#else
82#include <mntent.h>
83#endif
84
85#if !defined(HAVE_PRLIMIT) && defined(HAVE_PRLIMIT64)
86#include <../include/prlimit.h>
87#endif
88
e8bd4e43 89#include "af_unix.h"
9d257a2a 90#include "caps.h"
8f3e280e 91#include "cgroup.h"
1b09f2c0 92#include "conf.h"
1ed6ba91 93#include "confile_utils.h"
8f3e280e 94#include "error.h"
1b09f2c0 95#include "log.h"
0ed9b1bc 96#include "lsm/lsm.h"
025ed0f3 97#include "lxclock.h"
8f3e280e 98#include "lxcseccomp.h"
4355ab5f 99#include "namespace.h"
8f3e280e
CB
100#include "network.h"
101#include "parse.h"
732375f5 102#include "ringbuf.h"
794248d0 103#include "start.h"
28d832c4 104#include "storage.h"
28d832c4 105#include "storage/overlay.h"
0ed9b1bc 106#include "terminal.h"
8f3e280e 107#include "utils.h"
d0a36f2c 108
/* Fallback values for mount flags that the system headers may not provide. */
#if !defined(MS_PRIVATE)
#define MS_PRIVATE (1 << 18)
#endif

#if !defined(MS_LAZYTIME)
#define MS_LAZYTIME (1 << 25)
#endif

/* Logging definition for this file (macro provided by log.h). */
lxc_log_define(lxc_conf, lxc);
e5bda9ee 118
/* The lxc_conf of the container currently being worked on in an API call.
 * This is used in the error calls. Declared with __thread when HAVE_TLS is
 * defined, as a plain global otherwise.
 */
#ifdef HAVE_TLS
__thread
#endif
struct lxc_conf *current_config;
127
/* pivot_root() may be missing from the C library. In that case provide a
 * local wrapper around the raw syscall, or fail with ENOSYS when the syscall
 * number is not known either.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
142
0fd73091
CB
143char *lxchook_names[NUM_LXC_HOOKS] = {
144 "pre-start",
145 "pre-mount",
146 "mount",
147 "autodev",
148 "start",
149 "stop",
150 "post-stop",
151 "clone",
152 "destroy",
153 "start-host"
154};
72d0e1cb 155
/* One textual mount option and the mount flag it maps to. */
struct mount_opt {
	char *name;      /* option string, e.g. "ro" */
	int clear, flag; /* clear: non-zero for options that negate `flag` (e.g. "rw" vs MS_RDONLY) */
};
161
/* Maps a capability name (without the "CAP_" prefix, lower case) to its
 * numeric CAP_* value.
 */
struct caps_opt {
	char *name; /* e.g. "sys_admin" */
	int value;  /* e.g. CAP_SYS_ADMIN */
};
166
/* Maps a resource limit name (without the "RLIMIT_" prefix, lower case) to
 * its numeric RLIMIT_* value.
 */
struct limit_opt {
	char *name; /* e.g. "nofile" */
	int value;  /* e.g. RLIMIT_NOFILE */
};
171
998ac676 172static struct mount_opt mount_opt[] = {
470b359b
CB
173 { "async", 1, MS_SYNCHRONOUS },
174 { "atime", 1, MS_NOATIME },
175 { "bind", 0, MS_BIND },
88d413d5 176 { "defaults", 0, 0 },
88d413d5 177 { "dev", 1, MS_NODEV },
470b359b 178 { "diratime", 1, MS_NODIRATIME },
88d413d5 179 { "dirsync", 0, MS_DIRSYNC },
470b359b 180 { "exec", 1, MS_NOEXEC },
8912711c 181 { "lazytime", 0, MS_LAZYTIME },
88d413d5 182 { "mand", 0, MS_MANDLOCK },
88d413d5 183 { "noatime", 0, MS_NOATIME },
470b359b 184 { "nodev", 0, MS_NODEV },
88d413d5 185 { "nodiratime", 0, MS_NODIRATIME },
470b359b
CB
186 { "noexec", 0, MS_NOEXEC },
187 { "nomand", 1, MS_MANDLOCK },
188 { "norelatime", 1, MS_RELATIME },
189 { "nostrictatime", 1, MS_STRICTATIME },
190 { "nosuid", 0, MS_NOSUID },
88d413d5
SW
191 { "rbind", 0, MS_BIND|MS_REC },
192 { "relatime", 0, MS_RELATIME },
470b359b
CB
193 { "remount", 0, MS_REMOUNT },
194 { "ro", 0, MS_RDONLY },
195 { "rw", 1, MS_RDONLY },
88d413d5 196 { "strictatime", 0, MS_STRICTATIME },
470b359b
CB
197 { "suid", 1, MS_NOSUID },
198 { "sync", 0, MS_SYNCHRONOUS },
88d413d5 199 { NULL, 0, 0 },
998ac676
RT
200};
201
d840039e 202static struct mount_opt propagation_opt[] = {
0fd73091
CB
203 { "private", 0, MS_PRIVATE },
204 { "shared", 0, MS_SHARED },
205 { "slave", 0, MS_SLAVE },
206 { "unbindable", 0, MS_UNBINDABLE },
207 { "rprivate", 0, MS_PRIVATE|MS_REC },
208 { "rshared", 0, MS_SHARED|MS_REC },
209 { "rslave", 0, MS_SLAVE|MS_REC },
210 { "runbindable", 0, MS_UNBINDABLE|MS_REC },
211 { NULL, 0, 0 },
d840039e
YT
212};
213
81810dd1 214static struct caps_opt caps_opt[] = {
8560cd36 215#if HAVE_LIBCAP
0fd73091
CB
216 { "chown", CAP_CHOWN },
217 { "dac_override", CAP_DAC_OVERRIDE },
218 { "dac_read_search", CAP_DAC_READ_SEARCH },
219 { "fowner", CAP_FOWNER },
220 { "fsetid", CAP_FSETID },
221 { "kill", CAP_KILL },
222 { "setgid", CAP_SETGID },
223 { "setuid", CAP_SETUID },
224 { "setpcap", CAP_SETPCAP },
225 { "linux_immutable", CAP_LINUX_IMMUTABLE },
226 { "net_bind_service", CAP_NET_BIND_SERVICE },
227 { "net_broadcast", CAP_NET_BROADCAST },
228 { "net_admin", CAP_NET_ADMIN },
229 { "net_raw", CAP_NET_RAW },
230 { "ipc_lock", CAP_IPC_LOCK },
231 { "ipc_owner", CAP_IPC_OWNER },
232 { "sys_module", CAP_SYS_MODULE },
233 { "sys_rawio", CAP_SYS_RAWIO },
234 { "sys_chroot", CAP_SYS_CHROOT },
235 { "sys_ptrace", CAP_SYS_PTRACE },
236 { "sys_pacct", CAP_SYS_PACCT },
237 { "sys_admin", CAP_SYS_ADMIN },
238 { "sys_boot", CAP_SYS_BOOT },
239 { "sys_nice", CAP_SYS_NICE },
240 { "sys_resource", CAP_SYS_RESOURCE },
241 { "sys_time", CAP_SYS_TIME },
242 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
243 { "mknod", CAP_MKNOD },
244 { "lease", CAP_LEASE },
57b837e2 245#ifdef CAP_AUDIT_READ
0fd73091 246 { "audit_read", CAP_AUDIT_READ },
57b837e2 247#endif
9527e566 248#ifdef CAP_AUDIT_WRITE
0fd73091 249 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
250#endif
251#ifdef CAP_AUDIT_CONTROL
0fd73091 252 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 253#endif
0fd73091
CB
254 { "setfcap", CAP_SETFCAP },
255 { "mac_override", CAP_MAC_OVERRIDE },
256 { "mac_admin", CAP_MAC_ADMIN },
5170c716 257#ifdef CAP_SYSLOG
0fd73091 258 { "syslog", CAP_SYSLOG },
5170c716
CS
259#endif
260#ifdef CAP_WAKE_ALARM
0fd73091 261 { "wake_alarm", CAP_WAKE_ALARM },
5170c716 262#endif
2b54359b 263#ifdef CAP_BLOCK_SUSPEND
0fd73091 264 { "block_suspend", CAP_BLOCK_SUSPEND },
2b54359b 265#endif
495d2046 266#endif
8560cd36 267};
81810dd1 268
c6d09e15
WB
269static struct limit_opt limit_opt[] = {
270#ifdef RLIMIT_AS
271 { "as", RLIMIT_AS },
272#endif
273#ifdef RLIMIT_CORE
274 { "core", RLIMIT_CORE },
275#endif
276#ifdef RLIMIT_CPU
277 { "cpu", RLIMIT_CPU },
278#endif
279#ifdef RLIMIT_DATA
280 { "data", RLIMIT_DATA },
281#endif
282#ifdef RLIMIT_FSIZE
283 { "fsize", RLIMIT_FSIZE },
284#endif
285#ifdef RLIMIT_LOCKS
286 { "locks", RLIMIT_LOCKS },
287#endif
288#ifdef RLIMIT_MEMLOCK
289 { "memlock", RLIMIT_MEMLOCK },
290#endif
291#ifdef RLIMIT_MSGQUEUE
292 { "msgqueue", RLIMIT_MSGQUEUE },
293#endif
294#ifdef RLIMIT_NICE
295 { "nice", RLIMIT_NICE },
296#endif
297#ifdef RLIMIT_NOFILE
298 { "nofile", RLIMIT_NOFILE },
299#endif
300#ifdef RLIMIT_NPROC
301 { "nproc", RLIMIT_NPROC },
302#endif
303#ifdef RLIMIT_RSS
304 { "rss", RLIMIT_RSS },
305#endif
306#ifdef RLIMIT_RTPRIO
307 { "rtprio", RLIMIT_RTPRIO },
308#endif
309#ifdef RLIMIT_RTTIME
310 { "rttime", RLIMIT_RTTIME },
311#endif
312#ifdef RLIMIT_SIGPENDING
313 { "sigpending", RLIMIT_SIGPENDING },
314#endif
315#ifdef RLIMIT_STACK
316 { "stack", RLIMIT_STACK },
317#endif
318};
319
91c3830e
SH
320static int run_buffer(char *buffer)
321{
8e7da691 322 int ret;
0fd73091
CB
323 char *output;
324 struct lxc_popen_FILE *f;
91c3830e 325
ebec9176 326 f = lxc_popen(buffer);
91c3830e 327 if (!f) {
3f60c2f7 328 SYSERROR("Failed to popen() %s", buffer);
91c3830e
SH
329 return -1;
330 }
331
332 output = malloc(LXC_LOG_BUFFER_SIZE);
333 if (!output) {
3f60c2f7 334 ERROR("Failed to allocate memory for %s", buffer);
ebec9176 335 lxc_pclose(f);
91c3830e
SH
336 return -1;
337 }
338
062b72c6 339 while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
3f60c2f7 340 DEBUG("Script %s with output: %s", buffer, output);
91c3830e
SH
341
342 free(output);
343
ebec9176 344 ret = lxc_pclose(f);
8e7da691 345 if (ret == -1) {
3f60c2f7 346 SYSERROR("Script exited with error");
91c3830e 347 return -1;
8e7da691 348 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
3f60c2f7 349 ERROR("Script exited with status %d", WEXITSTATUS(ret));
8e7da691
DE
350 return -1;
351 } else if (WIFSIGNALED(ret)) {
3f60c2f7 352 ERROR("Script terminated by signal %d", WTERMSIG(ret));
8e7da691 353 return -1;
91c3830e
SH
354 }
355
356 return 0;
357}
358
/* Export @key=@value into the environment for a hook and log the outcome.
 * Returns 0 on success, -1 if setenv() failed.
 */
static int run_script_argv_setenv(const char *key, const char *value)
{
	if (setenv(key, value, 1) < 0) {
		SYSERROR("Failed to set environment variable: %s=%s", key, value);
		return -1;
	}

	TRACE("Set environment variable: %s=%s", key, value);
	return 0;
}

/* Build the command line for a hook script and run it.
 *
 * With @hook_version 0 the container name, section and hook name are passed
 * as positional arguments ("exec <script> <name> <section> <hookname>").
 * With @hook_version 1 only the script is placed on the command line and the
 * hook type, section and (for the "net" section) network details are exported
 * through LXC_* environment variables. In both cases every entry of the
 * NULL-terminated @argv is appended to the command line.
 *
 * Returns 0 on success, -EFBIG if the command line would exceed INT_MAX
 * bytes, -ENOMEM if the buffer cannot be allocated and -1 on any other
 * failure (including a failing script).
 */
int run_script_argv(const char *name, unsigned int hook_version,
		    const char *section, const char *script,
		    const char *hookname, char **argv)
{
	int buf_pos, i, ret;
	char *buffer;
	int fret = -1;
	size_t size = 0;

	if (hook_version == 0)
		INFO("Executing script \"%s\" for container \"%s\", config "
		     "section \"%s\"", script, name, section);
	else
		INFO("Executing script \"%s\" for container \"%s\"", script, name);

	/* Each extra argument is emitted as " <arg>". */
	for (i = 0; argv && argv[i]; i++)
		size += strlen(argv[i]) + 1;

	/* "exec" plus its terminating byte, the script and one separator. */
	size += sizeof("exec");
	size += strlen(script);
	size++;

	if (size > INT_MAX)
		return -EFBIG;

	if (hook_version == 0) {
		size += strlen(hookname);
		size++;

		size += strlen(name);
		size++;

		size += strlen(section);
		size++;

		if (size > INT_MAX)
			return -EFBIG;
	}

	buffer = malloc(size);
	if (!buffer)
		return -ENOMEM;

	if (hook_version == 0)
		buf_pos = snprintf(buffer, size, "exec %s %s %s %s", script, name, section, hookname);
	else
		buf_pos = snprintf(buffer, size, "exec %s", script);
	if (buf_pos < 0 || (size_t)buf_pos >= size) {
		ERROR("Failed to create command line for script \"%s\"", script);
		goto on_error;
	}

	if (hook_version == 1) {
		if (run_script_argv_setenv("LXC_HOOK_TYPE", hookname) < 0)
			goto on_error;

		if (run_script_argv_setenv("LXC_HOOK_SECTION", section) < 0)
			goto on_error;

		if (strcmp(section, "net") == 0) {
			const char *type, *parent;
			bool is_veth;

			if (!argv || !argv[0])
				goto on_error;

			type = argv[0];
			if (run_script_argv_setenv("LXC_NET_TYPE", type) < 0)
				goto on_error;

			parent = argv[1] ? argv[1] : "";
			is_veth = strcmp(type, "veth") == 0;

			if (is_veth) {
				/* argv[2] only exists when argv[1] is not the
				 * NULL terminator; never read past the end of
				 * the array.
				 */
				const char *peer = (argv[1] && argv[2]) ? argv[2] : "";

				if (run_script_argv_setenv("LXC_NET_PEER", peer) < 0)
					goto on_error;
			}

			if (is_veth || strcmp(type, "macvlan") == 0 ||
			    strcmp(type, "phys") == 0) {
				if (run_script_argv_setenv("LXC_NET_PARENT", parent) < 0)
					goto on_error;
			}
		}
	}

	for (i = 0; argv && argv[i]; i++) {
		size_t len = size - buf_pos;

		ret = snprintf(buffer + buf_pos, len, " %s", argv[i]);
		if (ret < 0 || (size_t)ret >= len) {
			ERROR("Failed to create command line for script \"%s\"", script);
			goto on_error;
		}
		buf_pos += ret;
	}

	fret = run_buffer(buffer);

on_error:
	free(buffer);
	return fret;
}
499
/* Build "exec <script> <name> <section> [args...]" from the NULL-terminated
 * variadic list of char * arguments and run it.
 *
 * The command line is built on the heap: its length depends on caller
 * supplied strings and may be far larger than what is safe to place on the
 * stack.
 *
 * Returns the result of running the script (0 on success), or -1 if the
 * command line is too long, cannot be allocated or cannot be formatted.
 */
int run_script(const char *name, const char *section, const char *script, ...)
{
	int ret;
	int fret = -1;
	char *buffer, *p;
	va_list ap;
	size_t size = 0;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\"",
	     script, name, section);

	/* Each extra argument is emitted as " <arg>". */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen("exec");
	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* Three separating spaces and the terminating NUL byte. */
	size += 4;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer)
		return -1;

	ret = snprintf(buffer, size, "exec %s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size)
		goto out;

	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			va_end(ap);
			goto out;
		}
		ret += rc;
	}
	va_end(ap);

	fret = run_buffer(buffer);

out:
	free(buffer);
	return fret;
}
544
0fd73091 545/* pin_rootfs
63fc76c3 546 * if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for
b7ed4bf0
CS
547 * the duration of the container run, to prevent the container from marking
548 * the underlying fs readonly on shutdown. unlink the file immediately so
63fc76c3
GJ
549 * no name pollution is happens.
550 * don't unlink on NFS to avoid random named stale handles.
0c547523
SH
551 * return -1 on error.
552 * return -2 if nothing needed to be pinned.
553 * return an open fd (>=0) if we pinned it.
554 */
555int pin_rootfs(const char *rootfs)
556{
0fd73091
CB
557 int fd, ret;
558 char absrootfs[MAXPATHLEN], absrootfspin[MAXPATHLEN];
0c547523 559 struct stat s;
63fc76c3 560 struct statfs sfs;
0c547523 561
e99ee0de 562 if (rootfs == NULL || strlen(rootfs) == 0)
0d03360a 563 return -2;
e99ee0de 564
00ec333b 565 if (!realpath(rootfs, absrootfs))
9be53773 566 return -2;
0c547523 567
0fd73091
CB
568 ret = stat(absrootfs, &s);
569 if (ret < 0)
0c547523 570 return -1;
0c547523 571
72f919c4 572 if (!S_ISDIR(s.st_mode))
0c547523
SH
573 return -2;
574
63fc76c3 575 ret = snprintf(absrootfspin, MAXPATHLEN, "%s/.lxc-keep", absrootfs);
00ec333b 576 if (ret >= MAXPATHLEN)
0c547523 577 return -1;
0c547523 578
0fd73091 579 fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR);
b7ed4bf0
CS
580 if (fd < 0)
581 return fd;
0fd73091 582
205fc010
CB
583 ret = fstatfs (fd, &sfs);
584 if (ret < 0)
585 return fd;
63fc76c3
GJ
586
587 if (sfs.f_type == NFS_SUPER_MAGIC) {
205fc010 588 DEBUG("Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin);
63fc76c3
GJ
589 return fd;
590 }
591
b7ed4bf0 592 (void)unlink(absrootfspin);
0fd73091 593
0c547523
SH
594 return fd;
595}
596
/* When remounting, make sure restrictive flags already in effect on the
 * existing mount (nosuid, nodev, ro, noexec) are honored.
 *
 * @s is the path to inspect, falling back to @d when @s is NULL. @flags is
 * returned unchanged when it does not contain MS_REMOUNT, when no path is
 * available, when statvfs() fails or when statvfs() support is compiled out.
 */
unsigned long add_required_remount_flags(const char *s, const char *d,
					 unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *target = s ? s : d;
	struct statvfs vfs;
	size_t idx;

	if (!(flags & MS_REMOUNT) || !target)
		return flags;

	if (statvfs(target, &vfs) < 0)
		return flags;

	for (idx = 0; idx < sizeof(sticky) / sizeof(sticky[0]); idx++) {
		if (vfs.f_flag & sticky[idx])
			flags |= sticky[idx];
	}

	return flags;
#else
	return flags;
#endif
}
635
4fb3cba5 636static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 637{
0fd73091 638 int i, r;
b06b8511
CS
639 static struct {
640 int match_mask;
641 int match_flag;
642 const char *source;
643 const char *destination;
644 const char *fstype;
645 unsigned long flags;
646 const char *options;
647 } default_mounts[] = {
0fd73091
CB
648 /* Read-only bind-mounting... In older kernels, doing that
649 * required to do one MS_BIND mount and then
650 * MS_REMOUNT|MS_RDONLY the same one. According to mount(2)
651 * manpage, MS_BIND honors MS_RDONLY from kernel 2.6.26
652 * onwards. However, this apparently does not work on kernel
653 * 3.8. Unfortunately, on that very same kernel, doing the same
654 * trick as above doesn't seem to work either, there one needs
655 * to ALSO specify MS_BIND for the remount, otherwise the
656 * entire fs is remounted read-only or the mount fails because
657 * it's busy... MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for
658 * kernels as low as 2.6.32...
368bbc02 659 */
0fd73091 660 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a 661 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
0fd73091
CB
662 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
663 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
664 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
665 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
666 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
667 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
668 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
669 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
670 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
671 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
672 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
673 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
674 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
675 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
676 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
677 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 678 };
368bbc02 679
b06b8511 680 for (i = 0; default_mounts[i].match_mask; i++) {
0fd73091
CB
681 int saved_errno;
682 unsigned long mflags;
683 char *destination = NULL;
684 char *source = NULL;
685 if ((flags & default_mounts[i].match_mask) != default_mounts[i].match_flag)
686 continue;
687
688 if (default_mounts[i].source) {
cc4fd506 689 /* will act like strdup if %r is not present */
0fd73091
CB
690 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
691 if (!source)
cc4fd506 692 return -1;
0fd73091 693 }
f24a52d5 694
0fd73091
CB
695 if (!default_mounts[i].destination) {
696 ERROR("BUG: auto mounts destination %d was NULL", i);
b06b8511 697 free(source);
0fd73091
CB
698 return -1;
699 }
700
701 /* will act like strdup if %r is not present */
702 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
703 if (!destination) {
704 saved_errno = errno;
705 free(source);
706 errno = saved_errno;
707 return -1;
708 }
709
710 mflags = add_required_remount_flags(source, destination,
711 default_mounts[i].flags);
712 r = safe_mount(source, destination, default_mounts[i].fstype,
713 mflags, default_mounts[i].options,
714 conf->rootfs.path ? conf->rootfs.mount : NULL);
715 saved_errno = errno;
716 if (r < 0 && errno == ENOENT) {
717 INFO("Mount source or target for \"%s\" on \"%s\" does "
718 "not exist. Skipping", source, destination);
719 r = 0;
720 } else if (r < 0) {
721 SYSERROR("Failed to mount \"%s\" on \"%s\" with flags %lu", source, destination, mflags);
722 }
723
724 free(source);
725 free(destination);
726 if (r < 0) {
727 errno = saved_errno;
728 return -1;
368bbc02 729 }
368bbc02
CS
730 }
731
b06b8511 732 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
733 int cg_flags;
734
3f69fb12 735 cg_flags = flags & (LXC_AUTO_CGROUP_MASK & ~LXC_AUTO_CGROUP_FORCE);
0fd73091
CB
736 /* If the type of cgroup mount was not specified, it depends on
737 * the container's capabilities as to what makes sense: if we
738 * have CAP_SYS_ADMIN, the read-only part can be remounted
739 * read-write anyway, so we may as well default to read-write;
740 * then the admin will not be given a false sense of security.
741 * (And if they really want mixed r/o r/w, then they can
742 * explicitly specify :mixed.) OTOH, if the container lacks
743 * CAP_SYS_ADMIN, do only default to :mixed, because then the
744 * container can't remount it read-write.
745 */
0769b82a
CS
746 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
747 int has_sys_admin = 0;
b0ee5983
CB
748
749 if (!lxc_list_empty(&conf->keepcaps))
0769b82a 750 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
b0ee5983 751 else
0769b82a 752 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
b0ee5983
CB
753
754 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC)
0769b82a 755 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
b0ee5983 756 else
0769b82a 757 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
0769b82a 758 }
0fd73091 759
3f69fb12 760 if (flags & LXC_AUTO_CGROUP_FORCE)
0fd73091
CB
761 cg_flags |= LXC_AUTO_CGROUP_FORCE;
762
2202afc9
CB
763 if (!handler->cgroup_ops->mount(handler->cgroup_ops,
764 handler,
765 conf->rootfs.path ? conf->rootfs.mount : "",
766 cg_flags)) {
0fd73091 767 SYSERROR("Failed to mount \"/sys/fs/cgroup\"");
b06b8511 768 return -1;
368bbc02
CS
769 }
770 }
771
368bbc02 772 return 0;
368bbc02
CS
773}
774
/* Set the hostname to @utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and is not an error.
 * Returns 0 on success, -1 if sethostname() failed.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (!utsname)
		return 0;

	hostname = utsname->nodename;
	if (sethostname(hostname, strlen(hostname)) < 0) {
		SYSERROR("Failed to set the hostname to \"%s\"", hostname);
		return -1;
	}

	INFO("Set hostname to \"%s\"", hostname);
	return 0;
}
792
/* A symlink to create under the container's /dev directory. */
struct dev_symlinks {
	const char *oldpath; /* symlink target */
	const char *name;    /* link name, relative to /dev */
};

/* The standard /dev links pointing into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd" },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin" },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
804
ed8704d0 805static int lxc_setup_dev_symlinks(const struct lxc_rootfs *rootfs)
69aa6655 806{
0fd73091 807 int i, ret;
69aa6655 808 char path[MAXPATHLEN];
09227be2 809 struct stat s;
69aa6655 810
69aa6655
DE
811 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
812 const struct dev_symlinks *d = &dev_symlinks[i];
0fd73091
CB
813
814 ret = snprintf(path, sizeof(path), "%s/dev/%s",
815 rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
816 if (ret < 0 || ret >= MAXPATHLEN)
817 return -1;
09227be2 818
0fd73091
CB
819 /* Stat the path first. If we don't get an error accept it as
820 * is and don't try to create it
09227be2 821 */
0fd73091
CB
822 ret = stat(path, &s);
823 if (ret == 0)
09227be2 824 continue;
09227be2 825
69aa6655
DE
826 ret = symlink(d->oldpath, path);
827 if (ret && errno != EEXIST) {
0fd73091
CB
828 if (errno == EROFS) {
829 WARN("Failed to create \"%s\". Read-only filesystem", path);
09227be2 830 } else {
0fd73091 831 SYSERROR("Failed to create \"%s\"", path);
09227be2
MW
832 return -1;
833 }
69aa6655
DE
834 }
835 }
0fd73091 836
69aa6655
DE
837 return 0;
838}
839
/* Build a space-separated list of ptys to pass to systemd.
 *
 * On the first call (*pp == NULL) a new string "container_ttys=<name>" is
 * allocated. On later calls " <name>" is appended to the existing string.
 * The string is owned by the caller and must be released with free().
 *
 * Returns true on success. On allocation failure false is returned and *pp
 * is left untouched.
 */
static bool append_ttyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t old_len;
	char *p;

	if (!*pp) {
		p = malloc(prefix_len + name_len + 1);
		if (!p)
			return false;

		memcpy(p, prefix, prefix_len);
		/* Copies the terminating NUL byte as well. */
		memcpy(p + prefix_len, name, name_len + 1);
		*pp = p;
		return true;
	}

	old_len = strlen(*pp);
	/* Room for the separating space and the terminating NUL byte. */
	p = realloc(*pp, old_len + name_len + 2);
	if (!p)
		return false;

	p[old_len] = ' ';
	memcpy(p + old_len + 1, name, name_len + 1);
	*pp = p;

	return true;
}
864
2187efd3 865static int lxc_setup_ttys(struct lxc_conf *conf)
393903d1 866{
9e1045e3 867 int i, ret;
0e4be3cf 868 const struct lxc_tty_info *ttys = &conf->ttys;
885766f5 869 char *ttydir = ttys->dir;
7c6ef2a2 870 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
b0a33c1e 871
e8bd4e43 872 if (!conf->rootfs.path)
bc9bd0e3
DL
873 return 0;
874
885766f5 875 for (i = 0; i < ttys->max; i++) {
0e4be3cf 876 struct lxc_terminal_info *tty = &ttys->tty[i];
b0a33c1e 877
e8bd4e43 878 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
73363c61 879 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 880 return -1;
9e1045e3 881
7c6ef2a2
SH
882 if (ttydir) {
883 /* create dev/lxc/tty%d" */
9e1045e3
CB
884 ret = snprintf(lxcpath, sizeof(lxcpath),
885 "/dev/%s/tty%d", ttydir, i + 1);
73363c61 886 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
7c6ef2a2 887 return -1;
9e1045e3 888
7c6ef2a2 889 ret = creat(lxcpath, 0660);
9e1045e3 890 if (ret < 0 && errno != EEXIST) {
73363c61 891 SYSERROR("Failed to create \"%s\"", lxcpath);
7c6ef2a2
SH
892 return -1;
893 }
4d44e274
SH
894 if (ret >= 0)
895 close(ret);
9e1045e3 896
7c6ef2a2 897 ret = unlink(path);
9e1045e3 898 if (ret < 0 && errno != ENOENT) {
73363c61 899 SYSERROR("Failed to unlink \"%s\"", path);
7c6ef2a2
SH
900 return -1;
901 }
b0a33c1e 902
2520facd 903 ret = mount(tty->name, lxcpath, "none", MS_BIND, 0);
9e1045e3 904 if (ret < 0) {
73363c61 905 WARN("Failed to bind mount \"%s\" onto \"%s\"",
2520facd 906 tty->name, path);
7c6ef2a2
SH
907 continue;
908 }
0fd73091 909 DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name,
9e1045e3 910 path);
13954cce 911
9e1045e3
CB
912 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d",
913 ttydir, i + 1);
73363c61 914 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
9ba8130c 915 return -1;
9e1045e3 916
7c6ef2a2 917 ret = symlink(lxcpath, path);
9e1045e3 918 if (ret < 0) {
73363c61 919 SYSERROR("Failed to create symlink \"%s\" -> \"%s\"",
9e1045e3 920 path, lxcpath);
7c6ef2a2
SH
921 return -1;
922 }
923 } else {
9e1045e3
CB
924 /* If we populated /dev, then we need to create
925 * /dev/ttyN
926 */
d3ccc04e
CB
927 ret = mknod(path, S_IFREG | 0000, 0);
928 if (ret < 0) /* this isn't fatal, continue */
929 ERROR("%s - Failed to create \"%s\"", strerror(errno), path);
9e1045e3 930
2520facd 931 ret = mount(tty->name, path, "none", MS_BIND, 0);
9e1045e3 932 if (ret < 0) {
2520facd 933 SYSERROR("Failed to mount '%s'->'%s'", tty->name, path);
7c6ef2a2
SH
934 continue;
935 }
9e1045e3 936
d3ccc04e 937 DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, path);
393903d1 938 }
9e1045e3 939
885766f5 940 if (!append_ttyname(&conf->ttys.tty_names, tty->name)) {
393903d1
SH
941 ERROR("Error setting up container_ttys string");
942 return -1;
b0a33c1e 943 }
944 }
945
885766f5 946 INFO("Finished setting up %zu /dev/tty<N> device(s)", ttys->max);
b0a33c1e 947 return 0;
948}
949
663014ee 950int lxc_allocate_ttys(struct lxc_conf *conf)
2187efd3 951{
2187efd3 952 int i, ret;
0fd73091 953 struct lxc_tty_info *ttys = &conf->ttys;
2187efd3
CB
954
955 /* no tty in the configuration */
885766f5 956 if (ttys->max == 0)
2187efd3
CB
957 return 0;
958
885766f5 959 ttys->tty = malloc(sizeof(*ttys->tty) * ttys->max);
0e4be3cf 960 if (!ttys->tty)
2187efd3 961 return -ENOMEM;
2187efd3 962
885766f5 963 for (i = 0; i < ttys->max; i++) {
0e4be3cf 964 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 965
386e6768
CB
966 tty->master = -EBADF;
967 tty->slave = -EBADF;
2520facd
CB
968 ret = openpty(&tty->master, &tty->slave,
969 tty->name, NULL, NULL);
2187efd3 970 if (ret) {
0fd73091 971 SYSERROR("Failed to create tty %d", i);
885766f5 972 ttys->max = i;
0e4be3cf 973 lxc_delete_tty(ttys);
2187efd3
CB
974 return -ENOTTY;
975 }
976
0fd73091 977 DEBUG("Created tty \"%s\" with master fd %d and slave fd %d",
2520facd 978 tty->name, tty->master, tty->slave);
2187efd3
CB
979
980 /* Prevent leaking the file descriptors to the container */
2520facd 981 ret = fcntl(tty->master, F_SETFD, FD_CLOEXEC);
2187efd3 982 if (ret < 0)
0fd73091
CB
983 WARN("Failed to set FD_CLOEXEC flag on master fd %d of "
984 "tty device \"%s\": %s",
2520facd 985 tty->master, tty->name, strerror(errno));
2187efd3 986
2520facd 987 ret = fcntl(tty->slave, F_SETFD, FD_CLOEXEC);
2187efd3 988 if (ret < 0)
0fd73091
CB
989 WARN("Failed to set FD_CLOEXEC flag on slave fd %d of "
990 "tty device \"%s\": %s",
2520facd 991 tty->slave, tty->name, strerror(errno));
2187efd3 992
2520facd 993 tty->busy = 0;
2187efd3
CB
994 }
995
885766f5 996 INFO("Finished creating %zu tty devices", ttys->max);
2187efd3
CB
997 return 0;
998}
999
0e4be3cf 1000void lxc_delete_tty(struct lxc_tty_info *ttys)
2187efd3
CB
1001{
1002 int i;
1003
386e6768
CB
1004 if (!ttys->tty)
1005 return;
1006
885766f5 1007 for (i = 0; i < ttys->max; i++) {
0e4be3cf 1008 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 1009
386e6768
CB
1010 if (tty->master >= 0) {
1011 close(tty->master);
1012 tty->master = -EBADF;
1013 }
1014
1015 if (tty->slave >= 0) {
1016 close(tty->slave);
1017 tty->slave = -EBADF;
1018 }
2187efd3
CB
1019 }
1020
0e4be3cf
CB
1021 free(ttys->tty);
1022 ttys->tty = NULL;
2187efd3
CB
1023}
1024
1025static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
1026{
1027 int i;
0fd73091 1028 int ret = -1;
2187efd3 1029 struct lxc_conf *conf = handler->conf;
0e4be3cf 1030 struct lxc_tty_info *ttys = &conf->ttys;
2187efd3 1031 int sock = handler->data_sock[0];
2187efd3 1032
885766f5 1033 if (ttys->max == 0)
2187efd3
CB
1034 return 0;
1035
885766f5 1036 for (i = 0; i < ttys->max; i++) {
2187efd3 1037 int ttyfds[2];
0e4be3cf 1038 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 1039
2520facd
CB
1040 ttyfds[0] = tty->master;
1041 ttyfds[1] = tty->slave;
2187efd3
CB
1042
1043 ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0);
1044 if (ret < 0)
1045 break;
1046
0fd73091 1047 TRACE("Sent ty \"%s\" with master fd %d and slave fd %d to "
2520facd 1048 "parent", tty->name, tty->master, tty->slave);
2187efd3
CB
1049 }
1050
1051 if (ret < 0)
885766f5 1052 ERROR("Failed to send %zu ttys to parent: %s", ttys->max,
2187efd3
CB
1053 strerror(errno));
1054 else
885766f5 1055 TRACE("Sent %zu ttys to parent", ttys->max);
2187efd3
CB
1056
1057 return ret;
1058}
1059
1060static int lxc_create_ttys(struct lxc_handler *handler)
1061{
1062 int ret = -1;
1063 struct lxc_conf *conf = handler->conf;
1064
663014ee 1065 ret = lxc_allocate_ttys(conf);
2187efd3
CB
1066 if (ret < 0) {
1067 ERROR("Failed to allocate ttys");
1068 goto on_error;
1069 }
1070
1071 ret = lxc_send_ttys_to_parent(handler);
1072 if (ret < 0) {
1073 ERROR("Failed to send ttys to parent");
1074 goto on_error;
1075 }
1076
1077 if (!conf->is_execute) {
1078 ret = lxc_setup_ttys(conf);
1079 if (ret < 0) {
1080 ERROR("Failed to setup ttys");
1081 goto on_error;
1082 }
1083 }
1084
885766f5
CB
1085 if (conf->ttys.tty_names) {
1086 ret = setenv("container_ttys", conf->ttys.tty_names, 1);
2187efd3 1087 if (ret < 0)
885766f5 1088 SYSERROR("Failed to set \"container_ttys=%s\"", conf->ttys.tty_names);
2187efd3
CB
1089 }
1090
1091 ret = 0;
1092
1093on_error:
0e4be3cf 1094 lxc_delete_tty(&conf->ttys);
2187efd3
CB
1095
1096 return ret;
1097}
1098
59bb8698 1099static int setup_rootfs_pivot_root(const char *rootfs)
bf601689 1100{
0fd73091
CB
1101 int ret;
1102 int newroot = -1, oldroot = -1;
bf601689 1103
2d489f9e
SH
1104 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1105 if (oldroot < 0) {
0fd73091 1106 SYSERROR("Failed to open old root directory");
9ba8130c
SH
1107 return -1;
1108 }
0fd73091 1109
2d489f9e
SH
1110 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1111 if (newroot < 0) {
0fd73091
CB
1112 SYSERROR("Failed to open new root directory");
1113 goto on_error;
c08556c6 1114 }
bf601689 1115
cc6f6dd7 1116 /* change into new root fs */
0fd73091
CB
1117 ret = fchdir(newroot);
1118 if (ret < 0) {
1119 SYSERROR("Failed to change to new rootfs \"%s\"", rootfs);
1120 goto on_error;
cc6f6dd7
DL
1121 }
1122
cc6f6dd7 1123 /* pivot_root into our new root fs */
0fd73091
CB
1124 ret = pivot_root(".", ".");
1125 if (ret < 0) {
1126 SYSERROR("Failed to pivot_root()");
1127 goto on_error;
bf601689 1128 }
cc6f6dd7 1129
e599717b 1130 /* At this point the old-root is mounted on top of our new-root. To
0fd73091
CB
1131 * unmounted it we must not be chdir'd into it, so escape back to
1132 * old-root.
2d489f9e 1133 */
0fd73091
CB
1134 ret = fchdir(oldroot);
1135 if (ret < 0) {
1136 SYSERROR("Failed to enter old root directory");
1137 goto on_error;
2d489f9e 1138 }
0fd73091 1139
e599717b
FW
1140 /* Make oldroot rslave to make sure our umounts don't propagate to the
1141 * host.
1142 */
1143 ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL);
1144 if (ret < 0) {
1145 SYSERROR("Failed to make oldroot rslave");
1146 goto on_error;
1147 }
1148
0fd73091
CB
1149 ret = umount2(".", MNT_DETACH);
1150 if (ret < 0) {
1151 SYSERROR("Failed to detach old root directory");
1152 goto on_error;
cc6f6dd7
DL
1153 }
1154
0fd73091
CB
1155 ret = fchdir(newroot);
1156 if (ret < 0) {
1157 SYSERROR("Failed to re-enter new root directory");
1158 goto on_error;
2d489f9e 1159 }
cc6f6dd7 1160
2d489f9e
SH
1161 close(oldroot);
1162 close(newroot);
bf601689 1163
0fd73091 1164 DEBUG("pivot_root(\"%s\") successful", rootfs);
bf601689 1165
bf601689 1166 return 0;
2d489f9e 1167
0fd73091 1168on_error:
2d489f9e
SH
1169 if (oldroot != -1)
1170 close(oldroot);
1171 if (newroot != -1)
1172 close(newroot);
0fd73091 1173
2d489f9e 1174 return -1;
bf601689
MH
1175}
1176
7133b912
CB
1177/* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an
1178 * error, log it but don't fail yet.
91c3830e 1179 */
7133b912
CB
1180static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
1181 const char *lxcpath)
91c3830e
SH
1182{
1183 int ret;
87da4ec3
SH
1184 size_t clen;
1185 char *path;
91c3830e 1186
7133b912 1187 INFO("Preparing \"/dev\"");
bc6928ff 1188
14221cbb 1189 /* $(rootfs->mount) + "/dev/pts" + '\0' */
ec50007f 1190 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1191 path = alloca(clen);
bc6928ff 1192
ec50007f 1193 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
7133b912 1194 if (ret < 0 || (size_t)ret >= clen)
91c3830e 1195 return -1;
bc6928ff 1196
87da4ec3 1197 if (!dir_exists(path)) {
7133b912
CB
1198 WARN("\"/dev\" directory does not exist. Proceeding without "
1199 "autodev being set up");
87da4ec3 1200 return 0;
bc6928ff 1201 }
87da4ec3 1202
1ec0e8e3 1203 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
7133b912
CB
1204 rootfs->path ? rootfs->mount : NULL);
1205 if (ret < 0) {
1206 SYSERROR("Failed to mount tmpfs on \"%s\"", path);
1ec0e8e3 1207 return -1;
91c3830e 1208 }
7133b912 1209 INFO("Mounted tmpfs on \"%s\"", path);
87da4ec3 1210
ec50007f 1211 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
7133b912 1212 if (ret < 0 || (size_t)ret >= clen)
91c3830e 1213 return -1;
87da4ec3 1214
7133b912 1215 /* If we are running on a devtmpfs mapping, dev/pts may already exist.
bc6928ff
MW
1216 * If not, then create it and exit if that fails...
1217 */
87da4ec3 1218 if (!dir_exists(path)) {
bc6928ff 1219 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
7133b912
CB
1220 if (ret < 0) {
1221 SYSERROR("Failed to create directory \"%s\"", path);
bc6928ff
MW
1222 return -1;
1223 }
91c3830e
SH
1224 }
1225
7133b912 1226 INFO("Prepared \"/dev\"");
91c3830e
SH
1227 return 0;
1228}
1229
/* Description of one device node created under the container's "/dev". */
struct lxc_device_node {
	const char *name;	/* file name below "/dev" */
	const mode_t mode;	/* mode passed to mknod() */
	const int maj;		/* device major number */
	const int min;		/* device minor number */
};
1236
5e73416f 1237static const struct lxc_device_node lxc_devices[] = {
06749971 1238 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
5e73416f 1239 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
06749971
CB
1240 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1241 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
5e73416f
CB
1242 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1243 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
c6883f38
SH
1244};
1245
27245ff7 1246static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38 1247{
5e73416f 1248 int i, ret;
c6883f38 1249 char path[MAXPATHLEN];
3a32201c 1250 mode_t cmask;
5e73416f 1251 bool can_mknod = true;
c6883f38 1252
3999be0a
CB
1253 ret = snprintf(path, MAXPATHLEN, "%s/dev",
1254 rootfs->path ? rootfs->mount : "");
1255 if (ret < 0 || ret >= MAXPATHLEN)
c6883f38 1256 return -1;
91c3830e 1257
0bbf8572
CB
1258 /* ignore, just don't try to fill in */
1259 if (!dir_exists(path))
9cb4d183
SH
1260 return 0;
1261
3999be0a
CB
1262 INFO("Populating \"/dev\"");
1263
3a32201c 1264 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
5e73416f
CB
1265 for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) {
1266 char hostpath[MAXPATHLEN];
1267 const struct lxc_device_node *device = &lxc_devices[i];
0728ebf4 1268
3999be0a 1269 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s",
5e73416f 1270 rootfs->path ? rootfs->mount : "", device->name);
c6883f38
SH
1271 if (ret < 0 || ret >= MAXPATHLEN)
1272 return -1;
0bbf8572 1273
5e73416f
CB
1274 if (can_mknod) {
1275 ret = mknod(path, device->mode, makedev(device->maj, device->min));
1276 if (ret == 0 || (ret < 0 && errno == EEXIST)) {
1277 DEBUG("Created device node \"%s\"", path);
0bbf8572
CB
1278 continue;
1279 }
1280
5e73416f
CB
1281 if (errno != EPERM) {
1282 SYSERROR("Failed to create device node \"%s\"", path);
9cb4d183
SH
1283 return -1;
1284 }
3999be0a 1285
5e73416f
CB
1286 /* This can e.g. happen when the container is
1287 * unprivileged or CAP_MKNOD has been dropped.
1288 */
1289 can_mknod = false;
1290 }
1291
1292 ret = mknod(path, S_IFREG, 0);
1293 if (ret < 0 && errno != EEXIST) {
1294 SYSERROR("Failed to create file \"%s\"", path);
1295 return -1;
1296 }
1297
1298 /* Fallback to bind-mounting the device from the host. */
1299 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", device->name);
1300 if (ret < 0 || ret >= MAXPATHLEN)
1301 return -1;
1302
1303 ret = safe_mount(hostpath, path, 0, MS_BIND, NULL,
1304 rootfs->path ? rootfs->mount : NULL);
1305 if (ret < 0) {
1306 SYSERROR("Failed to bind mount host device node \"%s\" "
1307 "onto \"%s\"", hostpath, path);
1308 return -1;
c6883f38 1309 }
5e73416f
CB
1310 DEBUG("Bind mounted host device node \"%s\" onto \"%s\"",
1311 hostpath, path);
c6883f38 1312 }
5e73416f 1313 (void)umask(cmask);
c6883f38 1314
3999be0a 1315 INFO("Populated \"/dev\"");
c6883f38
SH
1316 return 0;
1317}
1318
9aa76a17 1319static int lxc_setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1320{
9aa76a17 1321 int ret;
10bc1861 1322 struct lxc_storage *bdev;
91c3e281 1323 const struct lxc_rootfs *rootfs;
cc28d0b0 1324
91c3e281 1325 rootfs = &conf->rootfs;
a0f379bf 1326 if (!rootfs->path) {
0fd73091
CB
1327 ret = mount("", "/", NULL, MS_SLAVE | MS_REC, 0);
1328 if (ret < 0) {
1329 SYSERROR("Failed to make / rslave");
a0f379bf
DW
1330 return -1;
1331 }
0fd73091 1332
c69bd12f 1333 return 0;
a0f379bf 1334 }
0ad19a3f 1335
0fd73091
CB
1336 ret = access(rootfs->mount, F_OK);
1337 if (ret != 0) {
1338 SYSERROR("Failed to access to \"%s\". Check it is present",
12297168 1339 rootfs->mount);
b1789442
DL
1340 return -1;
1341 }
1342
8a388ed4 1343 bdev = storage_init(conf);
9aa76a17 1344 if (!bdev) {
0fd73091 1345 ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1346 rootfs->path, rootfs->mount,
1347 rootfs->options ? rootfs->options : "(null)");
9aa76a17 1348 return -1;
9be53773 1349 }
9aa76a17
CB
1350
1351 ret = bdev->ops->mount(bdev);
10bc1861 1352 storage_put(bdev);
9aa76a17 1353 if (ret < 0) {
0fd73091 1354 ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1355 rootfs->path, rootfs->mount,
1356 rootfs->options ? rootfs->options : "(null)");
c3f0a28c 1357 return -1;
1358 }
0ad19a3f 1359
0fd73091 1360 DEBUG("Mounted rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1361 rootfs->path, rootfs->mount,
1362 rootfs->options ? rootfs->options : "(null)");
9aa76a17 1363
ac778708
DL
1364 return 0;
1365}
1366
91e93c71
AV
1367int prepare_ramfs_root(char *root)
1368{
0fd73091
CB
1369 int i, ret;
1370 char *p, *p2;
1371 char buf[LXC_LINELEN], nroot[PATH_MAX];
91e93c71 1372 FILE *f;
91e93c71 1373
0fd73091
CB
1374 if (!realpath(root, nroot))
1375 return -1;
91e93c71 1376
0fd73091
CB
1377 ret = chdir("/");
1378 if (ret < 0)
1379 return -1;
91e93c71 1380
0fd73091
CB
1381 /* We could use here MS_MOVE, but in userns this mount is locked and
1382 * can't be moved.
91e93c71 1383 */
0fd73091
CB
1384 ret = mount(root, "/", NULL, MS_REC | MS_BIND, NULL);
1385 if (ret < 0) {
1386 SYSERROR("Failed to move \"%s\" into \"/\"", root);
1387 return -1;
91e93c71
AV
1388 }
1389
0fd73091
CB
1390 ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
1391 if (ret < 0) {
1392 SYSERROR("Failed to make \"/\" rprivate");
1393 return -1;
91e93c71
AV
1394 }
1395
0fd73091
CB
1396 /* The following code cleans up inhereted mounts which are not required
1397 * for CT.
91e93c71
AV
1398 *
1399 * The mountinfo file shows not all mounts, if a few points have been
1400 * unmounted between read operations from the mountinfo. So we need to
1401 * read mountinfo a few times.
1402 *
1403 * This loop can be skipped if a container uses unserns, because all
1404 * inherited mounts are locked and we should live with all this trash.
1405 */
0fd73091 1406 for (;;) {
91e93c71
AV
1407 int progress = 0;
1408
1409 f = fopen("./proc/self/mountinfo", "r");
1410 if (!f) {
1411 SYSERROR("Unable to open /proc/self/mountinfo");
1412 return -1;
1413 }
0fd73091 1414
eab15c1e 1415 while (fgets(buf, LXC_LINELEN, f)) {
91e93c71
AV
1416 for (p = buf, i=0; p && i < 4; i++)
1417 p = strchr(p+1, ' ');
0fd73091 1418
91e93c71
AV
1419 if (!p)
1420 continue;
0fd73091 1421
91e93c71
AV
1422 p2 = strchr(p+1, ' ');
1423 if (!p2)
1424 continue;
1425
1426 *p2 = '\0';
1427 *p = '.';
1428
1429 if (strcmp(p + 1, "/") == 0)
1430 continue;
0fd73091 1431
91e93c71
AV
1432 if (strcmp(p + 1, "/proc") == 0)
1433 continue;
1434
0fd73091
CB
1435 ret = umount2(p, MNT_DETACH);
1436 if (ret == 0)
91e93c71
AV
1437 progress++;
1438 }
0fd73091 1439
91e93c71 1440 fclose(f);
0fd73091 1441
91e93c71
AV
1442 if (!progress)
1443 break;
1444 }
1445
0fd73091
CB
1446 /* This also can be skipped if a container uses unserns. */
1447 (void)umount2("./proc", MNT_DETACH);
91e93c71
AV
1448
1449 /* It is weird, but chdir("..") moves us in a new root */
0fd73091
CB
1450 ret = chdir("..");
1451 if (ret < 0) {
91e93c71
AV
1452 SYSERROR("Unable to change working directory");
1453 return -1;
1454 }
1455
0fd73091
CB
1456 ret = chroot(".");
1457 if (ret < 0) {
91e93c71
AV
1458 SYSERROR("Unable to chroot");
1459 return -1;
1460 }
1461
1462 return 0;
1463}
1464
74a3920a 1465static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1466{
0fd73091
CB
1467 int ret;
1468
39c7b795 1469 if (!rootfs->path) {
0fd73091 1470 DEBUG("Container does not have a rootfs");
ac778708 1471 return 0;
39c7b795 1472 }
ac778708 1473
91e93c71 1474 if (detect_ramfs_rootfs()) {
0fd73091
CB
1475 DEBUG("Detected that container is on ramfs");
1476
1477 ret = prepare_ramfs_root(rootfs->mount);
1478 if (ret < 0) {
1479 ERROR("Failed to prepare minimal ramfs root");
91e93c71 1480 return -1;
39c7b795
CB
1481 }
1482
0fd73091 1483 DEBUG("Prepared ramfs root for container");
39c7b795
CB
1484 return 0;
1485 }
1486
0fd73091
CB
1487 ret = setup_rootfs_pivot_root(rootfs->mount);
1488 if (ret < 0) {
1489 ERROR("Failed to pivot_root()");
25368b52 1490 return -1;
c69bd12f
DL
1491 }
1492
0fd73091 1493 DEBUG("Finished pivot_root()");
25368b52 1494 return 0;
0ad19a3f 1495}
1496
5173b710 1497static const struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf, unsigned id,
f4900711
CB
1498 enum idtype idtype)
1499{
1500 struct lxc_list *it;
1501 struct id_map *map;
1502 struct id_map *retmap = NULL;
1503
dcf0ffdf
CB
1504 /* Shortcut for container's root mappings. */
1505 if (id == 0) {
1506 if (idtype == ID_TYPE_UID)
1507 return conf->root_nsuid_map;
1508
1509 if (idtype == ID_TYPE_GID)
1510 return conf->root_nsgid_map;
1511 }
1512
f4900711
CB
1513 lxc_list_for_each(it, &conf->id_map) {
1514 map = it->elem;
1515 if (map->idtype != idtype)
1516 continue;
1517
1518 if (id >= map->nsid && id < map->nsid + map->range) {
1519 retmap = map;
1520 break;
1521 }
1522 }
1523
1524 return retmap;
1525}
1526
1527static int lxc_setup_devpts(struct lxc_conf *conf)
3c26f34e 1528{
70761e5e 1529 int ret;
11293068 1530 const char *default_devpts_mntopts = "gid=5,newinstance,ptmxmode=0666,mode=0620";
9d28c4f9 1531 char devpts_mntopts[256];
77890c6d 1532
e528c735 1533 if (conf->pty_max <= 0) {
0fd73091 1534 DEBUG("No new devpts instance will be mounted since no pts "
70761e5e 1535 "devices are requested");
d852c78c 1536 return 0;
3c26f34e 1537 }
1538
e528c735
CB
1539 ret = snprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu",
1540 default_devpts_mntopts, conf->pty_max);
9d28c4f9
CB
1541 if (ret < 0 || (size_t)ret >= sizeof(devpts_mntopts))
1542 return -1;
1543
77f94854
CB
1544 ret = umount2("/dev/pts", MNT_DETACH);
1545 if (ret < 0)
1546 WARN("%s - Failed to unmount old devpts instance", strerror(errno));
1547 else
0fd73091 1548 DEBUG("Unmounted old devpts instance");
7e40254a 1549
70761e5e
CB
1550 /* Create mountpoint for devpts instance. */
1551 ret = mkdir("/dev/pts", 0755);
1552 if (ret < 0 && errno != EEXIST) {
0fd73091 1553 SYSERROR("Failed to create \"/dev/pts\" directory");
3c26f34e 1554 return -1;
1555 }
1556
11293068 1557 /* mount new devpts instance */
f4900711 1558 ret = mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, devpts_mntopts);
70761e5e 1559 if (ret < 0) {
11293068
CB
1560 /* try mounting without gid=5 */
1561 ret = mount("devpts", "/dev/pts", "devpts",
1562 MS_NOSUID | MS_NOEXEC, devpts_mntopts + sizeof("gid=5"));
1563 if (ret < 0) {
1564 SYSERROR("Failed to mount new devpts instance");
1565 return -1;
1566 }
70761e5e 1567 }
0fd73091 1568 DEBUG("Mount new devpts instance with options \"%s\"", devpts_mntopts);
70761e5e 1569
d5cb35d6 1570 /* Remove any pre-existing /dev/ptmx file. */
b29e05d6
CB
1571 ret = remove("/dev/ptmx");
1572 if (ret < 0) {
1573 if (errno != ENOENT) {
0fd73091 1574 SYSERROR("Failed to remove existing \"/dev/ptmx\" file");
d5cb35d6 1575 return -1;
70761e5e 1576 }
b29e05d6 1577 } else {
0fd73091 1578 DEBUG("Removed existing \"/dev/ptmx\" file");
3c26f34e 1579 }
1580
d5cb35d6
CB
1581 /* Create dummy /dev/ptmx file as bind mountpoint for /dev/pts/ptmx. */
1582 ret = open("/dev/ptmx", O_CREAT, 0666);
1583 if (ret < 0) {
0fd73091 1584 SYSERROR("Failed to create dummy \"/dev/ptmx\" file as bind mount target");
d5cb35d6
CB
1585 return -1;
1586 }
e87bd19c 1587 close(ret);
0fd73091 1588 DEBUG("Created dummy \"/dev/ptmx\" file as bind mount target");
77890c6d 1589
d5cb35d6 1590 /* Fallback option: create symlink /dev/ptmx -> /dev/pts/ptmx */
e87bd19c 1591 ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL);
d5cb35d6 1592 if (!ret) {
0fd73091 1593 DEBUG("Bind mounted \"/dev/pts/ptmx\" to \"/dev/ptmx\"");
d5cb35d6
CB
1594 return 0;
1595 } else {
1596 /* Fallthrough and try to create a symlink. */
0fd73091 1597 ERROR("Failed to bind mount \"/dev/pts/ptmx\" to \"/dev/ptmx\"");
d5cb35d6
CB
1598 }
1599
1600 /* Remove the dummy /dev/ptmx file we created above. */
1601 ret = remove("/dev/ptmx");
70761e5e 1602 if (ret < 0) {
0fd73091 1603 SYSERROR("Failed to remove existing \"/dev/ptmx\"");
d5cb35d6
CB
1604 return -1;
1605 }
1606
1607 /* Fallback option: Create symlink /dev/ptmx -> /dev/pts/ptmx. */
1608 ret = symlink("/dev/pts/ptmx", "/dev/ptmx");
1609 if (ret < 0) {
0fd73091 1610 SYSERROR("Failed to create symlink from \"/dev/ptmx\" to \"/dev/pts/ptmx\"");
3c26f34e 1611 return -1;
1612 }
0fd73091 1613 DEBUG("Created symlink from \"/dev/ptmx\" to \"/dev/pts/ptmx\"");
cd54d859 1614
3c26f34e 1615 return 0;
1616}
1617
cccc74b5
DL
/* Set the process execution domain to @persona via personality().
 *
 * A @persona of -1 means "leave unchanged". When the build has no
 * <sys/personality.h> (HAVE_SYS_PERSONALITY_H unset) the function is a no-op.
 * Returns 0 on success and -1 when personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	int ret;

	if (persona == -1)
		return 0;

	ret = personality(persona);
	if (ret < 0) {
		SYSERROR("Failed to set personality to \"0x%x\"", persona);
		return -1;
	}

	INFO("Set personality to \"0x%x\"", persona);
#else
	/* Nothing to do without personality() support. */
	(void)persona;
#endif

	return 0;
}
1637
3d7d929a 1638static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs,
dcad02f8 1639 const struct lxc_terminal *console)
6e590161 1640{
0fd73091 1641 int fd, ret;
63376d7d 1642 char path[MAXPATHLEN];
86530b0a 1643 char *rootfs_path = rootfs->path ? rootfs->mount : "";
52e35957 1644
8b1b1210
CB
1645 if (console->path && !strcmp(console->path, "none"))
1646 return 0;
1647
86530b0a 1648 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
3d7d929a 1649 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1650 return -1;
52e35957 1651
8b1b1210
CB
1652 /* When we are asked to setup a console we remove any previous
1653 * /dev/console bind-mounts.
1654 */
a7ba3c7f
CB
1655 if (file_exists(path)) {
1656 ret = lxc_unstack_mountpoint(path, false);
1657 if (ret < 0) {
86530b0a 1658 ERROR("Failed to unmount \"%s\": %s", path, strerror(errno));
a7ba3c7f
CB
1659 return -ret;
1660 } else {
86530b0a 1661 DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
a7ba3c7f 1662 }
8b1b1210
CB
1663 }
1664
1665 /* For unprivileged containers autodev or automounts will already have
1666 * taken care of creating /dev/console.
1667 */
0728ebf4
TA
1668 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1669 if (fd < 0) {
1670 if (errno != EEXIST) {
86530b0a 1671 SYSERROR("Failed to create console");
3d7d929a 1672 return -errno;
0728ebf4
TA
1673 }
1674 } else {
1675 close(fd);
52e35957
DL
1676 }
1677
86530b0a
L
1678 ret = chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH);
1679 if (ret < 0) {
0fd73091
CB
1680 SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
1681 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
3d7d929a 1682 return -errno;
63376d7d 1683 }
13954cce 1684
86530b0a
L
1685 ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path);
1686 if (ret < 0) {
0fd73091 1687 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, path);
6e590161 1688 return -1;
1689 }
1690
86530b0a 1691 DEBUG("Mounted pts device \"%s\" onto \"%s\"", console->name, path);
7c6ef2a2
SH
1692 return 0;
1693}
1694
3d7d929a 1695static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs,
dcad02f8 1696 const struct lxc_terminal *console,
3d7d929a 1697 char *ttydir)
7c6ef2a2 1698{
3dc035f1 1699 int ret, fd;
3d7d929a 1700 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
86530b0a 1701 char *rootfs_path = rootfs->path ? rootfs->mount : "";
7c6ef2a2 1702
3dc035f1
L
1703 if (console->path && !strcmp(console->path, "none"))
1704 return 0;
1705
7c6ef2a2 1706 /* create rootfs/dev/<ttydir> directory */
86530b0a 1707 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs_path, ttydir);
3d7d929a 1708 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1709 return -1;
3d7d929a 1710
7c6ef2a2
SH
1711 ret = mkdir(path, 0755);
1712 if (ret && errno != EEXIST) {
0fd73091 1713 SYSERROR("Failed to create \"%s\"", path);
3d7d929a 1714 return -errno;
7c6ef2a2 1715 }
4742cd9a 1716 DEBUG("Created directory for console and tty devices at \"%s\"", path);
7c6ef2a2 1717
86530b0a 1718 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console", rootfs_path, ttydir);
3d7d929a
CB
1719 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
1720 return -1;
1721
7c6ef2a2 1722 ret = creat(lxcpath, 0660);
3d7d929a 1723 if (ret == -1 && errno != EEXIST) {
0fd73091 1724 SYSERROR("Failed to create \"%s\"", lxcpath);
3d7d929a 1725 return -errno;
7c6ef2a2 1726 }
4d44e274
SH
1727 if (ret >= 0)
1728 close(ret);
7c6ef2a2 1729
86530b0a 1730 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
3dc035f1 1731 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1732 return -1;
2a12fefd 1733
3dc035f1 1734 if (file_exists(path)) {
a7ba3c7f 1735 ret = lxc_unstack_mountpoint(path, false);
2a12fefd 1736 if (ret < 0) {
0fd73091 1737 ERROR("%s - Failed to unmount \"%s\"", strerror(errno), path);
a7ba3c7f
CB
1738 return -ret;
1739 } else {
86530b0a 1740 DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
a7ba3c7f 1741 }
3dc035f1 1742 }
2a12fefd 1743
3dc035f1
L
1744 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1745 if (fd < 0) {
1746 if (errno != EEXIST) {
86530b0a 1747 SYSERROR("Failed to create console");
3dc035f1 1748 return -errno;
2a12fefd 1749 }
3dc035f1
L
1750 } else {
1751 close(fd);
7c6ef2a2
SH
1752 }
1753
86530b0a
L
1754 ret = chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH);
1755 if (ret < 0) {
0fd73091
CB
1756 SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
1757 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
2a12fefd
CB
1758 return -errno;
1759 }
1760
3dc035f1 1761 /* bind mount console->name to '/dev/<ttydir>/console' */
86530b0a
L
1762 ret = safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs_path);
1763 if (ret < 0) {
0fd73091 1764 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, lxcpath);
7c6ef2a2
SH
1765 return -1;
1766 }
86530b0a 1767 DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath);
3dc035f1
L
1768
1769 /* bind mount '/dev/<ttydir>/console' to '/dev/console' */
86530b0a
L
1770 ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path);
1771 if (ret < 0) {
0fd73091 1772 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, lxcpath);
3dc035f1
L
1773 return -1;
1774 }
86530b0a 1775 DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath);
3dc035f1 1776
86530b0a 1777 DEBUG("Console has been setup under \"%s\" and mounted to \"%s\"", lxcpath, path);
6e590161 1778 return 0;
1779}
1780
/* Set up the container console: inside "/dev/<ttydir>" when @ttydir is given,
 * directly on "/dev/console" otherwise. Returns the helper's result.
 */
static int lxc_setup_console(const struct lxc_rootfs *rootfs,
			     const struct lxc_terminal *console, char *ttydir)
{
	if (ttydir)
		return lxc_setup_ttydir_console(rootfs, console, ttydir);

	return lxc_setup_dev_console(rootfs, console);
}
1790
efed99a4 1791static void parse_mntopt(char *opt, unsigned long *flags, char **data, size_t size)
998ac676
RT
1792{
1793 struct mount_opt *mo;
efed99a4 1794 size_t cursize;
998ac676
RT
1795
1796 /* If opt is found in mount_opt, set or clear flags.
1797 * Otherwise append it to data. */
1798
1799 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
0fd73091 1800 if (strncmp(opt, mo->name, strlen(mo->name)) == 0) {
998ac676
RT
1801 if (mo->clear)
1802 *flags &= ~mo->flag;
1803 else
1804 *flags |= mo->flag;
1805 return;
1806 }
1807 }
1808
efed99a4
DJ
1809 cursize = strlen(*data);
1810 if (cursize)
1811 cursize += 1;
1812
1813 if (size - cursize > 1) {
1814 if (cursize)
1815 strncat(*data, ",", 1);
1816
1817 strncat(*data, opt, size - cursize - 1);
1818 }
998ac676
RT
1819}
1820
/* Split the comma-separated option string @mntopts into mount flags and a
 * data string.
 *
 * On return *@mntflags holds the flags of all recognized options and
 * *@mntdata is either NULL or a newly allocated string with the remaining
 * options, which the caller has to free. A NULL @mntopts yields no flags and
 * no data.
 *
 * Returns 0 on success and -1 when memory allocation fails.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata)
{
	char *data, *dup, *p, *s;
	char *saveptr = NULL;
	size_t size;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	dup = strdup(mntopts);
	if (!dup)
		return -1;

	/* The data string can never get longer than the option string. */
	size = strlen(dup) + 1;
	data = malloc(size);
	if (!data) {
		free(dup);
		return -1;
	}
	*data = 0;

	/* strtok_r() wants NULL after the first call, so iterate with a
	 * separate cursor and keep "dup" intact for free().
	 */
	for (s = dup; (p = strtok_r(s, ",", &saveptr)); s = NULL)
		parse_mntopt(p, mntflags, &data, size);

	if (*data)
		*mntdata = data;
	else
		free(data);
	free(dup);

	return 0;
}
1856
d840039e
YT
1857static void parse_propagationopt(char *opt, unsigned long *flags)
1858{
1859 struct mount_opt *mo;
1860
1861 /* If opt is found in propagation_opt, set or clear flags. */
d840039e 1862 for (mo = &propagation_opt[0]; mo->name != NULL; mo++) {
0fd73091
CB
1863 if (strncmp(opt, mo->name, strlen(mo->name)) != 0)
1864 continue;
1865
1866 if (mo->clear)
1867 *flags &= ~mo->flag;
1868 else
1869 *flags |= mo->flag;
1870
1871 return;
d840039e
YT
1872 }
1873}
1874
/* Collect the mount propagation flags named in the comma-separated option
 * string @mntopts into *@pflags.
 *
 * *@pflags is left untouched when @mntopts is NULL or when duplicating the
 * string fails; otherwise it is reset to 0 before the options are applied.
 *
 * Returns 0 on success and -ENOMEM when memory allocation fails.
 */
static int parse_propagationopts(const char *mntopts, unsigned long *pflags)
{
	char *dup, *p, *s;
	char *saveptr = NULL;

	if (!mntopts)
		return 0;

	dup = strdup(mntopts);
	if (!dup) {
		SYSERROR("Failed to allocate memory");
		return -ENOMEM;
	}

	*pflags = 0L;
	/* strtok_r() wants NULL after the first call, so iterate with a
	 * separate cursor and keep "dup" intact for free().
	 */
	for (s = dup; (p = strtok_r(s, ",", &saveptr)); s = NULL)
		parse_propagationopt(p, pflags);
	free(dup);

	return 0;
}
1896
6fd5e769
SH
/* Cut @word off at its first space or tab; a string containing neither is
 * left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1903
/* Skip @nfields space- or tab-terminated fields in @src.
 *
 * Returns a pointer just past the separator that ends the last skipped
 * field, or a pointer to the terminating NUL when the string runs out first.
 * Each separator character counts as the end of a field of its own.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped;

	for (skipped = 0; skipped < nfields; skipped++) {
		/* Move to the separator that ends the current field. */
		cur += strcspn(cur, " \t");

		if (*cur == '\0')
			break;

		/* Step over that separator. */
		cur++;
	}

	return cur;
}
1922
911324ef
DL
/* Perform a single mount described by a mount entry.
 *
 * @fsname:     mount source; taken relative to @rootfs when @relative is set
 * @target:     mount point
 * @fstype:     filesystem type
 * @mountflags: mount flags; MS_REMOUNT is stripped for the initial mount
 * @pflags:     mount propagation flags applied to @target afterwards (0: none)
 * @data:       filesystem specific mount data
 * @optional:   failures are logged at INFO level and reported as success
 * @dev:        when set, MS_NODEV is not inherited from the source filesystem
 * @relative:   prefix @fsname with @rootfs (or "/" when @rootfs is NULL)
 * @rootfs:     rootfs path handed to safe_mount()
 *
 * For bind mounts and remounts a second mount() call with MS_REMOUNT applies
 * the requested flags. With statvfs() support the flags found on the source
 * filesystem are added, and a plain bind mount is not remounted when no
 * additional flag would result. The propagation flags are applied whether or
 * not the remount was skipped.
 *
 * Returns 0 on success (or on failure of an optional mount), -1 otherwise.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       unsigned long pflags, const char *data, bool optional,
		       bool dev, bool relative, const char *rootfs)
{
	int ret;
	char srcbuf[MAXPATHLEN];
	const char *srcpath = fsname;
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (relative) {
		ret = snprintf(srcbuf, MAXPATHLEN, "%s/%s", rootfs ? rootfs : "/", fsname ? fsname : "");
		if (ret < 0 || ret >= MAXPATHLEN) {
			ERROR("source path is too long");
			return -1;
		}
		srcpath = srcbuf;
	}

	ret = safe_mount(srcpath, target, fstype, mountflags & ~MS_REMOUNT, data,
			 rootfs);
	if (ret < 0) {
		if (optional) {
			INFO("%s - Failed to mount \"%s\" on \"%s\" "
			     "(optional)", strerror(errno),
			     srcpath ? srcpath : "(null)", target);
			return 0;
		}

		SYSERROR("Failed to mount \"%s\" on \"%s\"",
			 srcpath ? srcpath : "(null)", target);
		return -1;
	}

	if ((mountflags & MS_REMOUNT) || (mountflags & MS_BIND)) {
		unsigned long rqd_flags = 0;

		DEBUG("Remounting \"%s\" on \"%s\" to respect bind or remount "
		      "options", srcpath ? srcpath : "(none)", target ? target : "(none)");

		if (mountflags & MS_RDONLY)
			rqd_flags |= MS_RDONLY;
#ifdef HAVE_STATVFS
		if (srcpath && statvfs(srcpath, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			/* Carry over the flags set on the source filesystem. */
			if (sb.f_flag & MS_NOSUID)
				required_flags |= MS_NOSUID;

			if (sb.f_flag & MS_NODEV && !dev)
				required_flags |= MS_NODEV;

			if (sb.f_flag & MS_RDONLY)
				required_flags |= MS_RDONLY;

			if (sb.f_flag & MS_NOEXEC)
				required_flags |= MS_NOEXEC;

			DEBUG("Flags for \"%s\" were %lu, required extra flags "
			      "are %lu", srcpath, sb.f_flag, required_flags);

			/* If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount.
			 */
			if (!(mountflags & MS_REMOUNT)) {
				if (!(required_flags & ~mountflags) &&
				    rqd_flags == 0) {
					DEBUG("Mountflags already were %lu, "
					      "skipping remount", mountflags);
					goto skipremount;
				}
			}

			mountflags |= required_flags;
		}
#endif

		ret = mount(srcpath, target, fstype, mountflags | MS_REMOUNT, data);
		if (ret < 0) {
			if (optional) {
				INFO("Failed to mount \"%s\" on \"%s\" "
				     "(optional): %s",
				     srcpath ? srcpath : "(null)", target,
				     strerror(errno));
				return 0;
			}

			SYSERROR("Failed to mount \"%s\" on \"%s\"",
				 srcpath ? srcpath : "(null)", target);
			return -1;
		}
	}

	/* The label sits in front of the propagation change so that skipping
	 * the remount does not also skip the requested propagation flags.
	 */
#ifdef HAVE_STATVFS
skipremount:
#endif
	if (pflags) {
		ret = mount(NULL, target, NULL, pflags, NULL);
		if (ret < 0) {
			if (optional) {
				INFO("%s - Failed to change mount propagation "
				     "for \"%s\" (optional)", strerror(errno), target);
				return 0;
			} else {
				SYSERROR("Failed to change mount propagation "
					 "for \"%s\"", target);
				return -1;
			}
		}
		DEBUG("Changed mount propagation for \"%s\"", target);
	}

	DEBUG("Mounted \"%s\" on \"%s\" with filesystem type \"%s\"",
	      srcpath ? srcpath : "(null)", target, fstype);

	return 0;
}
2044
/* Remove the LXC-only options "create=dir", "create=file", "optional" and
 * "relative" from mntent->mnt_opts. The string is edited in place.
 *
 * Only whole comma-separated options are removed (an option that merely
 * contains one of the names is left alone), every occurrence is removed, and
 * no dangling leading or trailing separator is left behind.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const list[] = {
		"create=dir",
		"create=file",
		"optional",
		"relative",
	};
	char *opts = mntent->mnt_opts;
	char *src, *dst;

	if (!opts)
		return;

	/* src walks the original options, dst is where kept options are
	 * compacted to. dst never overtakes src.
	 */
	src = opts;
	dst = opts;

	while (*src) {
		size_t i, len;
		bool cull = false;
		char *end = strchr(src, ',');

		len = end ? (size_t)(end - src) : strlen(src);

		for (i = 0; i < sizeof(list) / sizeof(list[0]); i++) {
			if (strlen(list[i]) == len &&
			    strncmp(src, list[i], len) == 0) {
				cull = true;
				break;
			}
		}

		if (!cull) {
			/* Separate from the previously kept option */
			if (dst != opts)
				*dst++ = ',';

			memmove(dst, src, len);
			dst += len;
		}

		src += len;
		if (*src == ',')
			src++;
	}

	*dst = '\0';
}
2074
/* Create the mount target for an fstab entry when the entry asks for it.
 *
 * - If the filesystem type starts with "overlay", ovl_mkdir() is called first.
 * - With "create=dir", path is created as a directory (mode 0755).
 * - With "create=file" and path not existing yet, the parent directory of
 *   path is created (mode 0755) and then path itself (mode 0644).
 *
 * Returns 0 on success and -1 on failure.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char *path,
				       const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int fd, ret;
	char *p1, *p2;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		ret = ovl_mkdir(mntent, rootfs, lxc_name, lxc_path);
		if (ret < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		ret = mkdir_p(path, 0755);
		if (ret < 0 && errno != EEXIST) {
			SYSERROR("Failed to create directory \"%s\"", path);
			return -1;
		}
	}

	if (!hasmntopt(mntent, "create=file"))
		return 0;

	/* Nothing to do when the target already exists */
	ret = access(path, F_OK);
	if (ret == 0)
		return 0;

	/* dirname() may modify its argument, so work on a copy */
	p1 = strdup(path);
	if (!p1)
		return -1;

	p2 = dirname(p1);

	ret = mkdir_p(p2, 0755);
	if (ret < 0 && errno != EEXIST) {
		/* Log before free(): p2 may point into p1 and free() is
		 * allowed to change errno.
		 */
		SYSERROR("Failed to create directory \"%s\"", p2);
		free(p1);
		return -1;
	}
	free(p1);

	fd = open(path, O_CREAT, 0644);
	if (fd < 0) {
		SYSERROR("Failed to create file \"%s\"", path);
		return -1;
	}
	close(fd);

	return 0;
}
2124
ec50007f
CB
2125/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
2126 * without a rootfs. */
db4aba38 2127static inline int mount_entry_on_generic(struct mntent *mntent,
d8b712bc
CB
2128 const char *path,
2129 const struct lxc_rootfs *rootfs,
2130 const char *lxc_name,
2131 const char *lxc_path)
4d5b72a1 2132{
d8b712bc 2133 int ret;
949d0338 2134 unsigned long mntflags;
4d5b72a1 2135 char *mntdata;
181437fd 2136 bool dev, optional, relative;
949d0338 2137 unsigned long pflags = 0;
ec50007f 2138 char *rootfs_path = NULL;
d8b712bc
CB
2139
2140 optional = hasmntopt(mntent, "optional") != NULL;
2141 dev = hasmntopt(mntent, "dev") != NULL;
181437fd 2142 relative = hasmntopt(mntent, "relative") != NULL;
d8b712bc 2143
ec50007f
CB
2144 if (rootfs && rootfs->path)
2145 rootfs_path = rootfs->mount;
2146
d8b712bc
CB
2147 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name,
2148 lxc_path);
2149 if (ret < 0) {
2150 if (optional)
2151 return 0;
608e3567 2152
d8b712bc
CB
2153 return -1;
2154 }
4e4ca161
SH
2155 cull_mntent_opt(mntent);
2156
d840039e
YT
2157 ret = parse_propagationopts(mntent->mnt_opts, &pflags);
2158 if (ret < 0)
2159 return -1;
2160
d8b712bc
CB
2161 ret = parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata);
2162 if (ret < 0)
a17b1e65 2163 return -1;
a17b1e65 2164
6e46cc0d 2165 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
d840039e 2166 pflags, mntdata, optional, dev, relative, rootfs_path);
68c152ef 2167
911324ef 2168 free(mntdata);
911324ef
DL
2169 return ret;
2170}
2171
db4aba38
NC
/* Mount an fstab entry for a container that was created without a rootfs.
 *
 * All mount targets are treated as absolute paths starting at / on the host,
 * so a leading '/' is added when the entry does not have one.
 *
 * Returns -1 if the target does not fit into MAXPATHLEN, otherwise the result
 * of mount_entry_on_generic().
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char target[MAXPATHLEN];
	const char *prefix = (mntent->mnt_dir[0] == '/') ? "" : "/";
	int len;

	len = snprintf(target, sizeof(target), "%s%s", prefix, mntent->mnt_dir);
	if (len < 0)
		return -1;

	if ((size_t)len >= sizeof(target))
		return -1;

	return mount_entry_on_generic(mntent, target, NULL, NULL, NULL);
}
2189
4e4ca161 2190static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 2191 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
2192 const char *lxc_name,
2193 const char *lxc_path)
911324ef 2194{
bdd2b34c 2195 int offset;
013bd428 2196 char *aux;
67e571de 2197 const char *lxcpath;
bdd2b34c
CB
2198 char path[MAXPATHLEN];
2199 int ret = 0;
0ad19a3f 2200
593e8478 2201 lxcpath = lxc_global_config_value("lxc.lxcpath");
bdd2b34c 2202 if (!lxcpath)
2a59a681 2203 return -1;
2a59a681 2204
bdd2b34c
CB
2205 /* If rootfs->path is a blockdev path, allow container fstab to use
2206 * <lxcpath>/<name>/rootfs" as the target prefix.
2207 */
2208 ret = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
2209 if (ret < 0 || ret >= MAXPATHLEN)
80a881b2
SH
2210 goto skipvarlib;
2211
2212 aux = strstr(mntent->mnt_dir, path);
2213 if (aux) {
2214 offset = strlen(path);
2215 goto skipabs;
2216 }
2217
2218skipvarlib:
013bd428
DL
2219 aux = strstr(mntent->mnt_dir, rootfs->path);
2220 if (!aux) {
bdd2b34c 2221 WARN("Ignoring mount point \"%s\"", mntent->mnt_dir);
db4aba38 2222 return ret;
013bd428 2223 }
80a881b2
SH
2224 offset = strlen(rootfs->path);
2225
2226skipabs:
bdd2b34c
CB
2227 ret = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount, aux + offset);
2228 if (ret < 0 || ret >= MAXPATHLEN)
a17b1e65 2229 return -1;
a17b1e65 2230
0a2dddd4 2231 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 2232}
d330fe7b 2233
4e4ca161 2234static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
2235 const struct lxc_rootfs *rootfs,
2236 const char *lxc_name,
2237 const char *lxc_path)
911324ef 2238{
911324ef 2239 int ret;
0fd73091 2240 char path[MAXPATHLEN];
d330fe7b 2241
34cfffb3 2242 /* relative to root mount point */
6e46cc0d 2243 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
0fd73091 2244 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2245 return -1;
911324ef 2246
0a2dddd4 2247 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
2248}
2249
06749971
CB
2250static int mount_file_entries(const struct lxc_conf *conf,
2251 const struct lxc_rootfs *rootfs, FILE *file,
1ae3c19f 2252 const char *lxc_name, const char *lxc_path)
911324ef 2253{
aaf901be 2254 char buf[4096];
0fd73091 2255 struct mntent mntent;
911324ef 2256 int ret = -1;
e76b8764 2257
aaf901be 2258 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
1ae3c19f
CB
2259 if (!rootfs->path)
2260 ret = mount_entry_on_systemfs(&mntent);
2261 else if (mntent.mnt_dir[0] != '/')
2262 ret = mount_entry_on_relative_rootfs(&mntent, rootfs,
2263 lxc_name, lxc_path);
2264 else
2265 ret = mount_entry_on_absolute_rootfs(&mntent, rootfs,
2266 lxc_name, lxc_path);
2267 if (ret < 0)
2268 return -1;
0ad19a3f 2269 }
2270 ret = 0;
cd54d859 2271
0fd73091 2272 INFO("Finished setting up mounts");
e7938e9e
MN
2273 return ret;
2274}
2275
06749971
CB
/* Mount all entries listed in the fstab file named by fstab.
 *
 * A NULL fstab means there is nothing to do.
 *
 * Returns 0 on success (or when fstab is NULL), -1 if the file cannot be
 * opened, otherwise the result of mount_file_entries().
 */
static int setup_mount(const struct lxc_conf *conf,
		       const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *stream;
	int rc;

	if (fstab == NULL)
		return 0;

	stream = setmntent(fstab, "r");
	if (stream == NULL) {
		SYSERROR("Failed to open \"%s\"", fstab);
		return -1;
	}

	rc = mount_file_entries(conf, rootfs, stream, lxc_name, lxc_path);
	if (rc < 0)
		ERROR("Failed to set up mount entries");

	/* The stream is closed on both the success and the failure path */
	endmntent(stream);

	return rc;
}
2299
5ef5c9a3 2300FILE *make_anonymous_mount_file(struct lxc_list *mount)
e7938e9e 2301{
5ef5c9a3 2302 int ret;
e7938e9e 2303 char *mount_entry;
5ef5c9a3 2304 struct lxc_list *iterator;
5ef5c9a3
CB
2305 int fd = -1;
2306
0fd73091 2307 fd = memfd_create(".lxc_mount_file", MFD_CLOEXEC);
5ef5c9a3 2308 if (fd < 0) {
a324e7eb
CB
2309 char template[] = P_tmpdir "/.lxc_mount_file_XXXXXX";
2310
5ef5c9a3
CB
2311 if (errno != ENOSYS)
2312 return NULL;
a324e7eb
CB
2313
2314 fd = lxc_make_tmpfile(template, true);
0fd73091
CB
2315 if (fd < 0) {
2316 SYSERROR("Could not create temporary mount file");
2317 return NULL;
2318 }
2319
6bd04140 2320 TRACE("Created temporary mount file");
5ef5c9a3 2321 }
0fd73091
CB
2322 if (fd < 0) {
2323 SYSERROR("Could not create temporary mount file");
9fc7f8c0 2324 return NULL;
e7938e9e
MN
2325 }
2326
0fd73091
CB
2327 lxc_list_for_each (iterator, mount) {
2328 size_t len;
2329
e7938e9e 2330 mount_entry = iterator->elem;
0fd73091 2331 len = strlen(mount_entry);
5ef5c9a3 2332
489f39be 2333 ret = lxc_write_nointr(fd, mount_entry, len);
0fd73091
CB
2334 if (ret != len)
2335 goto on_error;
2336
489f39be 2337 ret = lxc_write_nointr(fd, "\n", 1);
0fd73091
CB
2338 if (ret != 1)
2339 goto on_error;
e7938e9e
MN
2340 }
2341
0fd73091
CB
2342 ret = lseek(fd, 0, SEEK_SET);
2343 if (ret < 0)
2344 goto on_error;
2345
2346 return fdopen(fd, "r+");
2347
2348on_error:
2349 SYSERROR("Failed to write mount entry to temporary mount file");
2350 close(fd);
2351 return NULL;
9fc7f8c0
TA
2352}
2353
06749971
CB
/* Mount all entries contained in the list mount.
 *
 * The entries are first written to an anonymous file so that they can be
 * processed by mount_file_entries() like a regular fstab.
 *
 * Returns -1 if the anonymous file cannot be created, otherwise the result of
 * mount_file_entries().
 */
static int setup_mount_entries(const struct lxc_conf *conf,
			       const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	int rc;
	FILE *stream = make_anonymous_mount_file(mount);

	if (stream == NULL)
		return -1;

	rc = mount_file_entries(conf, rootfs, stream, lxc_name, lxc_path);

	fclose(stream);
	return rc;
}
2371
bab88e68
CS
2372static int parse_cap(const char *cap)
2373{
84760c11 2374 size_t i;
2375 int capid = -1;
0fd73091
CB
2376 size_t end = sizeof(caps_opt) / sizeof(caps_opt[0]);
2377 char *ptr = NULL;
bab88e68 2378
0fd73091 2379 if (strcmp(cap, "none") == 0)
7035407c
DE
2380 return -2;
2381
8560cd36 2382 for (i = 0; i < end; i++) {
bab88e68
CS
2383 if (strcmp(cap, caps_opt[i].name))
2384 continue;
2385
2386 capid = caps_opt[i].value;
2387 break;
2388 }
2389
2390 if (capid < 0) {
0fd73091
CB
2391 /* Try to see if it's numeric, so the user may specify
2392 * capabilities that the running kernel knows about but we
2393 * don't
2394 */
bab88e68
CS
2395 errno = 0;
2396 capid = strtol(cap, &ptr, 10);
2397 if (!ptr || *ptr != '\0' || errno != 0)
2398 /* not a valid number */
2399 capid = -1;
2400 else if (capid > lxc_caps_last_cap())
2401 /* we have a number but it's not a valid
2402 * capability */
2403 capid = -1;
2404 }
2405
2406 return capid;
2407}
2408
0769b82a
CS
2409int in_caplist(int cap, struct lxc_list *caps)
2410{
0769b82a 2411 int capid;
0fd73091 2412 struct lxc_list *iterator;
0769b82a 2413
0fd73091 2414 lxc_list_for_each (iterator, caps) {
0769b82a
CS
2415 capid = parse_cap(iterator->elem);
2416 if (capid == cap)
2417 return 1;
2418 }
2419
2420 return 0;
2421}
2422
81810dd1
DL
2423static int setup_caps(struct lxc_list *caps)
2424{
bab88e68 2425 int capid;
0fd73091
CB
2426 char *drop_entry;
2427 struct lxc_list *iterator;
81810dd1 2428
0fd73091
CB
2429 lxc_list_for_each (iterator, caps) {
2430 int ret;
81810dd1
DL
2431
2432 drop_entry = iterator->elem;
2433
bab88e68 2434 capid = parse_cap(drop_entry);
0fd73091 2435 if (capid < 0) {
1e11be34
DL
2436 ERROR("unknown capability %s", drop_entry);
2437 return -1;
81810dd1
DL
2438 }
2439
0fd73091
CB
2440 ret = prctl(PR_CAPBSET_DROP, capid, 0, 0, 0);
2441 if (ret < 0) {
2442 SYSERROR("Failed to remove %s capability", drop_entry);
3ec1648d
SH
2443 return -1;
2444 }
0fd73091 2445 DEBUG("Dropped %s (%d) capability", drop_entry, capid);
81810dd1
DL
2446 }
2447
0fd73091 2448 DEBUG("Capabilities have been setup");
1fb86a7c
SH
2449 return 0;
2450}
2451
2452static int dropcaps_except(struct lxc_list *caps)
2453{
0fd73091 2454 int i, capid, numcaps;
1fb86a7c 2455 char *keep_entry;
0fd73091 2456 struct lxc_list *iterator;
1fb86a7c 2457
0fd73091 2458 numcaps = lxc_caps_last_cap() + 1;
2caf9a97
SH
2459 if (numcaps <= 0 || numcaps > 200)
2460 return -1;
0fd73091 2461 TRACE("Found %d capabilities", numcaps);
2caf9a97 2462
1a0e70ac 2463 /* caplist[i] is 1 if we keep capability i */
1fb86a7c
SH
2464 int *caplist = alloca(numcaps * sizeof(int));
2465 memset(caplist, 0, numcaps * sizeof(int));
2466
0fd73091 2467 lxc_list_for_each (iterator, caps) {
1fb86a7c
SH
2468 keep_entry = iterator->elem;
2469
bab88e68 2470 capid = parse_cap(keep_entry);
7035407c
DE
2471 if (capid == -2)
2472 continue;
2473
0fd73091
CB
2474 if (capid < 0) {
2475 ERROR("Unknown capability %s", keep_entry);
1fb86a7c
SH
2476 return -1;
2477 }
2478
0fd73091 2479 DEBUG("Keep capability %s (%d)", keep_entry, capid);
1fb86a7c
SH
2480 caplist[capid] = 1;
2481 }
0fd73091
CB
2482
2483 for (i = 0; i < numcaps; i++) {
2484 int ret;
2485
1fb86a7c
SH
2486 if (caplist[i])
2487 continue;
0fd73091
CB
2488
2489 ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
2490 if (ret < 0) {
2491 SYSERROR("Failed to remove capability %d", i);
3ec1648d
SH
2492 return -1;
2493 }
1fb86a7c
SH
2494 }
2495
0fd73091 2496 DEBUG("Capabilities have been setup");
81810dd1
DL
2497 return 0;
2498}
2499
0fd73091
CB
2500static int parse_resource(const char *res)
2501{
2502 int ret;
c6d09e15
WB
2503 size_t i;
2504 int resid = -1;
2505
0fd73091 2506 for (i = 0; i < sizeof(limit_opt) / sizeof(limit_opt[0]); ++i)
c6d09e15
WB
2507 if (strcmp(res, limit_opt[i].name) == 0)
2508 return limit_opt[i].value;
c6d09e15 2509
0fd73091 2510 /* Try to see if it's numeric, so the user may specify
c6d09e15 2511 * resources that the running kernel knows about but
0fd73091
CB
2512 * we don't.
2513 */
2514 ret = lxc_safe_int(res, &resid);
2515 if (ret < 0)
2516 return -1;
2517
2518 return resid;
c6d09e15
WB
2519}
2520
0fd73091
CB
2521int setup_resource_limits(struct lxc_list *limits, pid_t pid)
2522{
2523 int resid;
c6d09e15
WB
2524 struct lxc_list *it;
2525 struct lxc_limit *lim;
c6d09e15 2526
0fd73091 2527 lxc_list_for_each (it, limits) {
c6d09e15
WB
2528 lim = it->elem;
2529
2530 resid = parse_resource(lim->resource);
2531 if (resid < 0) {
0fd73091 2532 ERROR("Unknown resource %s", lim->resource);
c6d09e15
WB
2533 return -1;
2534 }
2535
f48b5fd8 2536#if HAVE_PRLIMIT || HAVE_PRLIMIT64
c6d09e15 2537 if (prlimit(pid, resid, &lim->limit, NULL) != 0) {
0fd73091
CB
2538 ERROR("Failed to set limit %s: %s", lim->resource,
2539 strerror(errno));
c6d09e15
WB
2540 return -1;
2541 }
f48b5fd8
FF
2542#else
2543 ERROR("Cannot set limit %s as prlimit is missing", lim->resource);
2544 return -1;
2545#endif
c6d09e15 2546 }
0fd73091 2547
c6d09e15
WB
2548 return 0;
2549}
2550
7edd0540
L
2551int setup_sysctl_parameters(struct lxc_list *sysctls)
2552{
2553 struct lxc_list *it;
2554 struct lxc_sysctl *elem;
0fd73091 2555 int ret = 0;
7edd0540
L
2556 char *tmp = NULL;
2557 char filename[MAXPATHLEN] = {0};
7edd0540 2558
0fd73091 2559 lxc_list_for_each (it, sysctls) {
7edd0540
L
2560 elem = it->elem;
2561 tmp = lxc_string_replace(".", "/", elem->key);
2562 if (!tmp) {
2563 ERROR("Failed to replace key %s", elem->key);
2564 return -1;
2565 }
2566
2567 ret = snprintf(filename, sizeof(filename), "/proc/sys/%s", tmp);
2568 free(tmp);
2569 if (ret < 0 || (size_t)ret >= sizeof(filename)) {
2570 ERROR("Error setting up sysctl parameters path");
2571 return -1;
2572 }
2573
0fd73091 2574 ret = lxc_write_to_file(filename, elem->value,
7cea5905 2575 strlen(elem->value), false, 0666);
7edd0540 2576 if (ret < 0) {
0fd73091
CB
2577 ERROR("Failed to setup sysctl parameters %s to %s",
2578 elem->key, elem->value);
7edd0540
L
2579 return -1;
2580 }
2581 }
0fd73091 2582
7edd0540
L
2583 return 0;
2584}
2585
61d7a733
YT
2586int setup_proc_filesystem(struct lxc_list *procs, pid_t pid)
2587{
2588 struct lxc_list *it;
2589 struct lxc_proc *elem;
0fd73091 2590 int ret = 0;
61d7a733
YT
2591 char *tmp = NULL;
2592 char filename[MAXPATHLEN] = {0};
61d7a733 2593
0fd73091 2594 lxc_list_for_each (it, procs) {
61d7a733
YT
2595 elem = it->elem;
2596 tmp = lxc_string_replace(".", "/", elem->filename);
2597 if (!tmp) {
2598 ERROR("Failed to replace key %s", elem->filename);
2599 return -1;
2600 }
2601
2602 ret = snprintf(filename, sizeof(filename), "/proc/%d/%s", pid, tmp);
2603 free(tmp);
2604 if (ret < 0 || (size_t)ret >= sizeof(filename)) {
2605 ERROR("Error setting up proc filesystem path");
2606 return -1;
2607 }
2608
0fd73091 2609 ret = lxc_write_to_file(filename, elem->value,
7cea5905 2610 strlen(elem->value), false, 0666);
61d7a733 2611 if (ret < 0) {
0fd73091
CB
2612 ERROR("Failed to setup proc filesystem %s to %s",
2613 elem->filename, elem->value);
61d7a733
YT
2614 return -1;
2615 }
2616 }
0fd73091 2617
61d7a733
YT
2618 return 0;
2619}
2620
ae9242c8
SH
2621static char *default_rootfs_mount = LXCROOTFSMOUNT;
2622
7b379ab3 2623struct lxc_conf *lxc_conf_init(void)
089cd8b8 2624{
26ddeedd 2625 int i;
0fd73091 2626 struct lxc_conf *new;
7b379ab3 2627
13277ec4 2628 new = malloc(sizeof(*new));
0fd73091 2629 if (!new)
7b379ab3 2630 return NULL;
7b379ab3
MN
2631 memset(new, 0, sizeof(*new));
2632
4b73005c 2633 new->loglevel = LXC_LOG_LEVEL_NOTSET;
cccc74b5 2634 new->personality = -1;
124fa0a8 2635 new->autodev = 1;
3a784510 2636 new->console.buffer_size = 0;
596a818d
DE
2637 new->console.log_path = NULL;
2638 new->console.log_fd = -1;
861813e5 2639 new->console.log_size = 0;
28a4b0e5 2640 new->console.path = NULL;
63376d7d 2641 new->console.peer = -1;
fb87aa6a
CB
2642 new->console.proxy.busy = -1;
2643 new->console.proxy.master = -1;
2644 new->console.proxy.slave = -1;
63376d7d
DL
2645 new->console.master = -1;
2646 new->console.slave = -1;
2647 new->console.name[0] = '\0';
732375f5 2648 memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf));
d2e30e99 2649 new->maincmd_fd = -1;
76a26f55 2650 new->nbd_idx = -1;
54c30e29 2651 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048 2652 if (!new->rootfs.mount) {
53f3f048
SH
2653 free(new);
2654 return NULL;
2655 }
858377e4 2656 new->logfd = -1;
7b379ab3 2657 lxc_list_init(&new->cgroup);
54860ed0 2658 lxc_list_init(&new->cgroup2);
7b379ab3
MN
2659 lxc_list_init(&new->network);
2660 lxc_list_init(&new->mount_list);
81810dd1 2661 lxc_list_init(&new->caps);
1fb86a7c 2662 lxc_list_init(&new->keepcaps);
f6d3e3e4 2663 lxc_list_init(&new->id_map);
46ad64ab
CB
2664 new->root_nsuid_map = NULL;
2665 new->root_nsgid_map = NULL;
f979ac15 2666 lxc_list_init(&new->includes);
4184c3e1 2667 lxc_list_init(&new->aliens);
7c661726 2668 lxc_list_init(&new->environment);
c6d09e15 2669 lxc_list_init(&new->limits);
7edd0540 2670 lxc_list_init(&new->sysctls);
61d7a733 2671 lxc_list_init(&new->procs);
44ae0fb6 2672 new->hooks_version = 0;
28d9e29e 2673 for (i = 0; i < NUM_LXC_HOOKS; i++)
26ddeedd 2674 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2675 lxc_list_init(&new->groups);
d39b10eb 2676 lxc_list_init(&new->state_clients);
fe4de9a6
DE
2677 new->lsm_aa_profile = NULL;
2678 new->lsm_se_context = NULL;
7a0bcca3 2679 new->tmp_umount_proc = false;
7b379ab3 2680
72bb04e4
PT
2681 /* if running in a new user namespace, init and COMMAND
2682 * default to running as UID/GID 0 when using lxc-execute */
2683 new->init_uid = 0;
2684 new->init_gid = 0;
43654d34 2685 memset(&new->cgroup_meta, 0, sizeof(struct lxc_cgroup));
b074bbf1 2686 memset(&new->ns_share, 0, sizeof(char *) * LXC_NS_MAX);
72bb04e4 2687
7b379ab3 2688 return new;
089cd8b8
DL
2689}
2690
344c9d81 2691int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
a19b974f 2692 size_t buf_size)
f6d3e3e4 2693{
29053180 2694 int fd, ret;
0fd73091 2695 char path[MAXPATHLEN];
f6d3e3e4 2696
a19b974f
CB
2697 if (geteuid() != 0 && idtype == ID_TYPE_GID) {
2698 size_t buflen;
2699
2700 ret = snprintf(path, MAXPATHLEN, "/proc/%d/setgroups", pid);
0fd73091 2701 if (ret < 0 || ret >= MAXPATHLEN)
a19b974f 2702 return -E2BIG;
a19b974f
CB
2703
2704 fd = open(path, O_WRONLY);
2705 if (fd < 0 && errno != ENOENT) {
2706 SYSERROR("Failed to open \"%s\"", path);
2707 return -1;
2708 }
2709
2388737b
CB
2710 if (fd >= 0) {
2711 buflen = sizeof("deny\n") - 1;
2712 errno = 0;
2713 ret = lxc_write_nointr(fd, "deny\n", buflen);
395b1a3e 2714 close(fd);
2388737b 2715 if (ret != buflen) {
0fd73091
CB
2716 SYSERROR("Failed to write \"deny\" to "
2717 "\"/proc/%d/setgroups\"", pid);
2388737b
CB
2718 return -1;
2719 }
395b1a3e 2720 TRACE("Wrote \"deny\" to \"/proc/%d/setgroups\"", pid);
a19b974f 2721 }
a19b974f
CB
2722 }
2723
29053180
CB
2724 ret = snprintf(path, MAXPATHLEN, "/proc/%d/%cid_map", pid,
2725 idtype == ID_TYPE_UID ? 'u' : 'g');
0fd73091 2726 if (ret < 0 || ret >= MAXPATHLEN)
f6d3e3e4 2727 return -E2BIG;
29053180
CB
2728
2729 fd = open(path, O_WRONLY);
2730 if (fd < 0) {
a19b974f 2731 SYSERROR("Failed to open \"%s\"", path);
29053180 2732 return -1;
f6d3e3e4 2733 }
29053180
CB
2734
2735 errno = 0;
2736 ret = lxc_write_nointr(fd, buf, buf_size);
395b1a3e 2737 close(fd);
29053180 2738 if (ret != buf_size) {
a19b974f 2739 SYSERROR("Failed to write %cid mapping to \"%s\"",
29053180 2740 idtype == ID_TYPE_UID ? 'u' : 'g', path);
29053180
CB
2741 return -1;
2742 }
29053180
CB
2743
2744 return 0;
f6d3e3e4
SH
2745}
2746
6e50e704
CB
2747/* Check whether a binary exist and has either CAP_SETUID, CAP_SETGID or both.
2748 *
2749 * @return 1 if functional binary was found
2750 * @return 0 if binary exists but is lacking privilege
2751 * @return -ENOENT if binary does not exist
2752 * @return -EINVAL if cap to check is neither CAP_SETUID nor CAP_SETGID
6e50e704 2753 */
df6a2945
CB
2754static int idmaptool_on_path_and_privileged(const char *binary, cap_value_t cap)
2755{
2756 char *path;
2757 int ret;
2758 struct stat st;
2759 int fret = 0;
2760
6e50e704
CB
2761 if (cap != CAP_SETUID && cap != CAP_SETGID)
2762 return -EINVAL;
2763
df6a2945
CB
2764 path = on_path(binary, NULL);
2765 if (!path)
2766 return -ENOENT;
2767
2768 ret = stat(path, &st);
2769 if (ret < 0) {
2770 fret = -errno;
2771 goto cleanup;
2772 }
2773
2774 /* Check if the binary is setuid. */
2775 if (st.st_mode & S_ISUID) {
0fd73091 2776 DEBUG("The binary \"%s\" does have the setuid bit set", path);
df6a2945
CB
2777 fret = 1;
2778 goto cleanup;
2779 }
2780
0fd73091 2781#if HAVE_LIBCAP && LIBCAP_SUPPORTS_FILE_CAPABILITIES
df6a2945
CB
2782 /* Check if it has the CAP_SETUID capability. */
2783 if ((cap & CAP_SETUID) &&
2784 lxc_file_cap_is_set(path, CAP_SETUID, CAP_EFFECTIVE) &&
2785 lxc_file_cap_is_set(path, CAP_SETUID, CAP_PERMITTED)) {
2786 DEBUG("The binary \"%s\" has CAP_SETUID in its CAP_EFFECTIVE "
0fd73091 2787 "and CAP_PERMITTED sets", path);
df6a2945
CB
2788 fret = 1;
2789 goto cleanup;
2790 }
2791
2792 /* Check if it has the CAP_SETGID capability. */
2793 if ((cap & CAP_SETGID) &&
2794 lxc_file_cap_is_set(path, CAP_SETGID, CAP_EFFECTIVE) &&
2795 lxc_file_cap_is_set(path, CAP_SETGID, CAP_PERMITTED)) {
2796 DEBUG("The binary \"%s\" has CAP_SETGID in its CAP_EFFECTIVE "
0fd73091 2797 "and CAP_PERMITTED sets", path);
df6a2945
CB
2798 fret = 1;
2799 goto cleanup;
2800 }
0fd73091 2801#else
69924fff
CB
2802 /* If we cannot check for file capabilities we need to give the benefit
2803 * of the doubt. Otherwise we might fail even though all the necessary
2804 * file capabilities are set.
2805 */
d6018f88 2806 DEBUG("Cannot check for file capabilites as full capability support is "
0fd73091 2807 "missing. Manual intervention needed");
d6018f88 2808 fret = 1;
0fd73091 2809#endif
df6a2945
CB
2810
2811cleanup:
2812 free(path);
2813 return fret;
2814}
2815
986ef930
CB
/* Run the command line passed in args through "/bin/sh -c".
 *
 * Returns -1 if execl() returns.
 */
int lxc_map_ids_exec_wrapper(void *args)
{
	char *command = args;

	execl("/bin/sh", "sh", "-c", command, (char *)NULL);

	return -1;
}
2821
f6d3e3e4
SH
2822int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
2823{
0fd73091 2824 int fill, left;
986ef930 2825 char u_or_g;
4bc3b759 2826 char *pos;
986ef930 2827 char cmd_output[MAXPATHLEN];
0fd73091
CB
2828 struct id_map *map;
2829 struct lxc_list *iterator;
2830 enum idtype type;
986ef930
CB
2831 /* strlen("new@idmap") = 9
2832 * +
2833 * strlen(" ") = 1
2834 * +
2835 * LXC_NUMSTRLEN64
2836 * +
2837 * strlen(" ") = 1
2838 *
2839 * We add some additional space to make sure that we really have
2840 * LXC_IDMAPLEN bytes available for our the {g,u]id mapping.
2841 */
0fd73091 2842 int ret = 0, gidmap = 0, uidmap = 0;
986ef930 2843 char mapbuf[9 + 1 + LXC_NUMSTRLEN64 + 1 + LXC_IDMAPLEN] = {0};
0fd73091 2844 bool had_entry = false, use_shadow = false;
c724025c
JC
2845 int hostuid, hostgid;
2846
2847 hostuid = geteuid();
2848 hostgid = getegid();
df6a2945
CB
2849
2850 /* If new{g,u}idmap exists, that is, if shadow is handing out subuid
2851 * ranges, then insist that root also reserve ranges in subuid. This
22038de5
SH
2852 * will protected it by preventing another user from being handed the
2853 * range by shadow.
2854 */
df6a2945 2855 uidmap = idmaptool_on_path_and_privileged("newuidmap", CAP_SETUID);
6e50e704
CB
2856 if (uidmap == -ENOENT)
2857 WARN("newuidmap binary is missing");
2858 else if (!uidmap)
2859 WARN("newuidmap is lacking necessary privileges");
2860
df6a2945 2861 gidmap = idmaptool_on_path_and_privileged("newgidmap", CAP_SETGID);
6e50e704
CB
2862 if (gidmap == -ENOENT)
2863 WARN("newgidmap binary is missing");
2864 else if (!gidmap)
2865 WARN("newgidmap is lacking necessary privileges");
2866
df6a2945 2867 if (uidmap > 0 && gidmap > 0) {
0fd73091 2868 DEBUG("Functional newuidmap and newgidmap binary found");
4bc3b759 2869 use_shadow = true;
df6a2945 2870 } else {
99d43365
CB
2871 /* In case unprivileged users run application containers via
2872 * execute() or a start*() there are valid cases where they may
2873 * only want to map their own {g,u}id. Let's not block them from
2874 * doing so by requiring geteuid() == 0.
2875 */
2876 DEBUG("No newuidmap and newgidmap binary found. Trying to "
c724025c
JC
2877 "write directly with euid %d", hostuid);
2878 }
2879
2880 /* Check if we really need to use newuidmap and newgidmap.
2881 * If the user is only remapping his own {g,u}id, we don't need it.
2882 */
2883 if (use_shadow && lxc_list_len(idmap) == 2) {
2884 use_shadow = false;
2885 lxc_list_for_each(iterator, idmap) {
2886 map = iterator->elem;
2887 if (map->idtype == ID_TYPE_UID && map->range == 1 &&
2888 map->nsid == hostuid && map->hostid == hostuid)
2889 continue;
2890 if (map->idtype == ID_TYPE_GID && map->range == 1 &&
2891 map->nsid == hostgid && map->hostid == hostgid)
2892 continue;
2893 use_shadow = true;
2894 break;
2895 }
0e6e3a41 2896 }
251d0d2a 2897
986ef930
CB
2898 for (type = ID_TYPE_UID, u_or_g = 'u'; type <= ID_TYPE_GID;
2899 type++, u_or_g = 'g') {
2900 pos = mapbuf;
2901
0e6e3a41 2902 if (use_shadow)
986ef930 2903 pos += sprintf(mapbuf, "new%cidmap %d", u_or_g, pid);
4f7521b4 2904
cf3ef16d 2905 lxc_list_for_each(iterator, idmap) {
251d0d2a 2906 map = iterator->elem;
cf3ef16d
SH
2907 if (map->idtype != type)
2908 continue;
2909
4bc3b759
CB
2910 had_entry = true;
2911
986ef930 2912 left = LXC_IDMAPLEN - (pos - mapbuf);
d1838f34 2913 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
4bc3b759
CB
2914 use_shadow ? " " : "", map->nsid,
2915 map->hostid, map->range,
0e6e3a41 2916 use_shadow ? "" : "\n");
a427e268
CB
2917 if (fill <= 0 || fill >= left) {
2918 /* The kernel only takes <= 4k for writes to
2919 * /proc/<pid>/{g,u}id_map
2920 */
2921 SYSERROR("Too many %cid mappings defined", u_or_g);
2922 return -1;
2923 }
4bc3b759 2924
cf3ef16d 2925 pos += fill;
251d0d2a 2926 }
cf3ef16d 2927 if (!had_entry)
4f7521b4 2928 continue;
cf3ef16d 2929
986ef930
CB
2930 /* Try to catch the ouput of new{g,u}idmap to make debugging
2931 * easier.
2932 */
2933 if (use_shadow) {
2934 ret = run_command(cmd_output, sizeof(cmd_output),
2935 lxc_map_ids_exec_wrapper,
2936 (void *)mapbuf);
2937 if (ret < 0) {
54fbbeb5
CB
2938 ERROR("new%cidmap failed to write mapping \"%s\": %s",
2939 u_or_g, cmd_output, mapbuf);
986ef930
CB
2940 return -1;
2941 }
54fbbeb5 2942 TRACE("new%cidmap wrote mapping \"%s\"", u_or_g, mapbuf);
d1838f34 2943 } else {
986ef930 2944 ret = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
54fbbeb5 2945 if (ret < 0) {
da0f9977 2946 ERROR("Failed to write mapping: %s", mapbuf);
986ef930 2947 return -1;
54fbbeb5
CB
2948 }
2949 TRACE("Wrote mapping \"%s\"", mapbuf);
d1838f34 2950 }
986ef930
CB
2951
2952 memset(mapbuf, 0, sizeof(mapbuf));
f6d3e3e4 2953 }
251d0d2a 2954
986ef930 2955 return 0;
f6d3e3e4
SH
2956}
2957
0fd73091 2958/* Return the host uid/gid to which the container root is mapped in val.
0b3a6504 2959 * Return true if id was found, false otherwise.
cf3ef16d 2960 */
2a9a80cb 2961bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
4160c3a0 2962 unsigned long *val)
cf3ef16d 2963{
4160c3a0 2964 unsigned nsid;
0fd73091
CB
2965 struct id_map *map;
2966 struct lxc_list *it;
4160c3a0
CB
2967
2968 if (idtype == ID_TYPE_UID)
2969 nsid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
2970 else
2971 nsid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
cf3ef16d 2972
0fd73091 2973 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 2974 map = it->elem;
7b50c609 2975 if (map->idtype != idtype)
cf3ef16d 2976 continue;
4160c3a0 2977 if (map->nsid != nsid)
cf3ef16d 2978 continue;
2a9a80cb
SH
2979 *val = map->hostid;
2980 return true;
cf3ef16d 2981 }
4160c3a0 2982
2a9a80cb 2983 return false;
cf3ef16d
SH
2984}
2985
2133f58c 2986int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d 2987{
cf3ef16d 2988 struct id_map *map;
0fd73091
CB
2989 struct lxc_list *it;
2990
2991 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 2992 map = it->elem;
2133f58c 2993 if (map->idtype != idtype)
cf3ef16d 2994 continue;
0fd73091 2995
cf3ef16d 2996 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 2997 return (id - map->hostid) + map->nsid;
cf3ef16d 2998 }
0fd73091 2999
57d116ab 3000 return -1;
cf3ef16d
SH
3001}
3002
339efad9 3003int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d 3004{
cf3ef16d 3005 struct id_map *map;
0fd73091 3006 struct lxc_list *it;
2133f58c 3007 unsigned int freeid = 0;
0fd73091 3008
cf3ef16d 3009again:
0fd73091 3010 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 3011 map = it->elem;
2133f58c 3012 if (map->idtype != idtype)
cf3ef16d 3013 continue;
0fd73091 3014
cf3ef16d
SH
3015 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3016 freeid = map->nsid + map->range;
3017 goto again;
3018 }
3019 }
0fd73091 3020
cf3ef16d
SH
3021 return freeid;
3022}
3023
f4f52cb5
CB
/* Exec "lxc-usernsexec" (looked up via PATH) with @args as its
 * NULL-terminated argument vector. Only returns, with -1, if execvp() fails.
 */
int chown_mapped_root_exec_wrapper(void *args)
{
	char *const *argv = args;

	execvp("lxc-usernsexec", argv);

	/* Reaching this point means the exec did not happen. */
	return -1;
}
3029
0fd73091 3030/* chown_mapped_root: for an unprivileged user with uid/gid X to
7b50c609
TS
3031 * chown a dir to subuid/subgid Y, he needs to run chown as root
3032 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3033 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3034 * root is privileged with respect to hostuid/hostgid X, allowing
3035 * him to do the chown.
f6d3e3e4 3036 */
41dc7155 3037int chown_mapped_root(const char *path, struct lxc_conf *conf)
f6d3e3e4 3038{
f4f52cb5 3039 uid_t rootuid, rootgid;
2a9a80cb 3040 unsigned long val;
f4f52cb5
CB
3041 int hostuid, hostgid, ret;
3042 struct stat sb;
3043 char map1[100], map2[100], map3[100], map4[100], map5[100];
3044 char ugid[100];
41dc7155 3045 const char *args1[] = {"lxc-usernsexec",
f4f52cb5
CB
3046 "-m", map1,
3047 "-m", map2,
3048 "-m", map3,
3049 "-m", map5,
3050 "--", "chown", ugid, path,
3051 NULL};
41dc7155 3052 const char *args2[] = {"lxc-usernsexec",
f4f52cb5
CB
3053 "-m", map1,
3054 "-m", map2,
3055 "-m", map3,
3056 "-m", map4,
3057 "-m", map5,
3058 "--", "chown", ugid, path,
3059 NULL};
3060 char cmd_output[MAXPATHLEN];
3061
3062 hostuid = geteuid();
3063 hostgid = getegid();
f6d3e3e4 3064
2a9a80cb 3065 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
bc80f098 3066 ERROR("No uid mapping for container root");
c4d10a05 3067 return -1;
f6d3e3e4 3068 }
f4f52cb5 3069 rootuid = (uid_t)val;
0fd73091 3070
7b50c609 3071 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
bc80f098 3072 ERROR("No gid mapping for container root");
7b50c609
TS
3073 return -1;
3074 }
f4f52cb5 3075 rootgid = (gid_t)val;
2a9a80cb 3076
f4f52cb5 3077 if (hostuid == 0) {
7b50c609 3078 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3079 ERROR("Error chowning %s", path);
3080 return -1;
3081 }
0fd73091 3082
c4d10a05
SH
3083 return 0;
3084 }
f3d7e4ca 3085
f4f52cb5 3086 if (rootuid == hostuid) {
1a0e70ac 3087 /* nothing to do */
b103ceac 3088 INFO("Container root is our uid; no need to chown");
f3d7e4ca
SH
3089 return 0;
3090 }
3091
bbdbf8f0 3092 /* save the current gid of "path" */
f4f52cb5
CB
3093 if (stat(path, &sb) < 0) {
3094 ERROR("Error stat %s", path);
f6d3e3e4
SH
3095 return -1;
3096 }
7b50c609 3097
bbdbf8f0
CB
3098 /* Update the path argument in case this was overlayfs. */
3099 args1[sizeof(args1) / sizeof(args1[0]) - 2] = path;
3100 args2[sizeof(args2) / sizeof(args2[0]) - 2] = path;
3101
f4f52cb5
CB
3102 /*
3103 * A file has to be group-owned by a gid mapped into the
3104 * container, or the container won't be privileged over it.
3105 */
3106 DEBUG("trying to chown \"%s\" to %d", path, hostgid);
3107 if (sb.st_uid == hostuid &&
3108 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3109 chown(path, -1, hostgid) < 0) {
3110 ERROR("Failed chgrping %s", path);
3111 return -1;
3112 }
f6d3e3e4 3113
1a0e70ac 3114 /* "u:0:rootuid:1" */
f4f52cb5
CB
3115 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
3116 if (ret < 0 || ret >= 100) {
3117 ERROR("Error uid printing map string");
3118 return -1;
3119 }
7b50c609 3120
1a0e70ac 3121 /* "u:hostuid:hostuid:1" */
f4f52cb5
CB
3122 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3123 if (ret < 0 || ret >= 100) {
3124 ERROR("Error uid printing map string");
3125 return -1;
3126 }
c4d10a05 3127
1a0e70ac 3128 /* "g:0:rootgid:1" */
f4f52cb5
CB
3129 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
3130 if (ret < 0 || ret >= 100) {
3131 ERROR("Error gid printing map string");
3132 return -1;
3133 }
98e5ba51 3134
1a0e70ac 3135 /* "g:pathgid:rootgid+pathgid:1" */
f4f52cb5
CB
3136 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3137 rootgid + (gid_t)sb.st_gid);
3138 if (ret < 0 || ret >= 100) {
3139 ERROR("Error gid printing map string");
3140 return -1;
3141 }
c4d10a05 3142
1a0e70ac 3143 /* "g:hostgid:hostgid:1" */
f4f52cb5
CB
3144 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3145 if (ret < 0 || ret >= 100) {
3146 ERROR("Error gid printing map string");
3147 return -1;
3148 }
7b50c609 3149
1a0e70ac 3150 /* "0:pathgid" (chown) */
f4f52cb5
CB
3151 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
3152 if (ret < 0 || ret >= 100) {
3153 ERROR("Error owner printing format string for chown");
3154 return -1;
3155 }
7b50c609 3156
f4f52cb5
CB
3157 if (hostgid == sb.st_gid)
3158 ret = run_command(cmd_output, sizeof(cmd_output),
3159 chown_mapped_root_exec_wrapper,
3160 (void *)args1);
3161 else
3162 ret = run_command(cmd_output, sizeof(cmd_output),
3163 chown_mapped_root_exec_wrapper,
3164 (void *)args2);
3165 if (ret < 0)
3166 ERROR("lxc-usernsexec failed: %s", cmd_output);
7b50c609 3167
f4f52cb5 3168 return ret;
f6d3e3e4
SH
3169}
3170
943144d9
CB
3171/* NOTE: Must not be called from inside the container namespace! */
3172int lxc_create_tmp_proc_mount(struct lxc_conf *conf)
5112cd70
SH
3173{
3174 int mounted;
3175
943144d9 3176 mounted = lxc_mount_proc_if_needed(conf->rootfs.path ? conf->rootfs.mount : "");
5112cd70 3177 if (mounted == -1) {
0fd73091 3178 SYSERROR("Failed to mount proc in the container");
01958b1f 3179 /* continue only if there is no rootfs */
943144d9 3180 if (conf->rootfs.path)
01958b1f 3181 return -1;
5112cd70 3182 } else if (mounted == 1) {
7a0bcca3 3183 conf->tmp_umount_proc = true;
5112cd70 3184 }
943144d9 3185
5112cd70
SH
3186 return 0;
3187}
3188
3189void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3190{
7a0bcca3 3191 if (!lxc_conf->tmp_umount_proc)
0fd73091
CB
3192 return;
3193
7a0bcca3
CB
3194 (void)umount2("/proc", MNT_DETACH);
3195 lxc_conf->tmp_umount_proc = false;
5112cd70
SH
3196}
3197
0fd73091 3198/* Walk /proc/mounts and change any shared entries to slave. */
6a0c909a 3199void remount_all_slave(void)
e995d7a2 3200{
6a49f05e
CB
3201 int memfd, mntinfo_fd, ret;
3202 ssize_t copied;
0fd73091 3203 FILE *f;
e995d7a2 3204 size_t len = 0;
0fd73091 3205 char *line = NULL;
e995d7a2 3206
6a49f05e 3207 mntinfo_fd = open("/proc/self/mountinfo", O_RDONLY | O_CLOEXEC);
fea3b91d
DJ
3208 if (mntinfo_fd < 0) {
3209 SYSERROR("Failed to open \"/proc/self/mountinfo\"");
6a49f05e 3210 return;
fea3b91d 3211 }
6a49f05e
CB
3212
3213 memfd = memfd_create(".lxc_mountinfo", MFD_CLOEXEC);
3214 if (memfd < 0) {
3215 char template[] = P_tmpdir "/.lxc_mountinfo_XXXXXX";
3216
3217 if (errno != ENOSYS) {
fea3b91d 3218 SYSERROR("Failed to create temporary in-memory file");
6a49f05e 3219 close(mntinfo_fd);
6a49f05e
CB
3220 return;
3221 }
3222
3223 memfd = lxc_make_tmpfile(template, true);
fea3b91d
DJ
3224 if (memfd < 0) {
3225 close(mntinfo_fd);
3226 WARN("Failed to create temporary file");
3227 return;
3228 }
6a49f05e
CB
3229 }
3230
3231#define __LXC_SENDFILE_MAX 0x7ffff000 /* maximum number of bytes sendfile can handle */
3232again:
3233 copied = sendfile(memfd, mntinfo_fd, NULL, __LXC_SENDFILE_MAX);
3234 if (copied < 0) {
3235 if (errno == EINTR)
3236 goto again;
3237
fea3b91d 3238 SYSERROR("Failed to copy \"/proc/self/mountinfo\"");
6a49f05e
CB
3239 close(mntinfo_fd);
3240 close(memfd);
6a49f05e
CB
3241 return;
3242 }
3243 close(mntinfo_fd);
3244
3245 /* After a successful fdopen() memfd will be closed when calling
3246 * fclose(f). Calling close(memfd) afterwards is undefined.
3247 */
3248 ret = lseek(memfd, 0, SEEK_SET);
3249 if (ret < 0) {
fea3b91d 3250 SYSERROR("Failed to reset file descriptor offset");
6a49f05e 3251 close(memfd);
6a49f05e
CB
3252 return;
3253 }
3254
3255 f = fdopen(memfd, "r");
e995d7a2 3256 if (!f) {
fea3b91d
DJ
3257 SYSERROR("Failed to open copy of \"/proc/self/mountinfo\" to mark "
3258 "all shared. Continuing");
6a49f05e 3259 close(memfd);
e995d7a2
SH
3260 return;
3261 }
3262
3263 while (getline(&line, &len, f) != -1) {
0fd73091
CB
3264 int ret;
3265 char *opts, *target;
3266
e995d7a2
SH
3267 target = get_field(line, 4);
3268 if (!target)
3269 continue;
0fd73091 3270
e995d7a2
SH
3271 opts = get_field(target, 2);
3272 if (!opts)
3273 continue;
0fd73091 3274
e995d7a2
SH
3275 null_endofword(opts);
3276 if (!strstr(opts, "shared"))
3277 continue;
0fd73091 3278
e995d7a2 3279 null_endofword(target);
0fd73091
CB
3280 ret = mount(NULL, target, NULL, MS_SLAVE, NULL);
3281 if (ret < 0) {
3282 SYSERROR("Failed to make \"%s\" MS_SLAVE", target);
e995d7a2 3283 ERROR("Continuing...");
6a49f05e 3284 continue;
e995d7a2 3285 }
6a49f05e 3286 TRACE("Remounted \"%s\" as MS_SLAVE", target);
e995d7a2
SH
3287 }
3288 fclose(f);
f10fad2f 3289 free(line);
6a49f05e 3290 TRACE("Remounted all mount table entries as MS_SLAVE");
e995d7a2
SH
3291}
3292
794248d0 3293static int lxc_execute_bind_init(struct lxc_handler *handler)
2322903b
SH
3294{
3295 int ret;
794248d0
CB
3296 char *p;
3297 char path[PATH_MAX], destpath[PATH_MAX];
3298 struct lxc_conf *conf = handler->conf;
9d9c111c
SH
3299
3300 /* If init exists in the container, don't bind mount a static one */
3301 p = choose_init(conf->rootfs.mount);
3302 if (p) {
41089848
TA
3303 char *old = p;
3304
3305 p = strdup(old + strlen(conf->rootfs.mount));
3306 free(old);
3307 if (!p)
3308 return -ENOMEM;
3309
3310 INFO("Found existing init at \"%s\"", p);
3311 goto out;
9d9c111c 3312 }
2322903b
SH
3313
3314 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
0fd73091 3315 if (ret < 0 || ret >= PATH_MAX)
8353b4c9 3316 return -1;
2322903b
SH
3317
3318 if (!file_exists(path)) {
0fd73091 3319 ERROR("The file \"%s\" does not exist on host", path);
8353b4c9 3320 return -1;
2322903b
SH
3321 }
3322
794248d0 3323 ret = snprintf(destpath, PATH_MAX, "%s" P_tmpdir "%s", conf->rootfs.mount, "/.lxc-init");
0fd73091 3324 if (ret < 0 || ret >= PATH_MAX)
8353b4c9 3325 return -1;
2322903b
SH
3326
3327 if (!file_exists(destpath)) {
794248d0
CB
3328 ret = mknod(destpath, S_IFREG | 0000, 0);
3329 if (ret < 0 && errno != EEXIST) {
3330 SYSERROR("Failed to create dummy \"%s\" file as bind mount target", destpath);
8353b4c9 3331 return -1;
2322903b 3332 }
2322903b
SH
3333 }
3334
592fd47a 3335 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
8353b4c9 3336 if (ret < 0) {
0fd73091 3337 SYSERROR("Failed to bind mount lxc.init.static into container");
8353b4c9
CB
3338 return -1;
3339 }
3340
794248d0
CB
3341 p = strdup(destpath + strlen(conf->rootfs.mount));
3342 if (!p)
3343 return -ENOMEM;
794248d0 3344
8353b4c9 3345 INFO("Bind mounted lxc.init.static into container at \"%s\"", path);
41089848 3346out:
4b5b3a2a 3347 ((struct execute_args *)handler->data)->init_fd = -1;
41089848 3348 ((struct execute_args *)handler->data)->init_path = p;
8353b4c9 3349 return 0;
2322903b
SH
3350}
3351
0fd73091
CB
3352/* This does the work of remounting / if it is shared, calling the container
3353 * pre-mount hooks, and mounting the rootfs.
35120d9c
SH
3354 */
3355int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3356{
0fd73091
CB
3357 int ret;
3358
35120d9c 3359 if (conf->rootfs_setup) {
35120d9c 3360 const char *path = conf->rootfs.mount;
0fd73091
CB
3361
3362 /* The rootfs was set up in another namespace. bind-mount it to
3363 * give us a mount in our own ns so we can pivot_root to it
3364 */
3365 ret = mount(path, path, "rootfs", MS_BIND, NULL);
3366 if (ret < 0) {
3367 ERROR("Failed to bind mount container / onto itself");
145832ba 3368 return -1;
35120d9c 3369 }
0fd73091
CB
3370
3371 TRACE("Bind mounted container / onto itself");
145832ba 3372 return 0;
35120d9c 3373 }
d4ef7c50 3374
e995d7a2
SH
3375 remount_all_slave();
3376
0fd73091
CB
3377 ret = run_lxc_hooks(name, "pre-mount", conf, NULL);
3378 if (ret < 0) {
3379 ERROR("Failed to run pre-mount hooks");
35120d9c
SH
3380 return -1;
3381 }
3382
0fd73091
CB
3383 ret = lxc_setup_rootfs(conf);
3384 if (ret < 0) {
3385 ERROR("Failed to setup rootfs for");
35120d9c
SH
3386 return -1;
3387 }
3388
3389 conf->rootfs_setup = true;
3390 return 0;
3391}
3392
1c1c7051
SH
3393static bool verify_start_hooks(struct lxc_conf *conf)
3394{
1c1c7051 3395 char path[MAXPATHLEN];
0fd73091
CB
3396 struct lxc_list *it;
3397
3398 lxc_list_for_each (it, &conf->hooks[LXCHOOK_START]) {
1c1c7051 3399 int ret;
0fd73091
CB
3400 struct stat st;
3401 char *hookname = it->elem;
1c1c7051
SH
3402
3403 ret = snprintf(path, MAXPATHLEN, "%s%s",
0fd73091
CB
3404 conf->rootfs.path ? conf->rootfs.mount : "",
3405 hookname);
1c1c7051
SH
3406 if (ret < 0 || ret >= MAXPATHLEN)
3407 return false;
0fd73091 3408
1c1c7051 3409 ret = stat(path, &st);
0fd73091 3410 if (ret < 0) {
7b6753e7 3411 SYSERROR("Start hook %s not found in container",
0fd73091 3412 hookname);
1c1c7051
SH
3413 return false;
3414 }
0fd73091 3415
6a0c909a 3416 return true;
1c1c7051
SH
3417 }
3418
3419 return true;
3420}
3421
4b5b3a2a
TA
/* Probe whether the execveat() system call is available.
 *
 * When __NR_execveat is not defined at build time the answer is false.
 * Otherwise the syscall is issued with an invalid fd and an empty path, and
 * the result is false only if errno reads ENOSYS afterwards.
 */
static bool execveat_supported(void)
{
#ifdef __NR_execveat
	/*
	 * We use the syscall here, because it was introduced in kernel 3.19,
	 * while glibc got support for using the syscall much later, in 2.27.
	 * We don't want to use glibc because it falls back to /proc, and the
	 * container may not have /proc mounted depending on its configuration.
	 */
	syscall(__NR_execveat, -1, "", NULL, NULL, AT_EMPTY_PATH);

	return errno != ENOSYS;
#else
	return false;
#endif
}
3440
3b988b33 3441int lxc_setup(struct lxc_handler *handler)
35120d9c 3442{
2187efd3 3443 int ret;
0fd73091 3444 const char *lxcpath = handler->lxcpath, *name = handler->name;
35120d9c 3445 struct lxc_conf *lxc_conf = handler->conf;
35120d9c 3446
8353b4c9
CB
3447 ret = do_rootfs_setup(lxc_conf, name, lxcpath);
3448 if (ret < 0) {
3449 ERROR("Failed to setup rootfs");
35120d9c
SH
3450 return -1;
3451 }
3452
28d9e29e 3453 if (handler->nsfd[LXC_NS_UTS] == -1) {
8353b4c9
CB
3454 ret = setup_utsname(lxc_conf->utsname);
3455 if (ret < 0) {
0fd73091 3456 ERROR("Failed to setup the utsname %s", name);
6c544cb3
MM
3457 return -1;
3458 }
0ad19a3f 3459 }
3460
8353b4c9
CB
3461 ret = lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network);
3462 if (ret < 0) {
3463 ERROR("Failed to setup network");
95b5ffaf 3464 return -1;
0ad19a3f 3465 }
3466
8353b4c9
CB
3467 ret = lxc_network_send_name_and_ifindex_to_parent(handler);
3468 if (ret < 0) {
3469 ERROR("Failed to send network device names and ifindices to parent");
790255cf
CB
3470 return -1;
3471 }
3472
bc6928ff 3473 if (lxc_conf->autodev > 0) {
8353b4c9
CB
3474 ret = mount_autodev(name, &lxc_conf->rootfs, lxcpath);
3475 if (ret < 0) {
3476 ERROR("Failed to mount \"/dev\"");
c6883f38
SH
3477 return -1;
3478 }
3479 }
3480
8353b4c9
CB
3481 /* Do automatic mounts (mainly /proc and /sys), but exclude those that
3482 * need to wait until other stuff has finished.
368bbc02 3483 */
8353b4c9
CB
3484 ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler);
3485 if (ret < 0) {
3486 ERROR("Failed to setup first automatic mounts");
368bbc02
CS
3487 return -1;
3488 }
3489
8353b4c9
CB
3490 ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
3491 if (ret < 0) {
3492 ERROR("Failed to setup mounts");
95b5ffaf 3493 return -1;
576f946d 3494 }
3495
7b6753e7 3496 /* Make sure any start hooks are in the container */
1c1c7051
SH
3497 if (!verify_start_hooks(lxc_conf))
3498 return -1;
3499
8353b4c9 3500 if (lxc_conf->is_execute) {
4b5b3a2a
TA
3501 if (execveat_supported()) {
3502 int fd;
3503 char path[PATH_MAX];
3504
3505 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3506 if (ret < 0 || ret >= PATH_MAX) {
3507 ERROR("Path to init.lxc.static too long");
3508 return -1;
3509 }
3510
3511 fd = open(path, O_PATH | O_CLOEXEC);
3512 if (fd < 0) {
3513 SYSERROR("Unable to open lxc.init.static");
3514 return -1;
3515 }
3516
3517 ((struct execute_args *)handler->data)->init_fd = fd;
3518 ((struct execute_args *)handler->data)->init_path = NULL;
3519 } else {
3520 ret = lxc_execute_bind_init(handler);
3521 if (ret < 0) {
3522 ERROR("Failed to bind-mount the lxc init system");
3523 return -1;
3524 }
8353b4c9
CB
3525 }
3526 }
2322903b 3527
8353b4c9
CB
3528 /* Now mount only cgroups, if wanted. Before, /sys could not have been
3529 * mounted. It is guaranteed to be mounted now either through
3530 * automatically or via fstab entries.
368bbc02 3531 */
8353b4c9
CB
3532 ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler);
3533 if (ret < 0) {
3534 ERROR("Failed to setup remaining automatic mounts");
368bbc02
CS
3535 return -1;
3536 }
3537
8353b4c9 3538 ret = run_lxc_hooks(name, "mount", lxc_conf, NULL);
1a2cf89d 3539 if (ret < 0) {
8353b4c9 3540 ERROR("Failed to run mount hooks");
773fb9ca
SH
3541 return -1;
3542 }
3543
bc6928ff 3544 if (lxc_conf->autodev > 0) {
8353b4c9
CB
3545 ret = run_lxc_hooks(name, "autodev", lxc_conf, NULL);
3546 if (ret < 0) {
3547 ERROR("Failed to run autodev hooks");
f7bee6c6
MW
3548 return -1;
3549 }
06749971 3550
8353b4c9
CB
3551 ret = lxc_fill_autodev(&lxc_conf->rootfs);
3552 if (ret < 0) {
3553 ERROR("Failed to populate \"/dev\"");
91c3830e
SH
3554 return -1;
3555 }
3556 }
368bbc02 3557
8353b4c9
CB
3558 if (!lxc_list_empty(&lxc_conf->mount_list)) {
3559 ret = setup_mount_entries(lxc_conf, &lxc_conf->rootfs,
3560 &lxc_conf->mount_list, name, lxcpath);
3561 if (ret < 0) {
3562 ERROR("Failed to setup mount entries");
3563 return -1;
3564 }
181437fd
YT
3565 }
3566
ed8704d0 3567 ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console,
885766f5 3568 lxc_conf->ttys.dir);
ed8704d0
CB
3569 if (ret < 0) {
3570 ERROR("Failed to setup console");
95b5ffaf 3571 return -1;
6e590161 3572 }
3573
ed8704d0
CB
3574 ret = lxc_setup_dev_symlinks(&lxc_conf->rootfs);
3575 if (ret < 0) {
8353b4c9 3576 ERROR("Failed to setup \"/dev\" symlinks");
69aa6655
DE
3577 return -1;
3578 }
3579
8353b4c9
CB
3580 ret = lxc_create_tmp_proc_mount(lxc_conf);
3581 if (ret < 0) {
3582 ERROR("Failed to \"/proc\" LSMs");
e075f5d9 3583 return -1;
e075f5d9 3584 }
e075f5d9 3585
8353b4c9
CB
3586 ret = setup_pivot_root(&lxc_conf->rootfs);
3587 if (ret < 0) {
3588 ERROR("Failed to pivot root into rootfs");
95b5ffaf 3589 return -1;
ed502555 3590 }
3591
8353b4c9
CB
3592 ret = lxc_setup_devpts(lxc_conf);
3593 if (ret < 0) {
3594 ERROR("Failed to setup new devpts instance");
95b5ffaf 3595 return -1;
3c26f34e 3596 }
3597
2187efd3
CB
3598 ret = lxc_create_ttys(handler);
3599 if (ret < 0)
e8bd4e43 3600 return -1;
e8bd4e43 3601
8353b4c9
CB
3602 ret = setup_personality(lxc_conf->personality);
3603 if (ret < 0) {
3604 ERROR("Failed to set personality");
cccc74b5
DL
3605 return -1;
3606 }
3607
8353b4c9
CB
3608 /* Set sysctl value to a path under /proc/sys as determined from the
3609 * key. For e.g. net.ipv4.ip_forward translated to
3610 * /proc/sys/net/ipv4/ip_forward.
7edd0540
L
3611 */
3612 if (!lxc_list_empty(&lxc_conf->sysctls)) {
3613 ret = setup_sysctl_parameters(&lxc_conf->sysctls);
8353b4c9
CB
3614 if (ret < 0) {
3615 ERROR("Failed to setup sysctl parameters");
7edd0540 3616 return -1;
8353b4c9 3617 }
7edd0540
L
3618 }
3619
97a8f74f
SG
3620 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3621 if (!lxc_list_empty(&lxc_conf->caps)) {
8353b4c9
CB
3622 ERROR("Container requests lxc.cap.drop and "
3623 "lxc.cap.keep: either use lxc.cap.drop or "
3624 "lxc.cap.keep, not both");
f6d3e3e4
SH
3625 return -1;
3626 }
8353b4c9 3627
97a8f74f 3628 if (dropcaps_except(&lxc_conf->keepcaps)) {
8353b4c9 3629 ERROR("Failed to keep capabilities");
97a8f74f
SG
3630 return -1;
3631 }
3632 } else if (setup_caps(&lxc_conf->caps)) {
8353b4c9 3633 ERROR("Failed to drop capabilities");
97a8f74f 3634 return -1;
81810dd1
DL
3635 }
3636
8353b4c9 3637 NOTICE("The container \"%s\" is set up", name);
cd54d859 3638
0ad19a3f 3639 return 0;
3640}
26ddeedd 3641
3f60c2f7 3642int run_lxc_hooks(const char *name, char *hookname, struct lxc_conf *conf,
14a7b0f9 3643 char *argv[])
26ddeedd 3644{
26ddeedd 3645 struct lxc_list *it;
3f60c2f7 3646 int which = -1;
26ddeedd 3647
3f60c2f7 3648 if (strcmp(hookname, "pre-start") == 0)
26ddeedd 3649 which = LXCHOOK_PRESTART;
3f60c2f7 3650 else if (strcmp(hookname, "start-host") == 0)
08dd2805 3651 which = LXCHOOK_START_HOST;
3f60c2f7 3652 else if (strcmp(hookname, "pre-mount") == 0)
5ea6163a 3653 which = LXCHOOK_PREMOUNT;
3f60c2f7 3654 else if (strcmp(hookname, "mount") == 0)
26ddeedd 3655 which = LXCHOOK_MOUNT;
3f60c2f7 3656 else if (strcmp(hookname, "autodev") == 0)
f7bee6c6 3657 which = LXCHOOK_AUTODEV;
3f60c2f7 3658 else if (strcmp(hookname, "start") == 0)
26ddeedd 3659 which = LXCHOOK_START;
3f60c2f7 3660 else if (strcmp(hookname, "stop") == 0)
52492063 3661 which = LXCHOOK_STOP;
3f60c2f7 3662 else if (strcmp(hookname, "post-stop") == 0)
26ddeedd 3663 which = LXCHOOK_POSTSTOP;
3f60c2f7 3664 else if (strcmp(hookname, "clone") == 0)
148e91f5 3665 which = LXCHOOK_CLONE;
3f60c2f7 3666 else if (strcmp(hookname, "destroy") == 0)
37cf711b 3667 which = LXCHOOK_DESTROY;
26ddeedd
SH
3668 else
3669 return -1;
3f60c2f7 3670
0fd73091 3671 lxc_list_for_each (it, &conf->hooks[which]) {
26ddeedd 3672 int ret;
3f60c2f7
CB
3673 char *hook = it->elem;
3674
3675 ret = run_script_argv(name, conf->hooks_version, "lxc", hook,
14a7b0f9 3676 hookname, argv);
3f60c2f7
CB
3677 if (ret < 0)
3678 return -1;
26ddeedd 3679 }
3f60c2f7 3680
26ddeedd
SH
3681 return 0;
3682}
72d0e1cb 3683
72d0e1cb
SG
3684int lxc_clear_config_caps(struct lxc_conf *c)
3685{
1a0e70ac 3686 struct lxc_list *it, *next;
72d0e1cb 3687
0fd73091 3688 lxc_list_for_each_safe (it, &c->caps, next) {
72d0e1cb
SG
3689 lxc_list_del(it);
3690 free(it->elem);
3691 free(it);
3692 }
0fd73091 3693
72d0e1cb
SG
3694 return 0;
3695}
3696
c7e345ae
CB
3697static int lxc_free_idmap(struct lxc_list *id_map)
3698{
27c27d73
SH
3699 struct lxc_list *it, *next;
3700
0fd73091 3701 lxc_list_for_each_safe (it, id_map, next) {
27c27d73
SH
3702 lxc_list_del(it);
3703 free(it->elem);
3704 free(it);
3705 }
c7e345ae 3706
27c27d73
SH
3707 return 0;
3708}
3709
4355ab5f
SH
3710int lxc_clear_idmaps(struct lxc_conf *c)
3711{
3712 return lxc_free_idmap(&c->id_map);
3713}
3714
1fb86a7c
SH
3715int lxc_clear_config_keepcaps(struct lxc_conf *c)
3716{
0fd73091 3717 struct lxc_list *it, *next;
1fb86a7c 3718
0fd73091 3719 lxc_list_for_each_safe (it, &c->keepcaps, next) {
1fb86a7c
SH
3720 lxc_list_del(it);
3721 free(it->elem);
3722 free(it);
3723 }
0fd73091 3724
1fb86a7c
SH
3725 return 0;
3726}
3727
54860ed0 3728int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
72d0e1cb 3729{
54860ed0 3730 char *global_token, *namespaced_token;
ab1a6cac 3731 size_t namespaced_token_len;
54860ed0 3732 struct lxc_list *it, *next, *list;
ab1a6cac 3733 const char *k = key;
54860ed0 3734 bool all = false;
72d0e1cb 3735
54860ed0
CB
3736 if (version == CGROUP2_SUPER_MAGIC) {
3737 global_token = "lxc.cgroup2";
3738 namespaced_token = "lxc.cgroup2.";
0fd73091 3739 namespaced_token_len = sizeof("lxc.cgroup2.") - 1;
54860ed0
CB
3740 list = &c->cgroup2;
3741 } else if (version == CGROUP_SUPER_MAGIC) {
3742 global_token = "lxc.cgroup";
3743 namespaced_token = "lxc.cgroup.";
0fd73091 3744 namespaced_token_len = sizeof("lxc.cgroup.") - 1;
54860ed0
CB
3745 list = &c->cgroup;
3746 } else {
ab1a6cac 3747 return -EINVAL;
54860ed0
CB
3748 }
3749
3750 if (strcmp(key, global_token) == 0)
72d0e1cb 3751 all = true;
54860ed0 3752 else if (strncmp(key, namespaced_token, sizeof(namespaced_token) - 1) == 0)
ab1a6cac 3753 k += namespaced_token_len;
a6390f01 3754 else
ab1a6cac 3755 return -EINVAL;
72d0e1cb 3756
0fd73091 3757 lxc_list_for_each_safe (it, list, next) {
72d0e1cb 3758 struct lxc_cgroup *cg = it->elem;
54860ed0 3759
72d0e1cb
SG
3760 if (!all && strcmp(cg->subsystem, k) != 0)
3761 continue;
54860ed0 3762
72d0e1cb
SG
3763 lxc_list_del(it);
3764 free(cg->subsystem);
3765 free(cg->value);
3766 free(cg);
3767 free(it);
3768 }
e409b214 3769
72d0e1cb
SG
3770 return 0;
3771}
3772
c6d09e15
WB
3773int lxc_clear_limits(struct lxc_conf *c, const char *key)
3774{
3775 struct lxc_list *it, *next;
c6d09e15 3776 const char *k = NULL;
0fd73091 3777 bool all = false;
c6d09e15 3778
b668653c 3779 if (strcmp(key, "lxc.limit") == 0 || strcmp(key, "lxc.prlimit") == 0)
c6d09e15 3780 all = true;
b668653c
CB
3781 else if (strncmp(key, "lxc.limit.", sizeof("lxc.limit.") - 1) == 0)
3782 k = key + sizeof("lxc.limit.") - 1;
3783 else if (strncmp(key, "lxc.prlimit.", sizeof("lxc.prlimit.") - 1) == 0)
3784 k = key + sizeof("lxc.prlimit.") - 1;
c6d09e15
WB
3785 else
3786 return -1;
3787
0fd73091 3788 lxc_list_for_each_safe (it, &c->limits, next) {
c6d09e15 3789 struct lxc_limit *lim = it->elem;
0fd73091 3790
c6d09e15
WB
3791 if (!all && strcmp(lim->resource, k) != 0)
3792 continue;
0fd73091 3793
c6d09e15
WB
3794 lxc_list_del(it);
3795 free(lim->resource);
3796 free(lim);
3797 free(it);
3798 }
b668653c 3799
c6d09e15
WB
3800 return 0;
3801}
3802
7edd0540
L
3803int lxc_clear_sysctls(struct lxc_conf *c, const char *key)
3804{
3805 struct lxc_list *it, *next;
7edd0540 3806 const char *k = NULL;
0fd73091 3807 bool all = false;
7edd0540
L
3808
3809 if (strcmp(key, "lxc.sysctl") == 0)
3810 all = true;
3811 else if (strncmp(key, "lxc.sysctl.", sizeof("lxc.sysctl.") - 1) == 0)
3812 k = key + sizeof("lxc.sysctl.") - 1;
3813 else
3814 return -1;
3815
0fd73091 3816 lxc_list_for_each_safe (it, &c->sysctls, next) {
7edd0540 3817 struct lxc_sysctl *elem = it->elem;
0fd73091 3818
7edd0540
L
3819 if (!all && strcmp(elem->key, k) != 0)
3820 continue;
0fd73091 3821
7edd0540
L
3822 lxc_list_del(it);
3823 free(elem->key);
3824 free(elem->value);
3825 free(elem);
3826 free(it);
3827 }
0fd73091 3828
7edd0540
L
3829 return 0;
3830}
3831
61d7a733
YT
3832int lxc_clear_procs(struct lxc_conf *c, const char *key)
3833{
0fd73091 3834 struct lxc_list *it, *next;
61d7a733 3835 const char *k = NULL;
0fd73091 3836 bool all = false;
61d7a733
YT
3837
3838 if (strcmp(key, "lxc.proc") == 0)
3839 all = true;
3840 else if (strncmp(key, "lxc.proc.", sizeof("lxc.proc.") - 1) == 0)
3841 k = key + sizeof("lxc.proc.") - 1;
3842 else
3843 return -1;
3844
0fd73091 3845 lxc_list_for_each_safe (it, &c->procs, next) {
61d7a733 3846 struct lxc_proc *proc = it->elem;
0fd73091 3847
61d7a733
YT
3848 if (!all && strcmp(proc->filename, k) != 0)
3849 continue;
0fd73091 3850
61d7a733
YT
3851 lxc_list_del(it);
3852 free(proc->filename);
3853 free(proc->value);
3854 free(proc);
3855 free(it);
3856 }
3857
3858 return 0;
3859}
3860
ee1e7aa0
SG
3861int lxc_clear_groups(struct lxc_conf *c)
3862{
0fd73091 3863 struct lxc_list *it, *next;
ee1e7aa0 3864
0fd73091 3865 lxc_list_for_each_safe (it, &c->groups, next) {
ee1e7aa0
SG
3866 lxc_list_del(it);
3867 free(it->elem);
3868 free(it);
3869 }
0fd73091 3870
ee1e7aa0
SG
3871 return 0;
3872}
3873
ab799c0b
SG
3874int lxc_clear_environment(struct lxc_conf *c)
3875{
0fd73091 3876 struct lxc_list *it, *next;
ab799c0b 3877
0fd73091 3878 lxc_list_for_each_safe (it, &c->environment, next) {
ab799c0b
SG
3879 lxc_list_del(it);
3880 free(it->elem);
3881 free(it);
3882 }
0fd73091 3883
ab799c0b
SG
3884 return 0;
3885}
3886
72d0e1cb
SG
3887int lxc_clear_mount_entries(struct lxc_conf *c)
3888{
0fd73091 3889 struct lxc_list *it, *next;
72d0e1cb 3890
0fd73091 3891 lxc_list_for_each_safe (it, &c->mount_list, next) {
72d0e1cb
SG
3892 lxc_list_del(it);
3893 free(it->elem);
3894 free(it);
3895 }
0fd73091 3896
72d0e1cb
SG
3897 return 0;
3898}
3899
b099e9e9
SH
3900int lxc_clear_automounts(struct lxc_conf *c)
3901{
3902 c->auto_mounts = 0;
3903 return 0;
3904}
3905
12a50cc6 3906int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3907{
72d0e1cb 3908 int i;
0fd73091
CB
3909 struct lxc_list *it, *next;
3910 const char *k = NULL;
3911 bool all = false, done = false;
72d0e1cb 3912
17ed13a3
SH
3913 if (strcmp(key, "lxc.hook") == 0)
3914 all = true;
0fd73091
CB
3915 else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.") - 1) == 0)
3916 k = key + sizeof("lxc.hook.") - 1;
a6390f01
WB
3917 else
3918 return -1;
17ed13a3 3919
0fd73091 3920 for (i = 0; i < NUM_LXC_HOOKS; i++) {
17ed13a3 3921 if (all || strcmp(k, lxchook_names[i]) == 0) {
0fd73091 3922 lxc_list_for_each_safe (it, &c->hooks[i], next) {
17ed13a3
SH
3923 lxc_list_del(it);
3924 free(it->elem);
3925 free(it);
3926 }
0fd73091 3927
17ed13a3 3928 done = true;
72d0e1cb
SG
3929 }
3930 }
17ed13a3
SH
3931
3932 if (!done) {
3933 ERROR("Invalid hook key: %s", key);
3934 return -1;
3935 }
0fd73091 3936
72d0e1cb
SG
3937 return 0;
3938}
8eb5694b 3939
4184c3e1
SH
3940static inline void lxc_clear_aliens(struct lxc_conf *conf)
3941{
0fd73091 3942 struct lxc_list *it, *next;
4184c3e1 3943
0fd73091 3944 lxc_list_for_each_safe (it, &conf->aliens, next) {
4184c3e1
SH
3945 lxc_list_del(it);
3946 free(it->elem);
3947 free(it);
3948 }
3949}
3950
c7b15d1e 3951void lxc_clear_includes(struct lxc_conf *conf)
f979ac15 3952{
0fd73091 3953 struct lxc_list *it, *next;
f979ac15 3954
0fd73091 3955 lxc_list_for_each_safe (it, &conf->includes, next) {
f979ac15
SH
3956 lxc_list_del(it);
3957 free(it->elem);
3958 free(it);
3959 }
3960}
3961
8eb5694b
SH
3962void lxc_conf_free(struct lxc_conf *conf)
3963{
3964 if (!conf)
3965 return;
0fd73091 3966
858377e4
SH
3967 if (current_config == conf)
3968 current_config = NULL;
aed105d5 3969 lxc_terminal_conf_free(&conf->console);
f10fad2f 3970 free(conf->rootfs.mount);
b3b8c97f 3971 free(conf->rootfs.bdev_type);
f10fad2f
ME
3972 free(conf->rootfs.options);
3973 free(conf->rootfs.path);
f10fad2f 3974 free(conf->logfile);
858377e4
SH
3975 if (conf->logfd != -1)
3976 close(conf->logfd);
f10fad2f 3977 free(conf->utsname);
885766f5
CB
3978 free(conf->ttys.dir);
3979 free(conf->ttys.tty_names);
f10fad2f
ME
3980 free(conf->fstab);
3981 free(conf->rcfile);
5cda27c1 3982 free(conf->execute_cmd);
f10fad2f 3983 free(conf->init_cmd);
3c491553 3984 free(conf->init_cwd);
6b0d5538 3985 free(conf->unexpanded_config);
76d0127f 3986 free(conf->syslog);
c302b476 3987 lxc_free_networks(&conf->network);
f10fad2f
ME
3988 free(conf->lsm_aa_profile);
3989 free(conf->lsm_se_context);
769872f9 3990 lxc_seccomp_free(conf);
8eb5694b 3991 lxc_clear_config_caps(conf);
1fb86a7c 3992 lxc_clear_config_keepcaps(conf);
54860ed0
CB
3993 lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
3994 lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
17ed13a3 3995 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3996 lxc_clear_mount_entries(conf);
27c27d73 3997 lxc_clear_idmaps(conf);
ee1e7aa0 3998 lxc_clear_groups(conf);
f979ac15 3999 lxc_clear_includes(conf);
761d81ca 4000 lxc_clear_aliens(conf);
ab799c0b 4001 lxc_clear_environment(conf);
240d4b74 4002 lxc_clear_limits(conf, "lxc.prlimit");
7edd0540 4003 lxc_clear_sysctls(conf, "lxc.sysctl");
61d7a733 4004 lxc_clear_procs(conf, "lxc.proc");
43654d34
CB
4005 free(conf->cgroup_meta.dir);
4006 free(conf->cgroup_meta.controllers);
8eb5694b
SH
4007 free(conf);
4008}
4355ab5f
SH
4009
/* Arguments handed to run_userns_fn(). */
struct userns_fn_data {
	/* Function to call once the parent has signalled through the pipe. */
	int (*fn)(void *);

	/* Optional name of @fn, only used for trace logging; may be NULL. */
	const char *fn_name;

	/* Opaque argument passed to @fn. */
	void *arg;

	/* Pipe used for synchronization: p[0] is read from, p[1] is closed
	 * by run_userns_fn() as the write end.
	 */
	int p[2];
};
4016
4017static int run_userns_fn(void *data)
4018{
4355ab5f 4019 char c;
0fd73091 4020 struct userns_fn_data *d = data;
4355ab5f 4021
f8aa4bf3 4022 /* Close write end of the pipe. */
4355ab5f 4023 close(d->p[1]);
f8aa4bf3
CB
4024
4025 /* Wait for parent to finish establishing a new mapping in the user
4026 * namespace we are executing in.
4027 */
489f39be 4028 if (lxc_read_nointr(d->p[0], &c, 1) != 1)
4355ab5f 4029 return -1;
f8aa4bf3
CB
4030
4031 /* Close read end of the pipe. */
4355ab5f 4032 close(d->p[0]);
f8aa4bf3 4033
c9b7c33e
CB
4034 if (d->fn_name)
4035 TRACE("calling function \"%s\"", d->fn_name);
0fd73091 4036
f8aa4bf3 4037 /* Call function to run. */
4355ab5f
SH
4038 return d->fn(d->arg);
4039}
4040
db7cfe23
CB
4041static struct id_map *mapped_nsid_add(struct lxc_conf *conf, unsigned id,
4042 enum idtype idtype)
4043{
5173b710
CB
4044 const struct id_map *map;
4045 struct id_map *retmap;
db7cfe23
CB
4046
4047 map = find_mapped_nsid_entry(conf, id, idtype);
4048 if (!map)
4049 return NULL;
4050
4051 retmap = malloc(sizeof(*retmap));
4052 if (!retmap)
4053 return NULL;
4054
4055 memcpy(retmap, map, sizeof(*retmap));
4056 return retmap;
4057}
4058
c4333195
CB
4059static struct id_map *find_mapped_hostid_entry(struct lxc_conf *conf,
4060 unsigned id, enum idtype idtype)
f8aa4bf3 4061{
f8aa4bf3 4062 struct id_map *map;
0fd73091 4063 struct lxc_list *it;
f8aa4bf3
CB
4064 struct id_map *retmap = NULL;
4065
0fd73091 4066 lxc_list_for_each (it, &conf->id_map) {
f8aa4bf3
CB
4067 map = it->elem;
4068 if (map->idtype != idtype)
4069 continue;
4070
4071 if (id >= map->hostid && id < map->hostid + map->range) {
4072 retmap = map;
4073 break;
4074 }
4075 }
4076
f8aa4bf3
CB
4077 return retmap;
4078}
4079
0fd73091 4080/* Allocate a new {g,u}id mapping for the given {g,u}id. Re-use an already
f8aa4bf3 4081 * existing one or establish a new one.
4355ab5f 4082 */
0fd73091
CB
4083static struct id_map *mapped_hostid_add(struct lxc_conf *conf, uid_t id,
4084 enum idtype type)
4355ab5f 4085{
28a2d9e7 4086 int hostid_mapped;
c4333195
CB
4087 struct id_map *entry = NULL, *tmp = NULL;
4088
4089 entry = malloc(sizeof(*entry));
4090 if (!entry)
4091 return NULL;
f8aa4bf3 4092
28a2d9e7 4093 /* Reuse existing mapping. */
c4333195
CB
4094 tmp = find_mapped_hostid_entry(conf, id, type);
4095 if (tmp)
4096 return memcpy(entry, tmp, sizeof(*entry));
f8aa4bf3 4097
28a2d9e7
CB
4098 /* Find new mapping. */
4099 hostid_mapped = find_unmapped_nsid(conf, type);
4100 if (hostid_mapped < 0) {
c4333195
CB
4101 DEBUG("Failed to find free mapping for id %d", id);
4102 free(entry);
28a2d9e7 4103 return NULL;
f8aa4bf3 4104 }
f8aa4bf3 4105
28a2d9e7
CB
4106 entry->idtype = type;
4107 entry->nsid = hostid_mapped;
4108 entry->hostid = (unsigned long)id;
4109 entry->range = 1;
4355ab5f 4110
28a2d9e7 4111 return entry;
4355ab5f
SH
4112}
4113
dcf0ffdf 4114struct lxc_list *get_minimal_idmap(struct lxc_conf *conf)
4355ab5f 4115{
f8aa4bf3 4116 uid_t euid, egid;
4160c3a0
CB
4117 uid_t nsuid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
4118 gid_t nsgid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
f8aa4bf3 4119 struct lxc_list *idmap = NULL, *tmplist = NULL;
28a2d9e7
CB
4120 struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
4121 *host_uid_map = NULL, *host_gid_map = NULL;
4355ab5f 4122
db7cfe23 4123 /* Find container root mappings. */
4160c3a0 4124 container_root_uid = mapped_nsid_add(conf, nsuid, ID_TYPE_UID);
db7cfe23 4125 if (!container_root_uid) {
dcf0ffdf 4126 DEBUG("Failed to find mapping for namespace uid %d", 0);
db7cfe23 4127 goto on_error;
f8aa4bf3 4128 }
dcf0ffdf
CB
4129 euid = geteuid();
4130 if (euid >= container_root_uid->hostid &&
4131 euid < (container_root_uid->hostid + container_root_uid->range))
db7cfe23 4132 host_uid_map = container_root_uid;
f8aa4bf3 4133
4160c3a0 4134 container_root_gid = mapped_nsid_add(conf, nsgid, ID_TYPE_GID);
db7cfe23 4135 if (!container_root_gid) {
dcf0ffdf 4136 DEBUG("Failed to find mapping for namespace gid %d", 0);
f8aa4bf3
CB
4137 goto on_error;
4138 }
dcf0ffdf
CB
4139 egid = getegid();
4140 if (egid >= container_root_gid->hostid &&
4141 egid < (container_root_gid->hostid + container_root_gid->range))
db7cfe23 4142 host_gid_map = container_root_gid;
f8aa4bf3
CB
4143
4144 /* Check whether the {g,u}id of the user has a mapping. */
954b7d9b 4145 if (!host_uid_map)
c4333195 4146 host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
28a2d9e7 4147 if (!host_uid_map) {
db7cfe23 4148 DEBUG("Failed to find mapping for uid %d", euid);
f8aa4bf3
CB
4149 goto on_error;
4150 }
4151
dcf0ffdf
CB
4152 if (!host_gid_map)
4153 host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
28a2d9e7 4154 if (!host_gid_map) {
db7cfe23 4155 DEBUG("Failed to find mapping for gid %d", egid);
28a2d9e7
CB
4156 goto on_error;
4157 }
4158
4159 /* Allocate new {g,u}id map list. */
4160 idmap = malloc(sizeof(*idmap));
4161 if (!idmap)
4162 goto on_error;
4163 lxc_list_init(idmap);
4164
f8aa4bf3
CB
4165 /* Add container root to the map. */
4166 tmplist = malloc(sizeof(*tmplist));
4167 if (!tmplist)
4168 goto on_error;
4169 lxc_list_add_elem(tmplist, container_root_uid);
4170 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4171
1d90e064 4172 if (host_uid_map && (host_uid_map != container_root_uid)) {
28a2d9e7
CB
4173 /* idmap will now keep track of that memory. */
4174 container_root_uid = NULL;
4175
4176 /* Add container root to the map. */
4177 tmplist = malloc(sizeof(*tmplist));
4178 if (!tmplist)
4179 goto on_error;
4180 lxc_list_add_elem(tmplist, host_uid_map);
4181 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4182 }
1d90e064
CB
4183 /* idmap will now keep track of that memory. */
4184 container_root_uid = NULL;
4185 /* idmap will now keep track of that memory. */
4186 host_uid_map = NULL;
f8aa4bf3
CB
4187
4188 tmplist = malloc(sizeof(*tmplist));
4189 if (!tmplist)
4190 goto on_error;
4191 lxc_list_add_elem(tmplist, container_root_gid);
4192 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4193
1d90e064 4194 if (host_gid_map && (host_gid_map != container_root_gid)) {
28a2d9e7
CB
4195 /* idmap will now keep track of that memory. */
4196 container_root_gid = NULL;
4197
4198 tmplist = malloc(sizeof(*tmplist));
4199 if (!tmplist)
4200 goto on_error;
4201 lxc_list_add_elem(tmplist, host_gid_map);
4202 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4203 }
1d90e064
CB
4204 /* idmap will now keep track of that memory. */
4205 container_root_gid = NULL;
4206 /* idmap will now keep track of that memory. */
4207 host_gid_map = NULL;
f8aa4bf3 4208
dcf0ffdf
CB
4209 TRACE("Allocated minimal idmapping");
4210 return idmap;
4211
4212on_error:
4dc41f99 4213 if (idmap) {
dcf0ffdf 4214 lxc_free_idmap(idmap);
4dc41f99
SX
4215 free(idmap);
4216 }
dcf0ffdf
CB
4217 if (container_root_uid)
4218 free(container_root_uid);
4219 if (container_root_gid)
4220 free(container_root_gid);
4221 if (host_uid_map && (host_uid_map != container_root_uid))
4222 free(host_uid_map);
4223 if (host_gid_map && (host_gid_map != container_root_gid))
4224 free(host_gid_map);
4225
4226 return NULL;
4227}
4228
4229/* Run a function in a new user namespace.
4230 * The caller's euid/egid will be mapped if it is not already.
4231 * Afaict, userns_exec_1() is only used to operate based on privileges for the
4232 * user's own {g,u}id on the host and for the container root's unmapped {g,u}id.
4233 * This means we require only to establish a mapping from:
4234 * - the container root {g,u}id as seen from the host > user's host {g,u}id
4235 * - the container root -> some sub{g,u}id
4236 * The former we add, if the user did not specifiy a mapping. The latter we
4237 * retrieve from the ontainer's configured {g,u}id mappings as it must have been
4238 * there to start the container in the first place.
4239 */
4240int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
4241 const char *fn_name)
4242{
4243 pid_t pid;
dcf0ffdf 4244 int p[2];
0fd73091 4245 struct userns_fn_data d;
dcf0ffdf 4246 struct lxc_list *idmap;
0fd73091
CB
4247 int ret = -1, status = -1;
4248 char c = '1';
dcf0ffdf 4249
2b2655a8
CB
4250 if (!conf)
4251 return -EINVAL;
4252
dcf0ffdf
CB
4253 idmap = get_minimal_idmap(conf);
4254 if (!idmap)
4255 return -1;
4256
4257 ret = pipe(p);
4258 if (ret < 0) {
4259 SYSERROR("Failed to create pipe");
4260 return -1;
4261 }
4262 d.fn = fn;
4263 d.fn_name = fn_name;
4264 d.arg = data;
4265 d.p[0] = p[0];
4266 d.p[1] = p[1];
4267
4268 /* Clone child in new user namespace. */
4269 pid = lxc_raw_clone_cb(run_userns_fn, &d, CLONE_NEWUSER);
4270 if (pid < 0) {
0fd73091 4271 ERROR("Failed to clone process in new user namespace");
dcf0ffdf
CB
4272 goto on_error;
4273 }
4274
4275 close(p[0]);
4276 p[0] = -1;
4277
4b73005c
CB
4278 if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
4279 conf->loglevel == LXC_LOG_LEVEL_TRACE) {
dcf0ffdf 4280 struct id_map *map;
0fd73091 4281 struct lxc_list *it;
dcf0ffdf 4282
0fd73091 4283 lxc_list_for_each (it, idmap) {
f8aa4bf3 4284 map = it->elem;
dcf0ffdf 4285 TRACE("Establishing %cid mapping for \"%d\" in new "
f8aa4bf3 4286 "user namespace: nsuid %lu - hostid %lu - range "
0fd73091
CB
4287 "%lu",
4288 (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
4289 map->nsid, map->hostid, map->range);
f8aa4bf3 4290 }
4355ab5f
SH
4291 }
4292
f8aa4bf3 4293 /* Set up {g,u}id mapping for user namespace of child process. */
4355ab5f 4294 ret = lxc_map_ids(idmap, pid);
f8aa4bf3 4295 if (ret < 0) {
0fd73091 4296 ERROR("Error setting up {g,u}id mappings for child process \"%d\"", pid);
f8aa4bf3 4297 goto on_error;
4355ab5f
SH
4298 }
4299
f8aa4bf3 4300 /* Tell child to proceed. */
489f39be 4301 if (lxc_write_nointr(p[1], &c, 1) != 1) {
dcf0ffdf 4302 SYSERROR("Failed telling child process \"%d\" to proceed", pid);
f8aa4bf3 4303 goto on_error;
4355ab5f
SH
4304 }
4305
686dd5d1 4306on_error:
4355ab5f
SH
4307 if (p[0] != -1)
4308 close(p[0]);
4309 close(p[1]);
f8aa4bf3 4310
ee1b16bc
TA
4311 /* Wait for child to finish. */
4312 if (pid > 0)
4313 status = wait_for_pid(pid);
4314
686dd5d1
CB
4315 if (status < 0)
4316 ret = -1;
4317
f8aa4bf3 4318 return ret;
4355ab5f 4319}
97e9cfa0 4320
415a8851
CB
4321int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data,
4322 const char *fn_name)
4323{
4324 pid_t pid;
4325 uid_t euid, egid;
415a8851
CB
4326 int p[2];
4327 struct id_map *map;
4328 struct lxc_list *cur;
0fd73091 4329 struct userns_fn_data d;
415a8851 4330 int ret = -1;
0fd73091 4331 char c = '1';
415a8851
CB
4332 struct lxc_list *idmap = NULL, *tmplist = NULL;
4333 struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
4334 *host_uid_map = NULL, *host_gid_map = NULL;
4335
2b2655a8
CB
4336 if (!conf)
4337 return -EINVAL;
4338
415a8851
CB
4339 ret = pipe(p);
4340 if (ret < 0) {
4341 SYSERROR("opening pipe");
4342 return -1;
4343 }
4344 d.fn = fn;
4345 d.fn_name = fn_name;
4346 d.arg = data;
4347 d.p[0] = p[0];
4348 d.p[1] = p[1];
4349
4350 /* Clone child in new user namespace. */
4351 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4352 if (pid < 0) {
0fd73091 4353 ERROR("Failed to clone process in new user namespace");
415a8851
CB
4354 goto on_error;
4355 }
4356
4357 close(p[0]);
4358 p[0] = -1;
4359
4360 euid = geteuid();
4361 egid = getegid();
4362
4363 /* Allocate new {g,u}id map list. */
4364 idmap = malloc(sizeof(*idmap));
4365 if (!idmap)
4366 goto on_error;
4367 lxc_list_init(idmap);
4368
4369 /* Find container root. */
0fd73091 4370 lxc_list_for_each (cur, &conf->id_map) {
415a8851
CB
4371 struct id_map *tmpmap;
4372
4373 tmplist = malloc(sizeof(*tmplist));
4374 if (!tmplist)
4375 goto on_error;
4376
4377 tmpmap = malloc(sizeof(*tmpmap));
4378 if (!tmpmap) {
4379 free(tmplist);
4380 goto on_error;
4381 }
4382
4383 memset(tmpmap, 0, sizeof(*tmpmap));
4384 memcpy(tmpmap, cur->elem, sizeof(*tmpmap));
4385 tmplist->elem = tmpmap;
4386
4387 lxc_list_add_tail(idmap, tmplist);
4388
4389 map = cur->elem;
4390
4391 if (map->idtype == ID_TYPE_UID)
4392 if (euid >= map->hostid && euid < map->hostid + map->range)
4393 host_uid_map = map;
4394
4395 if (map->idtype == ID_TYPE_GID)
4396 if (egid >= map->hostid && egid < map->hostid + map->range)
4397 host_gid_map = map;
4398
4399 if (map->nsid != 0)
4400 continue;
4401
4402 if (map->idtype == ID_TYPE_UID)
4403 if (container_root_uid == NULL)
4404 container_root_uid = map;
4405
4406 if (map->idtype == ID_TYPE_GID)
4407 if (container_root_gid == NULL)
4408 container_root_gid = map;
4409 }
4410
4411 if (!container_root_uid || !container_root_gid) {
4412 ERROR("No mapping for container root found");
4413 goto on_error;
4414 }
4415
4416 /* Check whether the {g,u}id of the user has a mapping. */
4417 if (!host_uid_map)
c4333195 4418 host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
415a8851
CB
4419 else
4420 host_uid_map = container_root_uid;
4421
4422 if (!host_gid_map)
c4333195 4423 host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
415a8851
CB
4424 else
4425 host_gid_map = container_root_gid;
4426
4427 if (!host_uid_map) {
4428 DEBUG("Failed to find mapping for uid %d", euid);
4429 goto on_error;
4430 }
4431
4432 if (!host_gid_map) {
4433 DEBUG("Failed to find mapping for gid %d", egid);
4434 goto on_error;
4435 }
4436
4437 if (host_uid_map && (host_uid_map != container_root_uid)) {
4438 /* Add container root to the map. */
4439 tmplist = malloc(sizeof(*tmplist));
4440 if (!tmplist)
4441 goto on_error;
4442 lxc_list_add_elem(tmplist, host_uid_map);
4443 lxc_list_add_tail(idmap, tmplist);
4444 }
4445 /* idmap will now keep track of that memory. */
4446 host_uid_map = NULL;
4447
4448 if (host_gid_map && (host_gid_map != container_root_gid)) {
4449 tmplist = malloc(sizeof(*tmplist));
4450 if (!tmplist)
4451 goto on_error;
4452 lxc_list_add_elem(tmplist, host_gid_map);
4453 lxc_list_add_tail(idmap, tmplist);
4454 }
4455 /* idmap will now keep track of that memory. */
4456 host_gid_map = NULL;
4457
4458 if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
4459 conf->loglevel == LXC_LOG_LEVEL_TRACE) {
0fd73091 4460 lxc_list_for_each (cur, idmap) {
415a8851
CB
4461 map = cur->elem;
4462 TRACE("establishing %cid mapping for \"%d\" in new "
4463 "user namespace: nsuid %lu - hostid %lu - range "
4464 "%lu",
4465 (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
4466 map->nsid, map->hostid, map->range);
4467 }
4468 }
4469
4470 /* Set up {g,u}id mapping for user namespace of child process. */
4471 ret = lxc_map_ids(idmap, pid);
4472 if (ret < 0) {
0fd73091 4473 ERROR("error setting up {g,u}id mappings for child process \"%d\"", pid);
415a8851
CB
4474 goto on_error;
4475 }
4476
4477 /* Tell child to proceed. */
489f39be 4478 if (lxc_write_nointr(p[1], &c, 1) != 1) {
0fd73091 4479 SYSERROR("Failed telling child process \"%d\" to proceed", pid);
415a8851
CB
4480 goto on_error;
4481 }
4482
686dd5d1 4483on_error:
ee1b16bc
TA
4484 if (p[0] != -1)
4485 close(p[0]);
4486 close(p[1]);
4487
415a8851 4488 /* Wait for child to finish. */
686dd5d1
CB
4489 if (pid > 0)
4490 ret = wait_for_pid(pid);
415a8851 4491
80758b4b 4492 if (idmap) {
415a8851 4493 lxc_free_idmap(idmap);
80758b4b
DJ
4494 free(idmap);
4495 }
4496
415a8851
CB
4497 if (host_uid_map && (host_uid_map != container_root_uid))
4498 free(host_uid_map);
4499 if (host_gid_map && (host_gid_map != container_root_gid))
4500 free(host_gid_map);
4501
415a8851
CB
4502 return ret;
4503}
4504
/* not thread-safe, do not use from api without first forking */
/* Return a strdup()ed copy of the user name belonging to the caller's
 * effective uid, or NULL if the scratch buffer cannot be allocated or no
 * password record is found. The caller frees the result.
 */
static char *getuname(void)
{
	struct passwd pw_storage;
	struct passwd *pw = NULL;
	char *name = NULL;
	char *scratch;
	size_t scratch_len;
	long hint;
	int err;

	/* Fall back to 1024 bytes when sysconf() reports -1. */
	hint = sysconf(_SC_GETPW_R_SIZE_MAX);
	scratch_len = (hint == -1) ? 1024 : (size_t)hint;

	scratch = malloc(scratch_len);
	if (!scratch)
		return NULL;

	err = getpwuid_r(geteuid(), &pw_storage, scratch, scratch_len, &pw);
	if (pw) {
		name = strdup(pw_storage.pw_name);
	} else {
		/* A zero return with no result means "no such entry". */
		if (err == 0)
			WARN("Could not find matched password record.");

		ERROR("Failed to get password record - %u", geteuid());
	}

	free(scratch);
	return name;
}
4538
/* not thread-safe, do not use from api without first forking */
/* Return a strdup()ed copy of the group name belonging to the caller's
 * effective gid, or NULL if the scratch buffer cannot be allocated or no
 * group record is found. The caller frees the result.
 */
static char *getgname(void)
{
	struct group gr_storage;
	struct group *gr = NULL;
	char *name = NULL;
	char *scratch;
	size_t scratch_len;
	long hint;
	int err;

	/* Fall back to 1024 bytes when sysconf() reports -1. */
	hint = sysconf(_SC_GETGR_R_SIZE_MAX);
	scratch_len = (hint == -1) ? 1024 : (size_t)hint;

	scratch = malloc(scratch_len);
	if (!scratch)
		return NULL;

	err = getgrgid_r(getegid(), &gr_storage, scratch, scratch_len, &gr);
	if (gr) {
		name = strdup(gr_storage.gr_name);
	} else {
		/* A zero return with no result means "no such entry". */
		if (err == 0)
			WARN("Could not find matched group record");

		ERROR("Failed to get group record - %u", getegid());
	}

	free(scratch);
	return name;
}
4572
a96a8e8c 4573/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4574void suggest_default_idmap(void)
4575{
0fd73091 4576 char *uname, *gname;
97e9cfa0
SH
4577 FILE *f;
4578 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
97e9cfa0 4579 size_t len = 0;
0fd73091 4580 char *line = NULL;
97e9cfa0 4581
0fd73091
CB
4582 uname = getuname();
4583 if (!uname)
97e9cfa0
SH
4584 return;
4585
0fd73091
CB
4586 gname = getgname();
4587 if (!gname) {
97e9cfa0
SH
4588 free(uname);
4589 return;
4590 }
4591
4592 f = fopen(subuidfile, "r");
4593 if (!f) {
4594 ERROR("Your system is not configured with subuids");
4595 free(gname);
4596 free(uname);
4597 return;
4598 }
0fd73091 4599
97e9cfa0 4600 while (getline(&line, &len, f) != -1) {
0fd73091 4601 char *p, *p2;
b7930180 4602 size_t no_newline = 0;
0fd73091
CB
4603
4604 p = strchr(line, ':');
97e9cfa0
SH
4605 if (*line == '#')
4606 continue;
4607 if (!p)
4608 continue;
4609 *p = '\0';
4610 p++;
0fd73091 4611
97e9cfa0
SH
4612 if (strcmp(line, uname))
4613 continue;
0fd73091 4614
97e9cfa0
SH
4615 p2 = strchr(p, ':');
4616 if (!p2)
4617 continue;
4618 *p2 = '\0';
4619 p2++;
4620 if (!*p2)
4621 continue;
b7930180
CB
4622 no_newline = strcspn(p2, "\n");
4623 p2[no_newline] = '\0';
4624
b7b2fde4 4625 if (lxc_safe_uint(p, &uid) < 0)
0fd73091 4626 WARN("Could not parse UID");
b7b2fde4 4627 if (lxc_safe_uint(p2, &urange) < 0)
0fd73091 4628 WARN("Could not parse UID range");
97e9cfa0
SH
4629 }
4630 fclose(f);
4631
6be7389a 4632 f = fopen(subgidfile, "r");
97e9cfa0
SH
4633 if (!f) {
4634 ERROR("Your system is not configured with subgids");
4635 free(gname);
4636 free(uname);
4637 return;
4638 }
0fd73091 4639
97e9cfa0 4640 while (getline(&line, &len, f) != -1) {
0fd73091 4641 char *p, *p2;
b7930180 4642 size_t no_newline = 0;
0fd73091
CB
4643
4644 p = strchr(line, ':');
97e9cfa0
SH
4645 if (*line == '#')
4646 continue;
4647 if (!p)
4648 continue;
4649 *p = '\0';
4650 p++;
0fd73091 4651
97e9cfa0
SH
4652 if (strcmp(line, uname))
4653 continue;
0fd73091 4654
97e9cfa0
SH
4655 p2 = strchr(p, ':');
4656 if (!p2)
4657 continue;
4658 *p2 = '\0';
4659 p2++;
4660 if (!*p2)
4661 continue;
b7930180
CB
4662 no_newline = strcspn(p2, "\n");
4663 p2[no_newline] = '\0';
4664
b7b2fde4 4665 if (lxc_safe_uint(p, &gid) < 0)
0fd73091 4666 WARN("Could not parse GID");
b7b2fde4 4667 if (lxc_safe_uint(p2, &grange) < 0)
0fd73091 4668 WARN("Could not parse GID range");
97e9cfa0
SH
4669 }
4670 fclose(f);
4671
f10fad2f 4672 free(line);
97e9cfa0
SH
4673
4674 if (!urange || !grange) {
4675 ERROR("You do not have subuids or subgids allocated");
4676 ERROR("Unprivileged containers require subuids and subgids");
fbd4a4d1 4677 free(uname);
1e7cd2f7 4678 free(gname);
97e9cfa0
SH
4679 return;
4680 }
4681
4682 ERROR("You must either run as root, or define uid mappings");
4683 ERROR("To pass uid mappings to lxc-create, you could create");
4684 ERROR("~/.config/lxc/default.conf:");
4685 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
bdcbb6b3
CB
4686 ERROR("lxc.idmap = u 0 %u %u", uid, urange);
4687 ERROR("lxc.idmap = g 0 %u %u", gid, grange);
97e9cfa0
SH
4688
4689 free(gname);
4690 free(uname);
4691}
aaf26830 4692
a7307747
SH
4693static void free_cgroup_settings(struct lxc_list *result)
4694{
4695 struct lxc_list *iterator, *next;
4696
0fd73091 4697 lxc_list_for_each_safe (iterator, result, next) {
a7307747
SH
4698 lxc_list_del(iterator);
4699 free(iterator);
4700 }
4701 free(result);
4702}
4703
0fd73091 4704/* Return the list of cgroup_settings sorted according to the following rules
aaf26830
KT
4705 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4706 */
0fd73091 4707struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings)
aaf26830
KT
4708{
4709 struct lxc_list *result;
aaf26830 4710 struct lxc_cgroup *cg = NULL;
0fd73091 4711 struct lxc_list *it = NULL, *item = NULL, *memsw_limit = NULL;
aaf26830
KT
4712
4713 result = malloc(sizeof(*result));
0fd73091 4714 if (!result)
fac7c663 4715 return NULL;
aaf26830
KT
4716 lxc_list_init(result);
4717
0fd73091
CB
4718 /* Iterate over the cgroup settings and copy them to the output list. */
4719 lxc_list_for_each (it, cgroup_settings) {
aaf26830 4720 item = malloc(sizeof(*item));
fac7c663 4721 if (!item) {
a7307747 4722 free_cgroup_settings(result);
fac7c663
KT
4723 return NULL;
4724 }
0fd73091 4725
aaf26830
KT
4726 item->elem = it->elem;
4727 cg = it->elem;
4728 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4729 /* Store the memsw_limit location */
4730 memsw_limit = item;
0fd73091
CB
4731 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 &&
4732 memsw_limit != NULL) {
4733 /* lxc.cgroup.memory.memsw.limit_in_bytes is found
4734 * before lxc.cgroup.memory.limit_in_bytes, swap these
4735 * two items */
aaf26830
KT
4736 item->elem = memsw_limit->elem;
4737 memsw_limit->elem = it->elem;
4738 }
4739 lxc_list_add_tail(result, item);
4740 }
4741
4742 return result;
a7307747 4743}