]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
secure coding: #2 strcpy => strlcpy
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
1d52bdf7
CB
23
24#define _GNU_SOURCE
d06245b8
NC
25#include "config.h"
26
9d257a2a 27#include <arpa/inet.h>
8f3e280e
CB
28#include <dirent.h>
29#include <errno.h>
30#include <fcntl.h>
31#include <grp.h>
32#include <inttypes.h>
33#include <libgen.h>
9d257a2a
CB
34#include <linux/loop.h>
35#include <net/if.h>
36#include <netinet/in.h>
8f3e280e
CB
37#include <pwd.h>
38#include <stdarg.h>
0ad19a3f 39#include <stdio.h>
0ad19a3f 40#include <stdlib.h>
0ad19a3f 41#include <string.h>
8f3e280e
CB
42#include <sys/mman.h>
43#include <sys/mount.h>
44#include <sys/param.h>
45#include <sys/prctl.h>
6a49f05e 46#include <sys/sendfile.h>
8f3e280e 47#include <sys/socket.h>
9d257a2a 48#include <sys/stat.h>
2d76d1d7 49#include <sys/syscall.h>
9d257a2a 50#include <sys/sysmacros.h>
97e9cfa0 51#include <sys/types.h>
8f3e280e
CB
52#include <sys/utsname.h>
53#include <sys/wait.h>
9d257a2a
CB
54#include <time.h>
55#include <unistd.h>
1d52bdf7 56
af6824fc 57#ifdef MAJOR_IN_MKDEV
9d257a2a 58#include <sys/mkdev.h>
af6824fc 59#endif
af6824fc 60
614305f3 61#ifdef HAVE_STATVFS
2938f7c8 62#include <sys/statvfs.h>
614305f3 63#endif
e827ff7e
SG
64
65#if HAVE_PTY_H
b0a33c1e 66#include <pty.h>
e827ff7e
SG
67#else
68#include <../include/openpty.h>
69#endif
0ad19a3f 70
9d257a2a
CB
71#if HAVE_LIBCAP
72#include <sys/capability.h>
73#endif
74
75#if HAVE_SYS_PERSONALITY_H
76#include <sys/personality.h>
77#endif
78
79#if IS_BIONIC
80#include <../include/lxcmntent.h>
81#else
82#include <mntent.h>
83#endif
84
85#if !defined(HAVE_PRLIMIT) && defined(HAVE_PRLIMIT64)
86#include <../include/prlimit.h>
87#endif
88
e8bd4e43 89#include "af_unix.h"
9d257a2a 90#include "caps.h"
8f3e280e 91#include "cgroup.h"
1b09f2c0 92#include "conf.h"
1ed6ba91 93#include "confile_utils.h"
8f3e280e 94#include "error.h"
1b09f2c0 95#include "log.h"
0ed9b1bc 96#include "lsm/lsm.h"
025ed0f3 97#include "lxclock.h"
8f3e280e 98#include "lxcseccomp.h"
4355ab5f 99#include "namespace.h"
8f3e280e
CB
100#include "network.h"
101#include "parse.h"
732375f5 102#include "ringbuf.h"
794248d0 103#include "start.h"
28d832c4 104#include "storage.h"
28d832c4 105#include "storage/overlay.h"
0ed9b1bc 106#include "terminal.h"
8f3e280e 107#include "utils.h"
d0a36f2c 108
/* Fallback values for mount flags that the C library headers in use may not
 * define.
 */
#ifndef MS_PRIVATE
#define MS_PRIVATE (1 << 18)
#endif

#ifndef MS_LAZYTIME
#define MS_LAZYTIME (1 << 25)
#endif
116
lxc_log_define(lxc_conf, lxc);

/* Configuration of the container that the API call in progress is operating
 * on; the error reporting calls make use of it. The variable is thread-local
 * when the build has TLS support (HAVE_TLS) and a plain global otherwise.
 */
#ifdef HAVE_TLS
__thread struct lxc_conf *current_config;
#else
struct lxc_conf *current_config;
#endif
127
/* pivot_root() is not provided by every C library. When it is available we
 * only declare it; otherwise a local wrapper issues the raw system call, or
 * fails with ENOSYS if the syscall number is unknown at build time.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifdef __NR_pivot_root
	return syscall(__NR_pivot_root, new_root, put_old);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
142
0fd73091
CB
143char *lxchook_names[NUM_LXC_HOOKS] = {
144 "pre-start",
145 "pre-mount",
146 "mount",
147 "autodev",
148 "start",
149 "stop",
150 "post-stop",
151 "clone",
152 "destroy",
153 "start-host"
154};
72d0e1cb 155
/* One textual mount option and the mount flag it maps to. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* non-zero on the entries that negate a flag ("rw", "exec", ...);
			 * presumably tells the parser to clear @flag instead of setting it */
	int flag;	/* MS_* bit(s) affected by the option */
};
161
/* Maps a capability name as written in the configuration to its CAP_* value. */
struct caps_opt {
	char *name;	/* lower-case name without the "CAP_" prefix */
	int value;	/* corresponding CAP_* constant */
};
166
/* Maps a resource limit name as written in the configuration to its RLIMIT_*
 * value.
 */
struct limit_opt {
	char *name;	/* lower-case name without the "RLIMIT_" prefix */
	int value;	/* corresponding RLIMIT_* constant */
};
171
998ac676 172static struct mount_opt mount_opt[] = {
470b359b
CB
173 { "async", 1, MS_SYNCHRONOUS },
174 { "atime", 1, MS_NOATIME },
175 { "bind", 0, MS_BIND },
88d413d5 176 { "defaults", 0, 0 },
88d413d5 177 { "dev", 1, MS_NODEV },
470b359b 178 { "diratime", 1, MS_NODIRATIME },
88d413d5 179 { "dirsync", 0, MS_DIRSYNC },
470b359b 180 { "exec", 1, MS_NOEXEC },
8912711c 181 { "lazytime", 0, MS_LAZYTIME },
88d413d5 182 { "mand", 0, MS_MANDLOCK },
88d413d5 183 { "noatime", 0, MS_NOATIME },
470b359b 184 { "nodev", 0, MS_NODEV },
88d413d5 185 { "nodiratime", 0, MS_NODIRATIME },
470b359b
CB
186 { "noexec", 0, MS_NOEXEC },
187 { "nomand", 1, MS_MANDLOCK },
188 { "norelatime", 1, MS_RELATIME },
189 { "nostrictatime", 1, MS_STRICTATIME },
190 { "nosuid", 0, MS_NOSUID },
88d413d5
SW
191 { "rbind", 0, MS_BIND|MS_REC },
192 { "relatime", 0, MS_RELATIME },
470b359b
CB
193 { "remount", 0, MS_REMOUNT },
194 { "ro", 0, MS_RDONLY },
195 { "rw", 1, MS_RDONLY },
88d413d5 196 { "strictatime", 0, MS_STRICTATIME },
470b359b
CB
197 { "suid", 1, MS_NOSUID },
198 { "sync", 0, MS_SYNCHRONOUS },
88d413d5 199 { NULL, 0, 0 },
998ac676
RT
200};
201
d840039e 202static struct mount_opt propagation_opt[] = {
0fd73091
CB
203 { "private", 0, MS_PRIVATE },
204 { "shared", 0, MS_SHARED },
205 { "slave", 0, MS_SLAVE },
206 { "unbindable", 0, MS_UNBINDABLE },
207 { "rprivate", 0, MS_PRIVATE|MS_REC },
208 { "rshared", 0, MS_SHARED|MS_REC },
209 { "rslave", 0, MS_SLAVE|MS_REC },
210 { "runbindable", 0, MS_UNBINDABLE|MS_REC },
211 { NULL, 0, 0 },
d840039e
YT
212};
213
81810dd1 214static struct caps_opt caps_opt[] = {
8560cd36 215#if HAVE_LIBCAP
0fd73091
CB
216 { "chown", CAP_CHOWN },
217 { "dac_override", CAP_DAC_OVERRIDE },
218 { "dac_read_search", CAP_DAC_READ_SEARCH },
219 { "fowner", CAP_FOWNER },
220 { "fsetid", CAP_FSETID },
221 { "kill", CAP_KILL },
222 { "setgid", CAP_SETGID },
223 { "setuid", CAP_SETUID },
224 { "setpcap", CAP_SETPCAP },
225 { "linux_immutable", CAP_LINUX_IMMUTABLE },
226 { "net_bind_service", CAP_NET_BIND_SERVICE },
227 { "net_broadcast", CAP_NET_BROADCAST },
228 { "net_admin", CAP_NET_ADMIN },
229 { "net_raw", CAP_NET_RAW },
230 { "ipc_lock", CAP_IPC_LOCK },
231 { "ipc_owner", CAP_IPC_OWNER },
232 { "sys_module", CAP_SYS_MODULE },
233 { "sys_rawio", CAP_SYS_RAWIO },
234 { "sys_chroot", CAP_SYS_CHROOT },
235 { "sys_ptrace", CAP_SYS_PTRACE },
236 { "sys_pacct", CAP_SYS_PACCT },
237 { "sys_admin", CAP_SYS_ADMIN },
238 { "sys_boot", CAP_SYS_BOOT },
239 { "sys_nice", CAP_SYS_NICE },
240 { "sys_resource", CAP_SYS_RESOURCE },
241 { "sys_time", CAP_SYS_TIME },
242 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
243 { "mknod", CAP_MKNOD },
244 { "lease", CAP_LEASE },
57b837e2 245#ifdef CAP_AUDIT_READ
0fd73091 246 { "audit_read", CAP_AUDIT_READ },
57b837e2 247#endif
9527e566 248#ifdef CAP_AUDIT_WRITE
0fd73091 249 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
250#endif
251#ifdef CAP_AUDIT_CONTROL
0fd73091 252 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 253#endif
0fd73091
CB
254 { "setfcap", CAP_SETFCAP },
255 { "mac_override", CAP_MAC_OVERRIDE },
256 { "mac_admin", CAP_MAC_ADMIN },
5170c716 257#ifdef CAP_SYSLOG
0fd73091 258 { "syslog", CAP_SYSLOG },
5170c716
CS
259#endif
260#ifdef CAP_WAKE_ALARM
0fd73091 261 { "wake_alarm", CAP_WAKE_ALARM },
5170c716 262#endif
2b54359b 263#ifdef CAP_BLOCK_SUSPEND
0fd73091 264 { "block_suspend", CAP_BLOCK_SUSPEND },
2b54359b 265#endif
495d2046 266#endif
8560cd36 267};
81810dd1 268
c6d09e15
WB
269static struct limit_opt limit_opt[] = {
270#ifdef RLIMIT_AS
271 { "as", RLIMIT_AS },
272#endif
273#ifdef RLIMIT_CORE
274 { "core", RLIMIT_CORE },
275#endif
276#ifdef RLIMIT_CPU
277 { "cpu", RLIMIT_CPU },
278#endif
279#ifdef RLIMIT_DATA
280 { "data", RLIMIT_DATA },
281#endif
282#ifdef RLIMIT_FSIZE
283 { "fsize", RLIMIT_FSIZE },
284#endif
285#ifdef RLIMIT_LOCKS
286 { "locks", RLIMIT_LOCKS },
287#endif
288#ifdef RLIMIT_MEMLOCK
289 { "memlock", RLIMIT_MEMLOCK },
290#endif
291#ifdef RLIMIT_MSGQUEUE
292 { "msgqueue", RLIMIT_MSGQUEUE },
293#endif
294#ifdef RLIMIT_NICE
295 { "nice", RLIMIT_NICE },
296#endif
297#ifdef RLIMIT_NOFILE
298 { "nofile", RLIMIT_NOFILE },
299#endif
300#ifdef RLIMIT_NPROC
301 { "nproc", RLIMIT_NPROC },
302#endif
303#ifdef RLIMIT_RSS
304 { "rss", RLIMIT_RSS },
305#endif
306#ifdef RLIMIT_RTPRIO
307 { "rtprio", RLIMIT_RTPRIO },
308#endif
309#ifdef RLIMIT_RTTIME
310 { "rttime", RLIMIT_RTTIME },
311#endif
312#ifdef RLIMIT_SIGPENDING
313 { "sigpending", RLIMIT_SIGPENDING },
314#endif
315#ifdef RLIMIT_STACK
316 { "stack", RLIMIT_STACK },
317#endif
318};
319
91c3830e
SH
320static int run_buffer(char *buffer)
321{
8e7da691 322 int ret;
0fd73091
CB
323 char *output;
324 struct lxc_popen_FILE *f;
91c3830e 325
ebec9176 326 f = lxc_popen(buffer);
91c3830e 327 if (!f) {
3f60c2f7 328 SYSERROR("Failed to popen() %s", buffer);
91c3830e
SH
329 return -1;
330 }
331
332 output = malloc(LXC_LOG_BUFFER_SIZE);
333 if (!output) {
3f60c2f7 334 ERROR("Failed to allocate memory for %s", buffer);
ebec9176 335 lxc_pclose(f);
91c3830e
SH
336 return -1;
337 }
338
062b72c6 339 while (fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
3f60c2f7 340 DEBUG("Script %s with output: %s", buffer, output);
91c3830e
SH
341
342 free(output);
343
ebec9176 344 ret = lxc_pclose(f);
8e7da691 345 if (ret == -1) {
3f60c2f7 346 SYSERROR("Script exited with error");
91c3830e 347 return -1;
8e7da691 348 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
3f60c2f7 349 ERROR("Script exited with status %d", WEXITSTATUS(ret));
8e7da691
DE
350 return -1;
351 } else if (WIFSIGNALED(ret)) {
3f60c2f7 352 ERROR("Script terminated by signal %d", WTERMSIG(ret));
8e7da691 353 return -1;
91c3830e
SH
354 }
355
356 return 0;
357}
358
/* Export @key=@value (overwriting any previous value) into the environment
 * that the hook script will inherit. Logs and returns -1 if setenv() fails,
 * returns 0 otherwise.
 */
static int run_script_setenv(const char *key, const char *value)
{
	int ret;

	ret = setenv(key, value, 1);
	if (ret < 0) {
		SYSERROR("Failed to set environment variable: %s=%s", key, value);
		return -1;
	}
	TRACE("Set environment variable: %s=%s", key, value);

	return 0;
}

/* Build the command line for a hook script and execute it via run_buffer().
 *
 * @name         container name
 * @hook_version 0: pass @name, @section and @hookname as leading command line
 *               arguments ("exec <script> <name> <section> <hookname> ...").
 *               1: run "exec <script> ..." and export LXC_HOOK_TYPE and
 *               LXC_HOOK_SECTION instead. For the "net" section additionally
 *               export LXC_NET_TYPE (argv[0]) and, depending on the type,
 *               LXC_NET_PARENT (argv[1]) and LXC_NET_PEER (argv[2]).
 * @section      configuration section the hook belongs to
 * @script       script to execute
 * @hookname     name of the hook
 * @argv         NULL-terminated list of extra arguments appended to the
 *               command line; may be NULL
 *
 * Returns 0 on success, -EFBIG if the command line would exceed INT_MAX
 * bytes, -ENOMEM if the buffer cannot be allocated and -1 on any other
 * failure.
 */
int run_script_argv(const char *name, unsigned int hook_version,
		    const char *section, const char *script,
		    const char *hookname, char **argv)
{
	int buf_pos, i, ret;
	char *buffer;
	int fret = -1;
	size_t size = 0;

	if (hook_version == 0)
		INFO("Executing script \"%s\" for container \"%s\", config section \"%s\"",
		     script, name, section);
	else
		INFO("Executing script \"%s\" for container \"%s\"", script, name);

	/* Each extra argument is emitted as " <arg>". */
	for (i = 0; argv && argv[i]; i++)
		size += strlen(argv[i]) + 1;

	/* "exec" plus terminating NUL, then " <script>". */
	size += sizeof("exec");
	size += strlen(script);
	size++;

	if (size > INT_MAX)
		return -EFBIG;

	if (hook_version == 0) {
		/* " <hookname>", " <name>" and " <section>". */
		size += strlen(hookname);
		size++;

		size += strlen(name);
		size++;

		size += strlen(section);
		size++;

		if (size > INT_MAX)
			return -EFBIG;
	}

	buffer = malloc(size);
	if (!buffer)
		return -ENOMEM;

	if (hook_version == 0)
		buf_pos = snprintf(buffer, size, "exec %s %s %s %s", script, name, section, hookname);
	else
		buf_pos = snprintf(buffer, size, "exec %s", script);
	if (buf_pos < 0 || (size_t)buf_pos >= size) {
		ERROR("Failed to create command line for script \"%s\"", script);
		goto on_error;
	}

	if (hook_version == 1) {
		if (run_script_setenv("LXC_HOOK_TYPE", hookname) < 0)
			goto on_error;

		if (run_script_setenv("LXC_HOOK_SECTION", section) < 0)
			goto on_error;

		if (strcmp(section, "net") == 0) {
			const char *parent, *peer;

			if (!argv || !argv[0])
				goto on_error;

			if (run_script_setenv("LXC_NET_TYPE", argv[0]) < 0)
				goto on_error;

			parent = argv[1] ? argv[1] : "";
			/* argv[2] only exists if argv[1] was not already the
			 * NULL terminator; never read past the terminator.
			 */
			peer = (argv[1] && argv[2]) ? argv[2] : "";

			if (strcmp(argv[0], "macvlan") == 0 ||
			    strcmp(argv[0], "phys") == 0) {
				if (run_script_setenv("LXC_NET_PARENT", parent) < 0)
					goto on_error;
			} else if (strcmp(argv[0], "veth") == 0) {
				if (run_script_setenv("LXC_NET_PEER", peer) < 0)
					goto on_error;

				if (run_script_setenv("LXC_NET_PARENT", parent) < 0)
					goto on_error;
			}
		}
	}

	for (i = 0; argv && argv[i]; i++) {
		size_t len = size - buf_pos;

		ret = snprintf(buffer + buf_pos, len, " %s", argv[i]);
		if (ret < 0 || (size_t)ret >= len) {
			ERROR("Failed to create command line for script \"%s\"", script);
			goto on_error;
		}
		buf_pos += ret;
	}

	fret = run_buffer(buffer);

on_error:
	free(buffer);
	return fret;
}
499
/* Build "exec <script> <name> <section> [args...]" and execute it via
 * run_buffer().
 *
 * The variadic arguments are char pointers appended to the command line in
 * order; the list must be terminated by a NULL pointer.
 *
 * The command line is heap allocated: its length depends on caller supplied
 * strings and is only bounded by INT_MAX, which is far more than can safely be
 * placed on the stack.
 *
 * Returns the result of run_buffer() on success and -1 if the command line is
 * too long, cannot be allocated or cannot be formatted.
 */
int run_script(const char *name, const char *section, const char *script, ...)
{
	int ret;
	char *buffer, *p;
	va_list ap;
	int fret = -1;
	size_t size = 0;

	INFO("Executing script \"%s\" for container \"%s\", config section \"%s\"",
	     script, name, section);

	/* Each extra argument is emitted as " <arg>". */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen("exec");
	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* Three separating spaces and the terminating NUL byte. */
	size += 4;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer)
		return -1;

	ret = snprintf(buffer, size, "exec %s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size)
		goto on_error;

	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			va_end(ap);
			goto on_error;
		}
		ret += rc;
	}
	va_end(ap);

	fret = run_buffer(buffer);

on_error:
	free(buffer);
	return fret;
}
544
0fd73091 545/* pin_rootfs
63fc76c3 546 * if rootfs is a directory, then open ${rootfs}/.lxc-keep for writing for
b7ed4bf0
CS
547 * the duration of the container run, to prevent the container from marking
548 * the underlying fs readonly on shutdown. unlink the file immediately so
63fc76c3
GJ
549 * no name pollution is happens.
550 * don't unlink on NFS to avoid random named stale handles.
0c547523
SH
551 * return -1 on error.
552 * return -2 if nothing needed to be pinned.
553 * return an open fd (>=0) if we pinned it.
554 */
555int pin_rootfs(const char *rootfs)
556{
0fd73091
CB
557 int fd, ret;
558 char absrootfs[MAXPATHLEN], absrootfspin[MAXPATHLEN];
0c547523 559 struct stat s;
63fc76c3 560 struct statfs sfs;
0c547523 561
e99ee0de 562 if (rootfs == NULL || strlen(rootfs) == 0)
0d03360a 563 return -2;
e99ee0de 564
00ec333b 565 if (!realpath(rootfs, absrootfs))
9be53773 566 return -2;
0c547523 567
0fd73091
CB
568 ret = stat(absrootfs, &s);
569 if (ret < 0)
0c547523 570 return -1;
0c547523 571
72f919c4 572 if (!S_ISDIR(s.st_mode))
0c547523
SH
573 return -2;
574
63fc76c3 575 ret = snprintf(absrootfspin, MAXPATHLEN, "%s/.lxc-keep", absrootfs);
00ec333b 576 if (ret >= MAXPATHLEN)
0c547523 577 return -1;
0c547523 578
0fd73091 579 fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR);
b7ed4bf0
CS
580 if (fd < 0)
581 return fd;
0fd73091 582
205fc010
CB
583 ret = fstatfs (fd, &sfs);
584 if (ret < 0)
585 return fd;
63fc76c3
GJ
586
587 if (sfs.f_type == NFS_SUPER_MAGIC) {
205fc010 588 DEBUG("Rootfs on NFS, not unlinking pin file \"%s\"", absrootfspin);
63fc76c3
GJ
589 return fd;
590 }
591
b7ed4bf0 592 (void)unlink(absrootfspin);
0fd73091 593
0c547523
SH
594 return fd;
595}
596
/* When a remount is requested, make sure the restrictive flags the filesystem
 * is currently mounted with (nosuid, nodev, ro, noexec) are carried over.
 *
 * @s     path used to look up the current mount flags; may be NULL
 * @d     path used instead when @s is NULL; may be NULL
 * @flags mount flags requested by the caller
 *
 * Returns @flags unchanged if MS_REMOUNT is not set, if neither path is given,
 * if statvfs() fails, or if the build has no statvfs() support. Otherwise
 * returns @flags with the restrictive flags already in effect added.
 */
unsigned long add_required_remount_flags(const char *s, const char *d,
					 unsigned long flags)
{
#ifdef HAVE_STATVFS
	const unsigned long inherited = MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC;
	const char *path = s ? s : d;
	struct statvfs sb;

	if ((flags & MS_REMOUNT) == 0)
		return flags;

	if (path == NULL)
		return flags;

	if (statvfs(path, &sb) < 0)
		return flags;

	return flags | (sb.f_flag & inherited);
#else
	return flags;
#endif
}
635
4fb3cba5 636static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 637{
0fd73091 638 int i, r;
b06b8511
CS
639 static struct {
640 int match_mask;
641 int match_flag;
642 const char *source;
643 const char *destination;
644 const char *fstype;
645 unsigned long flags;
646 const char *options;
647 } default_mounts[] = {
0fd73091
CB
648 /* Read-only bind-mounting... In older kernels, doing that
649 * required to do one MS_BIND mount and then
650 * MS_REMOUNT|MS_RDONLY the same one. According to mount(2)
651 * manpage, MS_BIND honors MS_RDONLY from kernel 2.6.26
652 * onwards. However, this apparently does not work on kernel
653 * 3.8. Unfortunately, on that very same kernel, doing the same
654 * trick as above doesn't seem to work either, there one needs
655 * to ALSO specify MS_BIND for the remount, otherwise the
656 * entire fs is remounted read-only or the mount fails because
657 * it's busy... MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for
658 * kernels as low as 2.6.32...
368bbc02 659 */
0fd73091 660 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a 661 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
0fd73091
CB
662 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
663 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
664 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
665 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
666 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
667 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
668 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
669 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
670 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
671 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
672 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
673 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
674 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
675 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
676 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
677 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 678 };
368bbc02 679
b06b8511 680 for (i = 0; default_mounts[i].match_mask; i++) {
0fd73091
CB
681 int saved_errno;
682 unsigned long mflags;
683 char *destination = NULL;
684 char *source = NULL;
685 if ((flags & default_mounts[i].match_mask) != default_mounts[i].match_flag)
686 continue;
687
688 if (default_mounts[i].source) {
cc4fd506 689 /* will act like strdup if %r is not present */
0fd73091
CB
690 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
691 if (!source)
cc4fd506 692 return -1;
0fd73091 693 }
f24a52d5 694
0fd73091
CB
695 if (!default_mounts[i].destination) {
696 ERROR("BUG: auto mounts destination %d was NULL", i);
b06b8511 697 free(source);
0fd73091
CB
698 return -1;
699 }
700
701 /* will act like strdup if %r is not present */
702 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
703 if (!destination) {
704 saved_errno = errno;
705 free(source);
706 errno = saved_errno;
707 return -1;
708 }
709
710 mflags = add_required_remount_flags(source, destination,
711 default_mounts[i].flags);
712 r = safe_mount(source, destination, default_mounts[i].fstype,
713 mflags, default_mounts[i].options,
714 conf->rootfs.path ? conf->rootfs.mount : NULL);
715 saved_errno = errno;
716 if (r < 0 && errno == ENOENT) {
717 INFO("Mount source or target for \"%s\" on \"%s\" does "
718 "not exist. Skipping", source, destination);
719 r = 0;
720 } else if (r < 0) {
721 SYSERROR("Failed to mount \"%s\" on \"%s\" with flags %lu", source, destination, mflags);
722 }
723
724 free(source);
725 free(destination);
726 if (r < 0) {
727 errno = saved_errno;
728 return -1;
368bbc02 729 }
368bbc02
CS
730 }
731
b06b8511 732 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
733 int cg_flags;
734
3f69fb12 735 cg_flags = flags & (LXC_AUTO_CGROUP_MASK & ~LXC_AUTO_CGROUP_FORCE);
0fd73091
CB
736 /* If the type of cgroup mount was not specified, it depends on
737 * the container's capabilities as to what makes sense: if we
738 * have CAP_SYS_ADMIN, the read-only part can be remounted
739 * read-write anyway, so we may as well default to read-write;
740 * then the admin will not be given a false sense of security.
741 * (And if they really want mixed r/o r/w, then they can
742 * explicitly specify :mixed.) OTOH, if the container lacks
743 * CAP_SYS_ADMIN, do only default to :mixed, because then the
744 * container can't remount it read-write.
745 */
0769b82a
CS
746 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
747 int has_sys_admin = 0;
b0ee5983
CB
748
749 if (!lxc_list_empty(&conf->keepcaps))
0769b82a 750 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
b0ee5983 751 else
0769b82a 752 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
b0ee5983
CB
753
754 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC)
0769b82a 755 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
b0ee5983 756 else
0769b82a 757 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
0769b82a 758 }
0fd73091 759
3f69fb12 760 if (flags & LXC_AUTO_CGROUP_FORCE)
0fd73091
CB
761 cg_flags |= LXC_AUTO_CGROUP_FORCE;
762
2202afc9
CB
763 if (!handler->cgroup_ops->mount(handler->cgroup_ops,
764 handler,
765 conf->rootfs.path ? conf->rootfs.mount : "",
766 cg_flags)) {
0fd73091 767 SYSERROR("Failed to mount \"/sys/fs/cgroup\"");
b06b8511 768 return -1;
368bbc02
CS
769 }
770 }
771
368bbc02 772 return 0;
368bbc02
CS
773}
774
/* Set the hostname to utsname->nodename.
 *
 * A NULL @utsname means no hostname was configured and is not an error.
 * Returns 0 on success (or when there is nothing to do) and -1 if
 * sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) < 0) {
		SYSERROR("Failed to set the hostname to \"%s\"", utsname->nodename);
		return -1;
	}

	INFO("Set hostname to \"%s\"", utsname->nodename);

	return 0;
}
792
/* A symlink to create below the container's /dev directory. */
struct dev_symlinks {
	const char *oldpath;	/* symlink target */
	const char *name;	/* name of the link inside /dev */
};

/* The standard file descriptor links that point into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ "/proc/self/fd",   "fd"     },
	{ "/proc/self/fd/0", "stdin"  },
	{ "/proc/self/fd/1", "stdout" },
	{ "/proc/self/fd/2", "stderr" },
};
804
ed8704d0 805static int lxc_setup_dev_symlinks(const struct lxc_rootfs *rootfs)
69aa6655 806{
0fd73091 807 int i, ret;
69aa6655 808 char path[MAXPATHLEN];
09227be2 809 struct stat s;
69aa6655 810
69aa6655
DE
811 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
812 const struct dev_symlinks *d = &dev_symlinks[i];
0fd73091
CB
813
814 ret = snprintf(path, sizeof(path), "%s/dev/%s",
815 rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
816 if (ret < 0 || ret >= MAXPATHLEN)
817 return -1;
09227be2 818
0fd73091
CB
819 /* Stat the path first. If we don't get an error accept it as
820 * is and don't try to create it
09227be2 821 */
0fd73091
CB
822 ret = stat(path, &s);
823 if (ret == 0)
09227be2 824 continue;
09227be2 825
69aa6655
DE
826 ret = symlink(d->oldpath, path);
827 if (ret && errno != EEXIST) {
0fd73091
CB
828 if (errno == EROFS) {
829 WARN("Failed to create \"%s\". Read-only filesystem", path);
09227be2 830 } else {
0fd73091 831 SYSERROR("Failed to create \"%s\"", path);
09227be2
MW
832 return -1;
833 }
69aa6655
DE
834 }
835 }
0fd73091 836
69aa6655
DE
837 return 0;
838}
839
/* Build a space-separated list of ptys to pass to systemd.
 *
 * If *pp is NULL a new string "container_ttys=<name>" is allocated, otherwise
 * " <name>" is appended to the existing string, which is reallocated. On
 * allocation failure false is returned and *pp is left as it was; on success
 * *pp points to the updated string, which the caller must free().
 */
static bool append_ttyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t cur_len;
	char *list;

	if (*pp == NULL) {
		list = malloc(prefix_len + name_len + 1);
		if (list == NULL)
			return false;

		memcpy(list, prefix, prefix_len);
		memcpy(list + prefix_len, name, name_len + 1);
		*pp = list;
		return true;
	}

	cur_len = strlen(*pp);

	/* Room for the separating space and the terminating NUL byte. */
	list = realloc(*pp, cur_len + name_len + 2);
	if (list == NULL)
		return false;

	list[cur_len] = ' ';
	memcpy(list + cur_len + 1, name, name_len + 1);
	*pp = list;

	return true;
}
864
2187efd3 865static int lxc_setup_ttys(struct lxc_conf *conf)
393903d1 866{
9e1045e3 867 int i, ret;
0e4be3cf 868 const struct lxc_tty_info *ttys = &conf->ttys;
885766f5 869 char *ttydir = ttys->dir;
7c6ef2a2 870 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
b0a33c1e 871
e8bd4e43 872 if (!conf->rootfs.path)
bc9bd0e3
DL
873 return 0;
874
885766f5 875 for (i = 0; i < ttys->max; i++) {
0e4be3cf 876 struct lxc_terminal_info *tty = &ttys->tty[i];
b0a33c1e 877
e8bd4e43 878 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
73363c61 879 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 880 return -1;
9e1045e3 881
7c6ef2a2
SH
882 if (ttydir) {
883 /* create dev/lxc/tty%d" */
9e1045e3
CB
884 ret = snprintf(lxcpath, sizeof(lxcpath),
885 "/dev/%s/tty%d", ttydir, i + 1);
73363c61 886 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
7c6ef2a2 887 return -1;
9e1045e3 888
7c6ef2a2 889 ret = creat(lxcpath, 0660);
9e1045e3 890 if (ret < 0 && errno != EEXIST) {
73363c61 891 SYSERROR("Failed to create \"%s\"", lxcpath);
7c6ef2a2
SH
892 return -1;
893 }
4d44e274
SH
894 if (ret >= 0)
895 close(ret);
9e1045e3 896
7c6ef2a2 897 ret = unlink(path);
9e1045e3 898 if (ret < 0 && errno != ENOENT) {
73363c61 899 SYSERROR("Failed to unlink \"%s\"", path);
7c6ef2a2
SH
900 return -1;
901 }
b0a33c1e 902
2520facd 903 ret = mount(tty->name, lxcpath, "none", MS_BIND, 0);
9e1045e3 904 if (ret < 0) {
73363c61 905 WARN("Failed to bind mount \"%s\" onto \"%s\"",
2520facd 906 tty->name, path);
7c6ef2a2
SH
907 continue;
908 }
0fd73091 909 DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name,
9e1045e3 910 path);
13954cce 911
9e1045e3
CB
912 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d",
913 ttydir, i + 1);
73363c61 914 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
9ba8130c 915 return -1;
9e1045e3 916
7c6ef2a2 917 ret = symlink(lxcpath, path);
9e1045e3 918 if (ret < 0) {
73363c61 919 SYSERROR("Failed to create symlink \"%s\" -> \"%s\"",
9e1045e3 920 path, lxcpath);
7c6ef2a2
SH
921 return -1;
922 }
923 } else {
9e1045e3
CB
924 /* If we populated /dev, then we need to create
925 * /dev/ttyN
926 */
d3ccc04e
CB
927 ret = mknod(path, S_IFREG | 0000, 0);
928 if (ret < 0) /* this isn't fatal, continue */
929 ERROR("%s - Failed to create \"%s\"", strerror(errno), path);
9e1045e3 930
2520facd 931 ret = mount(tty->name, path, "none", MS_BIND, 0);
9e1045e3 932 if (ret < 0) {
2520facd 933 SYSERROR("Failed to mount '%s'->'%s'", tty->name, path);
7c6ef2a2
SH
934 continue;
935 }
9e1045e3 936
d3ccc04e 937 DEBUG("Bind mounted \"%s\" onto \"%s\"", tty->name, path);
393903d1 938 }
9e1045e3 939
885766f5 940 if (!append_ttyname(&conf->ttys.tty_names, tty->name)) {
393903d1
SH
941 ERROR("Error setting up container_ttys string");
942 return -1;
b0a33c1e 943 }
944 }
945
885766f5 946 INFO("Finished setting up %zu /dev/tty<N> device(s)", ttys->max);
b0a33c1e 947 return 0;
948}
949
2187efd3
CB
950int lxc_allocate_ttys(const char *name, struct lxc_conf *conf)
951{
2187efd3 952 int i, ret;
0fd73091 953 struct lxc_tty_info *ttys = &conf->ttys;
2187efd3
CB
954
955 /* no tty in the configuration */
885766f5 956 if (ttys->max == 0)
2187efd3
CB
957 return 0;
958
885766f5 959 ttys->tty = malloc(sizeof(*ttys->tty) * ttys->max);
0e4be3cf 960 if (!ttys->tty)
2187efd3 961 return -ENOMEM;
2187efd3 962
885766f5 963 for (i = 0; i < ttys->max; i++) {
0e4be3cf 964 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 965
386e6768
CB
966 tty->master = -EBADF;
967 tty->slave = -EBADF;
2520facd
CB
968 ret = openpty(&tty->master, &tty->slave,
969 tty->name, NULL, NULL);
2187efd3 970 if (ret) {
0fd73091 971 SYSERROR("Failed to create tty %d", i);
885766f5 972 ttys->max = i;
0e4be3cf 973 lxc_delete_tty(ttys);
2187efd3
CB
974 return -ENOTTY;
975 }
976
0fd73091 977 DEBUG("Created tty \"%s\" with master fd %d and slave fd %d",
2520facd 978 tty->name, tty->master, tty->slave);
2187efd3
CB
979
980 /* Prevent leaking the file descriptors to the container */
2520facd 981 ret = fcntl(tty->master, F_SETFD, FD_CLOEXEC);
2187efd3 982 if (ret < 0)
0fd73091
CB
983 WARN("Failed to set FD_CLOEXEC flag on master fd %d of "
984 "tty device \"%s\": %s",
2520facd 985 tty->master, tty->name, strerror(errno));
2187efd3 986
2520facd 987 ret = fcntl(tty->slave, F_SETFD, FD_CLOEXEC);
2187efd3 988 if (ret < 0)
0fd73091
CB
989 WARN("Failed to set FD_CLOEXEC flag on slave fd %d of "
990 "tty device \"%s\": %s",
2520facd 991 tty->slave, tty->name, strerror(errno));
2187efd3 992
2520facd 993 tty->busy = 0;
2187efd3
CB
994 }
995
885766f5 996 INFO("Finished creating %zu tty devices", ttys->max);
2187efd3
CB
997 return 0;
998}
999
0e4be3cf 1000void lxc_delete_tty(struct lxc_tty_info *ttys)
2187efd3
CB
1001{
1002 int i;
1003
386e6768
CB
1004 if (!ttys->tty)
1005 return;
1006
885766f5 1007 for (i = 0; i < ttys->max; i++) {
0e4be3cf 1008 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 1009
386e6768
CB
1010 if (tty->master >= 0) {
1011 close(tty->master);
1012 tty->master = -EBADF;
1013 }
1014
1015 if (tty->slave >= 0) {
1016 close(tty->slave);
1017 tty->slave = -EBADF;
1018 }
2187efd3
CB
1019 }
1020
0e4be3cf
CB
1021 free(ttys->tty);
1022 ttys->tty = NULL;
2187efd3
CB
1023}
1024
1025static int lxc_send_ttys_to_parent(struct lxc_handler *handler)
1026{
1027 int i;
0fd73091 1028 int ret = -1;
2187efd3 1029 struct lxc_conf *conf = handler->conf;
0e4be3cf 1030 struct lxc_tty_info *ttys = &conf->ttys;
2187efd3 1031 int sock = handler->data_sock[0];
2187efd3 1032
885766f5 1033 if (ttys->max == 0)
2187efd3
CB
1034 return 0;
1035
885766f5 1036 for (i = 0; i < ttys->max; i++) {
2187efd3 1037 int ttyfds[2];
0e4be3cf 1038 struct lxc_terminal_info *tty = &ttys->tty[i];
2187efd3 1039
2520facd
CB
1040 ttyfds[0] = tty->master;
1041 ttyfds[1] = tty->slave;
2187efd3
CB
1042
1043 ret = lxc_abstract_unix_send_fds(sock, ttyfds, 2, NULL, 0);
1044 if (ret < 0)
1045 break;
1046
0fd73091 1047 TRACE("Sent ty \"%s\" with master fd %d and slave fd %d to "
2520facd 1048 "parent", tty->name, tty->master, tty->slave);
2187efd3
CB
1049 }
1050
1051 if (ret < 0)
885766f5 1052 ERROR("Failed to send %zu ttys to parent: %s", ttys->max,
2187efd3
CB
1053 strerror(errno));
1054 else
885766f5 1055 TRACE("Sent %zu ttys to parent", ttys->max);
2187efd3
CB
1056
1057 return ret;
1058}
1059
1060static int lxc_create_ttys(struct lxc_handler *handler)
1061{
1062 int ret = -1;
1063 struct lxc_conf *conf = handler->conf;
1064
1065 ret = lxc_allocate_ttys(handler->name, conf);
1066 if (ret < 0) {
1067 ERROR("Failed to allocate ttys");
1068 goto on_error;
1069 }
1070
1071 ret = lxc_send_ttys_to_parent(handler);
1072 if (ret < 0) {
1073 ERROR("Failed to send ttys to parent");
1074 goto on_error;
1075 }
1076
1077 if (!conf->is_execute) {
1078 ret = lxc_setup_ttys(conf);
1079 if (ret < 0) {
1080 ERROR("Failed to setup ttys");
1081 goto on_error;
1082 }
1083 }
1084
885766f5
CB
1085 if (conf->ttys.tty_names) {
1086 ret = setenv("container_ttys", conf->ttys.tty_names, 1);
2187efd3 1087 if (ret < 0)
885766f5 1088 SYSERROR("Failed to set \"container_ttys=%s\"", conf->ttys.tty_names);
2187efd3
CB
1089 }
1090
1091 ret = 0;
1092
1093on_error:
0e4be3cf 1094 lxc_delete_tty(&conf->ttys);
2187efd3
CB
1095
1096 return ret;
1097}
1098
59bb8698 1099static int setup_rootfs_pivot_root(const char *rootfs)
bf601689 1100{
0fd73091
CB
1101 int ret;
1102 int newroot = -1, oldroot = -1;
bf601689 1103
2d489f9e
SH
1104 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1105 if (oldroot < 0) {
0fd73091 1106 SYSERROR("Failed to open old root directory");
9ba8130c
SH
1107 return -1;
1108 }
0fd73091 1109
2d489f9e
SH
1110 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1111 if (newroot < 0) {
0fd73091
CB
1112 SYSERROR("Failed to open new root directory");
1113 goto on_error;
c08556c6 1114 }
bf601689 1115
cc6f6dd7 1116 /* change into new root fs */
0fd73091
CB
1117 ret = fchdir(newroot);
1118 if (ret < 0) {
1119 SYSERROR("Failed to change to new rootfs \"%s\"", rootfs);
1120 goto on_error;
cc6f6dd7
DL
1121 }
1122
cc6f6dd7 1123 /* pivot_root into our new root fs */
0fd73091
CB
1124 ret = pivot_root(".", ".");
1125 if (ret < 0) {
1126 SYSERROR("Failed to pivot_root()");
1127 goto on_error;
bf601689 1128 }
cc6f6dd7 1129
e599717b 1130 /* At this point the old-root is mounted on top of our new-root. To
0fd73091
CB
1131 * unmounted it we must not be chdir'd into it, so escape back to
1132 * old-root.
2d489f9e 1133 */
0fd73091
CB
1134 ret = fchdir(oldroot);
1135 if (ret < 0) {
1136 SYSERROR("Failed to enter old root directory");
1137 goto on_error;
2d489f9e 1138 }
0fd73091 1139
e599717b
FW
1140 /* Make oldroot rslave to make sure our umounts don't propagate to the
1141 * host.
1142 */
1143 ret = mount("", ".", "", MS_SLAVE | MS_REC, NULL);
1144 if (ret < 0) {
1145 SYSERROR("Failed to make oldroot rslave");
1146 goto on_error;
1147 }
1148
0fd73091
CB
1149 ret = umount2(".", MNT_DETACH);
1150 if (ret < 0) {
1151 SYSERROR("Failed to detach old root directory");
1152 goto on_error;
cc6f6dd7
DL
1153 }
1154
0fd73091
CB
1155 ret = fchdir(newroot);
1156 if (ret < 0) {
1157 SYSERROR("Failed to re-enter new root directory");
1158 goto on_error;
2d489f9e 1159 }
cc6f6dd7 1160
2d489f9e
SH
1161 close(oldroot);
1162 close(newroot);
bf601689 1163
0fd73091 1164 DEBUG("pivot_root(\"%s\") successful", rootfs);
bf601689 1165
bf601689 1166 return 0;
2d489f9e 1167
0fd73091 1168on_error:
2d489f9e
SH
1169 if (oldroot != -1)
1170 close(oldroot);
1171 if (newroot != -1)
1172 close(newroot);
0fd73091 1173
2d489f9e 1174 return -1;
bf601689
MH
1175}
1176
7133b912
CB
1177/* Just create a path for /dev under $lxcpath/$name and in rootfs If we hit an
1178 * error, log it but don't fail yet.
91c3830e 1179 */
7133b912
CB
1180static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs,
1181 const char *lxcpath)
91c3830e
SH
1182{
1183 int ret;
87da4ec3
SH
1184 size_t clen;
1185 char *path;
91c3830e 1186
7133b912 1187 INFO("Preparing \"/dev\"");
bc6928ff 1188
14221cbb 1189 /* $(rootfs->mount) + "/dev/pts" + '\0' */
ec50007f 1190 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1191 path = alloca(clen);
bc6928ff 1192
ec50007f 1193 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
7133b912 1194 if (ret < 0 || (size_t)ret >= clen)
91c3830e 1195 return -1;
bc6928ff 1196
87da4ec3 1197 if (!dir_exists(path)) {
7133b912
CB
1198 WARN("\"/dev\" directory does not exist. Proceeding without "
1199 "autodev being set up");
87da4ec3 1200 return 0;
bc6928ff 1201 }
87da4ec3 1202
1ec0e8e3 1203 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
7133b912
CB
1204 rootfs->path ? rootfs->mount : NULL);
1205 if (ret < 0) {
1206 SYSERROR("Failed to mount tmpfs on \"%s\"", path);
1ec0e8e3 1207 return -1;
91c3830e 1208 }
7133b912 1209 INFO("Mounted tmpfs on \"%s\"", path);
87da4ec3 1210
ec50007f 1211 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
7133b912 1212 if (ret < 0 || (size_t)ret >= clen)
91c3830e 1213 return -1;
87da4ec3 1214
7133b912 1215 /* If we are running on a devtmpfs mapping, dev/pts may already exist.
bc6928ff
MW
1216 * If not, then create it and exit if that fails...
1217 */
87da4ec3 1218 if (!dir_exists(path)) {
bc6928ff 1219 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
7133b912
CB
1220 if (ret < 0) {
1221 SYSERROR("Failed to create directory \"%s\"", path);
bc6928ff
MW
1222 return -1;
1223 }
91c3830e
SH
1224 }
1225
7133b912 1226 INFO("Prepared \"/dev\"");
91c3830e
SH
1227 return 0;
1228}
1229
/* Description of one device node that is created below the container's
 * "/dev" directory.
 */
struct lxc_device_node {
	/* File name of the node relative to "/dev". */
	const char *name;
	/* File type and permission bits handed to mknod(). */
	const mode_t mode;
	/* Device major number. */
	const int maj;
	/* Device minor number. */
	const int min;
};
1236
5e73416f 1237static const struct lxc_device_node lxc_devices[] = {
06749971 1238 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
5e73416f 1239 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
06749971
CB
1240 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1241 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
5e73416f
CB
1242 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1243 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
c6883f38
SH
1244};
1245
27245ff7 1246static int lxc_fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38 1247{
5e73416f 1248 int i, ret;
c6883f38 1249 char path[MAXPATHLEN];
3a32201c 1250 mode_t cmask;
5e73416f 1251 bool can_mknod = true;
c6883f38 1252
3999be0a
CB
1253 ret = snprintf(path, MAXPATHLEN, "%s/dev",
1254 rootfs->path ? rootfs->mount : "");
1255 if (ret < 0 || ret >= MAXPATHLEN)
c6883f38 1256 return -1;
91c3830e 1257
0bbf8572
CB
1258 /* ignore, just don't try to fill in */
1259 if (!dir_exists(path))
9cb4d183
SH
1260 return 0;
1261
3999be0a
CB
1262 INFO("Populating \"/dev\"");
1263
3a32201c 1264 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
5e73416f
CB
1265 for (i = 0; i < sizeof(lxc_devices) / sizeof(lxc_devices[0]); i++) {
1266 char hostpath[MAXPATHLEN];
1267 const struct lxc_device_node *device = &lxc_devices[i];
0728ebf4 1268
3999be0a 1269 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s",
5e73416f 1270 rootfs->path ? rootfs->mount : "", device->name);
c6883f38
SH
1271 if (ret < 0 || ret >= MAXPATHLEN)
1272 return -1;
0bbf8572 1273
5e73416f
CB
1274 if (can_mknod) {
1275 ret = mknod(path, device->mode, makedev(device->maj, device->min));
1276 if (ret == 0 || (ret < 0 && errno == EEXIST)) {
1277 DEBUG("Created device node \"%s\"", path);
0bbf8572
CB
1278 continue;
1279 }
1280
5e73416f
CB
1281 if (errno != EPERM) {
1282 SYSERROR("Failed to create device node \"%s\"", path);
9cb4d183
SH
1283 return -1;
1284 }
3999be0a 1285
5e73416f
CB
1286 /* This can e.g. happen when the container is
1287 * unprivileged or CAP_MKNOD has been dropped.
1288 */
1289 can_mknod = false;
1290 }
1291
1292 ret = mknod(path, S_IFREG, 0);
1293 if (ret < 0 && errno != EEXIST) {
1294 SYSERROR("Failed to create file \"%s\"", path);
1295 return -1;
1296 }
1297
1298 /* Fallback to bind-mounting the device from the host. */
1299 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", device->name);
1300 if (ret < 0 || ret >= MAXPATHLEN)
1301 return -1;
1302
1303 ret = safe_mount(hostpath, path, 0, MS_BIND, NULL,
1304 rootfs->path ? rootfs->mount : NULL);
1305 if (ret < 0) {
1306 SYSERROR("Failed to bind mount host device node \"%s\" "
1307 "onto \"%s\"", hostpath, path);
1308 return -1;
c6883f38 1309 }
5e73416f
CB
1310 DEBUG("Bind mounted host device node \"%s\" onto \"%s\"",
1311 hostpath, path);
c6883f38 1312 }
5e73416f 1313 (void)umask(cmask);
c6883f38 1314
3999be0a 1315 INFO("Populated \"/dev\"");
c6883f38
SH
1316 return 0;
1317}
1318
9aa76a17 1319static int lxc_setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1320{
9aa76a17 1321 int ret;
10bc1861 1322 struct lxc_storage *bdev;
91c3e281 1323 const struct lxc_rootfs *rootfs;
cc28d0b0 1324
91c3e281 1325 rootfs = &conf->rootfs;
a0f379bf 1326 if (!rootfs->path) {
0fd73091
CB
1327 ret = mount("", "/", NULL, MS_SLAVE | MS_REC, 0);
1328 if (ret < 0) {
1329 SYSERROR("Failed to make / rslave");
a0f379bf
DW
1330 return -1;
1331 }
0fd73091 1332
c69bd12f 1333 return 0;
a0f379bf 1334 }
0ad19a3f 1335
0fd73091
CB
1336 ret = access(rootfs->mount, F_OK);
1337 if (ret != 0) {
1338 SYSERROR("Failed to access to \"%s\". Check it is present",
12297168 1339 rootfs->mount);
b1789442
DL
1340 return -1;
1341 }
1342
8a388ed4 1343 bdev = storage_init(conf);
9aa76a17 1344 if (!bdev) {
0fd73091 1345 ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1346 rootfs->path, rootfs->mount,
1347 rootfs->options ? rootfs->options : "(null)");
9aa76a17 1348 return -1;
9be53773 1349 }
9aa76a17
CB
1350
1351 ret = bdev->ops->mount(bdev);
10bc1861 1352 storage_put(bdev);
9aa76a17 1353 if (ret < 0) {
0fd73091 1354 ERROR("Failed to mount rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1355 rootfs->path, rootfs->mount,
1356 rootfs->options ? rootfs->options : "(null)");
c3f0a28c 1357 return -1;
1358 }
0ad19a3f 1359
0fd73091 1360 DEBUG("Mounted rootfs \"%s\" onto \"%s\" with options \"%s\"",
91c3e281
CB
1361 rootfs->path, rootfs->mount,
1362 rootfs->options ? rootfs->options : "(null)");
9aa76a17 1363
ac778708
DL
1364 return 0;
1365}
1366
91e93c71
AV
1367int prepare_ramfs_root(char *root)
1368{
0fd73091
CB
1369 int i, ret;
1370 char *p, *p2;
1371 char buf[LXC_LINELEN], nroot[PATH_MAX];
91e93c71 1372 FILE *f;
91e93c71 1373
0fd73091
CB
1374 if (!realpath(root, nroot))
1375 return -1;
91e93c71 1376
0fd73091
CB
1377 ret = chdir("/");
1378 if (ret < 0)
1379 return -1;
91e93c71 1380
0fd73091
CB
1381 /* We could use here MS_MOVE, but in userns this mount is locked and
1382 * can't be moved.
91e93c71 1383 */
0fd73091
CB
1384 ret = mount(root, "/", NULL, MS_REC | MS_BIND, NULL);
1385 if (ret < 0) {
1386 SYSERROR("Failed to move \"%s\" into \"/\"", root);
1387 return -1;
91e93c71
AV
1388 }
1389
0fd73091
CB
1390 ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL);
1391 if (ret < 0) {
1392 SYSERROR("Failed to make \"/\" rprivate");
1393 return -1;
91e93c71
AV
1394 }
1395
0fd73091
CB
1396 /* The following code cleans up inhereted mounts which are not required
1397 * for CT.
91e93c71
AV
1398 *
1399 * The mountinfo file shows not all mounts, if a few points have been
1400 * unmounted between read operations from the mountinfo. So we need to
1401 * read mountinfo a few times.
1402 *
1403 * This loop can be skipped if a container uses unserns, because all
1404 * inherited mounts are locked and we should live with all this trash.
1405 */
0fd73091 1406 for (;;) {
91e93c71
AV
1407 int progress = 0;
1408
1409 f = fopen("./proc/self/mountinfo", "r");
1410 if (!f) {
1411 SYSERROR("Unable to open /proc/self/mountinfo");
1412 return -1;
1413 }
0fd73091 1414
eab15c1e 1415 while (fgets(buf, LXC_LINELEN, f)) {
91e93c71
AV
1416 for (p = buf, i=0; p && i < 4; i++)
1417 p = strchr(p+1, ' ');
0fd73091 1418
91e93c71
AV
1419 if (!p)
1420 continue;
0fd73091 1421
91e93c71
AV
1422 p2 = strchr(p+1, ' ');
1423 if (!p2)
1424 continue;
1425
1426 *p2 = '\0';
1427 *p = '.';
1428
1429 if (strcmp(p + 1, "/") == 0)
1430 continue;
0fd73091 1431
91e93c71
AV
1432 if (strcmp(p + 1, "/proc") == 0)
1433 continue;
1434
0fd73091
CB
1435 ret = umount2(p, MNT_DETACH);
1436 if (ret == 0)
91e93c71
AV
1437 progress++;
1438 }
0fd73091 1439
91e93c71 1440 fclose(f);
0fd73091 1441
91e93c71
AV
1442 if (!progress)
1443 break;
1444 }
1445
0fd73091
CB
1446 /* This also can be skipped if a container uses unserns. */
1447 (void)umount2("./proc", MNT_DETACH);
91e93c71
AV
1448
1449 /* It is weird, but chdir("..") moves us in a new root */
0fd73091
CB
1450 ret = chdir("..");
1451 if (ret < 0) {
91e93c71
AV
1452 SYSERROR("Unable to change working directory");
1453 return -1;
1454 }
1455
0fd73091
CB
1456 ret = chroot(".");
1457 if (ret < 0) {
91e93c71
AV
1458 SYSERROR("Unable to chroot");
1459 return -1;
1460 }
1461
1462 return 0;
1463}
1464
74a3920a 1465static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1466{
0fd73091
CB
1467 int ret;
1468
39c7b795 1469 if (!rootfs->path) {
0fd73091 1470 DEBUG("Container does not have a rootfs");
ac778708 1471 return 0;
39c7b795 1472 }
ac778708 1473
91e93c71 1474 if (detect_ramfs_rootfs()) {
0fd73091
CB
1475 DEBUG("Detected that container is on ramfs");
1476
1477 ret = prepare_ramfs_root(rootfs->mount);
1478 if (ret < 0) {
1479 ERROR("Failed to prepare minimal ramfs root");
91e93c71 1480 return -1;
39c7b795
CB
1481 }
1482
0fd73091 1483 DEBUG("Prepared ramfs root for container");
39c7b795
CB
1484 return 0;
1485 }
1486
0fd73091
CB
1487 ret = setup_rootfs_pivot_root(rootfs->mount);
1488 if (ret < 0) {
1489 ERROR("Failed to pivot_root()");
25368b52 1490 return -1;
c69bd12f
DL
1491 }
1492
0fd73091 1493 DEBUG("Finished pivot_root()");
25368b52 1494 return 0;
0ad19a3f 1495}
1496
5173b710 1497static const struct id_map *find_mapped_nsid_entry(struct lxc_conf *conf, unsigned id,
f4900711
CB
1498 enum idtype idtype)
1499{
1500 struct lxc_list *it;
1501 struct id_map *map;
1502 struct id_map *retmap = NULL;
1503
dcf0ffdf
CB
1504 /* Shortcut for container's root mappings. */
1505 if (id == 0) {
1506 if (idtype == ID_TYPE_UID)
1507 return conf->root_nsuid_map;
1508
1509 if (idtype == ID_TYPE_GID)
1510 return conf->root_nsgid_map;
1511 }
1512
f4900711
CB
1513 lxc_list_for_each(it, &conf->id_map) {
1514 map = it->elem;
1515 if (map->idtype != idtype)
1516 continue;
1517
1518 if (id >= map->nsid && id < map->nsid + map->range) {
1519 retmap = map;
1520 break;
1521 }
1522 }
1523
1524 return retmap;
1525}
1526
1527static int lxc_setup_devpts(struct lxc_conf *conf)
3c26f34e 1528{
70761e5e 1529 int ret;
11293068 1530 const char *default_devpts_mntopts = "gid=5,newinstance,ptmxmode=0666,mode=0620";
9d28c4f9 1531 char devpts_mntopts[256];
77890c6d 1532
e528c735 1533 if (conf->pty_max <= 0) {
0fd73091 1534 DEBUG("No new devpts instance will be mounted since no pts "
70761e5e 1535 "devices are requested");
d852c78c 1536 return 0;
3c26f34e 1537 }
1538
e528c735
CB
1539 ret = snprintf(devpts_mntopts, sizeof(devpts_mntopts), "%s,max=%zu",
1540 default_devpts_mntopts, conf->pty_max);
9d28c4f9
CB
1541 if (ret < 0 || (size_t)ret >= sizeof(devpts_mntopts))
1542 return -1;
1543
77f94854
CB
1544 ret = umount2("/dev/pts", MNT_DETACH);
1545 if (ret < 0)
1546 WARN("%s - Failed to unmount old devpts instance", strerror(errno));
1547 else
0fd73091 1548 DEBUG("Unmounted old devpts instance");
7e40254a 1549
70761e5e
CB
1550 /* Create mountpoint for devpts instance. */
1551 ret = mkdir("/dev/pts", 0755);
1552 if (ret < 0 && errno != EEXIST) {
0fd73091 1553 SYSERROR("Failed to create \"/dev/pts\" directory");
3c26f34e 1554 return -1;
1555 }
1556
11293068 1557 /* mount new devpts instance */
f4900711 1558 ret = mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, devpts_mntopts);
70761e5e 1559 if (ret < 0) {
11293068
CB
1560 /* try mounting without gid=5 */
1561 ret = mount("devpts", "/dev/pts", "devpts",
1562 MS_NOSUID | MS_NOEXEC, devpts_mntopts + sizeof("gid=5"));
1563 if (ret < 0) {
1564 SYSERROR("Failed to mount new devpts instance");
1565 return -1;
1566 }
70761e5e 1567 }
0fd73091 1568 DEBUG("Mount new devpts instance with options \"%s\"", devpts_mntopts);
70761e5e 1569
d5cb35d6 1570 /* Remove any pre-existing /dev/ptmx file. */
b29e05d6
CB
1571 ret = remove("/dev/ptmx");
1572 if (ret < 0) {
1573 if (errno != ENOENT) {
0fd73091 1574 SYSERROR("Failed to remove existing \"/dev/ptmx\" file");
d5cb35d6 1575 return -1;
70761e5e 1576 }
b29e05d6 1577 } else {
0fd73091 1578 DEBUG("Removed existing \"/dev/ptmx\" file");
3c26f34e 1579 }
1580
d5cb35d6
CB
1581 /* Create dummy /dev/ptmx file as bind mountpoint for /dev/pts/ptmx. */
1582 ret = open("/dev/ptmx", O_CREAT, 0666);
1583 if (ret < 0) {
0fd73091 1584 SYSERROR("Failed to create dummy \"/dev/ptmx\" file as bind mount target");
d5cb35d6
CB
1585 return -1;
1586 }
e87bd19c 1587 close(ret);
0fd73091 1588 DEBUG("Created dummy \"/dev/ptmx\" file as bind mount target");
77890c6d 1589
d5cb35d6 1590 /* Fallback option: create symlink /dev/ptmx -> /dev/pts/ptmx */
e87bd19c 1591 ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL);
d5cb35d6 1592 if (!ret) {
0fd73091 1593 DEBUG("Bind mounted \"/dev/pts/ptmx\" to \"/dev/ptmx\"");
d5cb35d6
CB
1594 return 0;
1595 } else {
1596 /* Fallthrough and try to create a symlink. */
0fd73091 1597 ERROR("Failed to bind mount \"/dev/pts/ptmx\" to \"/dev/ptmx\"");
d5cb35d6
CB
1598 }
1599
1600 /* Remove the dummy /dev/ptmx file we created above. */
1601 ret = remove("/dev/ptmx");
70761e5e 1602 if (ret < 0) {
0fd73091 1603 SYSERROR("Failed to remove existing \"/dev/ptmx\"");
d5cb35d6
CB
1604 return -1;
1605 }
1606
1607 /* Fallback option: Create symlink /dev/ptmx -> /dev/pts/ptmx. */
1608 ret = symlink("/dev/pts/ptmx", "/dev/ptmx");
1609 if (ret < 0) {
0fd73091 1610 SYSERROR("Failed to create symlink from \"/dev/ptmx\" to \"/dev/pts/ptmx\"");
3c26f34e 1611 return -1;
1612 }
0fd73091 1613 DEBUG("Created symlink from \"/dev/ptmx\" to \"/dev/pts/ptmx\"");
cd54d859 1614
3c26f34e 1615 return 0;
1616}
1617
/* Apply the execution domain @persona to the calling process.
 *
 * A value of -1 means "leave the personality alone". When the build lacks
 * <sys/personality.h> the function does nothing.
 *
 * Returns 0 on success (or when nothing had to be done) and -1 when
 * personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	int ret;

	if (persona == -1)
		return 0;

	ret = personality(persona);
	if (ret < 0) {
		SYSERROR("Failed to set personality to \"0x%x\"", persona);
		return -1;
	}

	INFO("Set personality to \"0x%x\"", persona);
#else
	/* Keep builds without personality support free of unused warnings. */
	(void)persona;
#endif

	return 0;
}
1637
3d7d929a 1638static int lxc_setup_dev_console(const struct lxc_rootfs *rootfs,
dcad02f8 1639 const struct lxc_terminal *console)
6e590161 1640{
0fd73091 1641 int fd, ret;
63376d7d 1642 char path[MAXPATHLEN];
86530b0a 1643 char *rootfs_path = rootfs->path ? rootfs->mount : "";
52e35957 1644
8b1b1210
CB
1645 if (console->path && !strcmp(console->path, "none"))
1646 return 0;
1647
86530b0a 1648 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
3d7d929a 1649 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1650 return -1;
52e35957 1651
8b1b1210
CB
1652 /* When we are asked to setup a console we remove any previous
1653 * /dev/console bind-mounts.
1654 */
a7ba3c7f
CB
1655 if (file_exists(path)) {
1656 ret = lxc_unstack_mountpoint(path, false);
1657 if (ret < 0) {
86530b0a 1658 ERROR("Failed to unmount \"%s\": %s", path, strerror(errno));
a7ba3c7f
CB
1659 return -ret;
1660 } else {
86530b0a 1661 DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
a7ba3c7f 1662 }
8b1b1210
CB
1663 }
1664
1665 /* For unprivileged containers autodev or automounts will already have
1666 * taken care of creating /dev/console.
1667 */
0728ebf4
TA
1668 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1669 if (fd < 0) {
1670 if (errno != EEXIST) {
86530b0a 1671 SYSERROR("Failed to create console");
3d7d929a 1672 return -errno;
0728ebf4
TA
1673 }
1674 } else {
1675 close(fd);
52e35957
DL
1676 }
1677
86530b0a
L
1678 ret = chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH);
1679 if (ret < 0) {
0fd73091
CB
1680 SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
1681 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
3d7d929a 1682 return -errno;
63376d7d 1683 }
13954cce 1684
86530b0a
L
1685 ret = safe_mount(console->name, path, "none", MS_BIND, 0, rootfs_path);
1686 if (ret < 0) {
0fd73091 1687 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, path);
6e590161 1688 return -1;
1689 }
1690
86530b0a 1691 DEBUG("Mounted pts device \"%s\" onto \"%s\"", console->name, path);
7c6ef2a2
SH
1692 return 0;
1693}
1694
3d7d929a 1695static int lxc_setup_ttydir_console(const struct lxc_rootfs *rootfs,
dcad02f8 1696 const struct lxc_terminal *console,
3d7d929a 1697 char *ttydir)
7c6ef2a2 1698{
3dc035f1 1699 int ret, fd;
3d7d929a 1700 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
86530b0a 1701 char *rootfs_path = rootfs->path ? rootfs->mount : "";
7c6ef2a2 1702
3dc035f1
L
1703 if (console->path && !strcmp(console->path, "none"))
1704 return 0;
1705
7c6ef2a2 1706 /* create rootfs/dev/<ttydir> directory */
86530b0a 1707 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs_path, ttydir);
3d7d929a 1708 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1709 return -1;
3d7d929a 1710
7c6ef2a2
SH
1711 ret = mkdir(path, 0755);
1712 if (ret && errno != EEXIST) {
0fd73091 1713 SYSERROR("Failed to create \"%s\"", path);
3d7d929a 1714 return -errno;
7c6ef2a2 1715 }
4742cd9a 1716 DEBUG("Created directory for console and tty devices at \"%s\"", path);
7c6ef2a2 1717
86530b0a 1718 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console", rootfs_path, ttydir);
3d7d929a
CB
1719 if (ret < 0 || (size_t)ret >= sizeof(lxcpath))
1720 return -1;
1721
7c6ef2a2 1722 ret = creat(lxcpath, 0660);
3d7d929a 1723 if (ret == -1 && errno != EEXIST) {
0fd73091 1724 SYSERROR("Failed to create \"%s\"", lxcpath);
3d7d929a 1725 return -errno;
7c6ef2a2 1726 }
4d44e274
SH
1727 if (ret >= 0)
1728 close(ret);
7c6ef2a2 1729
86530b0a 1730 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs_path);
3dc035f1 1731 if (ret < 0 || (size_t)ret >= sizeof(path))
7c6ef2a2 1732 return -1;
2a12fefd 1733
3dc035f1 1734 if (file_exists(path)) {
a7ba3c7f 1735 ret = lxc_unstack_mountpoint(path, false);
2a12fefd 1736 if (ret < 0) {
0fd73091 1737 ERROR("%s - Failed to unmount \"%s\"", strerror(errno), path);
a7ba3c7f
CB
1738 return -ret;
1739 } else {
86530b0a 1740 DEBUG("Cleared all (%d) mounts from \"%s\"", ret, path);
a7ba3c7f 1741 }
3dc035f1 1742 }
2a12fefd 1743
3dc035f1
L
1744 fd = open(path, O_CREAT | O_EXCL, S_IXUSR | S_IXGRP | S_IXOTH);
1745 if (fd < 0) {
1746 if (errno != EEXIST) {
86530b0a 1747 SYSERROR("Failed to create console");
3dc035f1 1748 return -errno;
2a12fefd 1749 }
3dc035f1
L
1750 } else {
1751 close(fd);
7c6ef2a2
SH
1752 }
1753
86530b0a
L
1754 ret = chmod(console->name, S_IXUSR | S_IXGRP | S_IXOTH);
1755 if (ret < 0) {
0fd73091
CB
1756 SYSERROR("Failed to set mode \"0%o\" to \"%s\"",
1757 S_IXUSR | S_IXGRP | S_IXOTH, console->name);
2a12fefd
CB
1758 return -errno;
1759 }
1760
3dc035f1 1761 /* bind mount console->name to '/dev/<ttydir>/console' */
86530b0a
L
1762 ret = safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs_path);
1763 if (ret < 0) {
0fd73091 1764 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, lxcpath);
7c6ef2a2
SH
1765 return -1;
1766 }
86530b0a 1767 DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath);
3dc035f1
L
1768
1769 /* bind mount '/dev/<ttydir>/console' to '/dev/console' */
86530b0a
L
1770 ret = safe_mount(lxcpath, path, "none", MS_BIND, 0, rootfs_path);
1771 if (ret < 0) {
0fd73091 1772 ERROR("Failed to mount \"%s\" on \"%s\"", console->name, lxcpath);
3dc035f1
L
1773 return -1;
1774 }
86530b0a 1775 DEBUG("Mounted \"%s\" onto \"%s\"", console->name, lxcpath);
3dc035f1 1776
86530b0a 1777 DEBUG("Console has been setup under \"%s\" and mounted to \"%s\"", lxcpath, path);
6e590161 1778 return 0;
1779}
1780
/* Set up the container's console: directly on "/dev/console" when no tty
 * directory is given, below "/dev/<ttydir>" otherwise. Returns whatever the
 * selected helper returns.
 */
static int lxc_setup_console(const struct lxc_rootfs *rootfs,
			     const struct lxc_terminal *console, char *ttydir)
{
	return ttydir ? lxc_setup_ttydir_console(rootfs, console, ttydir)
		      : lxc_setup_dev_console(rootfs, console);
}
1790
998ac676
RT
1791static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1792{
1793 struct mount_opt *mo;
1794
1795 /* If opt is found in mount_opt, set or clear flags.
1796 * Otherwise append it to data. */
1797
1798 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
0fd73091 1799 if (strncmp(opt, mo->name, strlen(mo->name)) == 0) {
998ac676
RT
1800 if (mo->clear)
1801 *flags &= ~mo->flag;
1802 else
1803 *flags |= mo->flag;
1804 return;
1805 }
1806 }
1807
1808 if (strlen(*data))
1809 strcat(*data, ",");
1810 strcat(*data, opt);
1811}
1812
/* Split the comma separated option string @mntopts into mount flags and
 * filesystem specific data.
 *
 * @mntopts:  option string, may be NULL
 * @mntflags: receives the accumulated mount flags (reset to 0 first)
 * @mntdata:  receives a newly allocated string with all options that are
 *            not flags, or NULL when there are none; the caller owns it
 *
 * Returns 0 on success and -1 when memory cannot be allocated.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags, char **mntdata)
{
	char *data, *p, *s, *cursor;
	char *saveptr = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	s = strdup(mntopts);
	if (!s)
		return -1;

	/* The data string can never be longer than the option string. */
	data = malloc(strlen(s) + 1);
	if (!data) {
		free(s);
		return -1;
	}
	*data = 0;

	/* Iterate with a separate cursor so that s still points at the
	 * duplicated string when it is freed below.
	 */
	for (cursor = s; (p = strtok_r(cursor, ",", &saveptr)); cursor = NULL)
		parse_mntopt(p, mntflags, &data);

	if (*data)
		*mntdata = data;
	else
		free(data);
	free(s);

	return 0;
}
1846
d840039e
YT
1847static void parse_propagationopt(char *opt, unsigned long *flags)
1848{
1849 struct mount_opt *mo;
1850
1851 /* If opt is found in propagation_opt, set or clear flags. */
d840039e 1852 for (mo = &propagation_opt[0]; mo->name != NULL; mo++) {
0fd73091
CB
1853 if (strncmp(opt, mo->name, strlen(mo->name)) != 0)
1854 continue;
1855
1856 if (mo->clear)
1857 *flags &= ~mo->flag;
1858 else
1859 *flags |= mo->flag;
1860
1861 return;
d840039e
YT
1862 }
1863}
1864
/* Extract the mount propagation flags from the comma separated option
 * string @mntopts.
 *
 * @mntopts: option string; when NULL, *@pflags is left untouched
 * @pflags:  receives the accumulated propagation flags
 *
 * Returns 0 on success and -ENOMEM when the option string cannot be
 * duplicated.
 */
static int parse_propagationopts(const char *mntopts, unsigned long *pflags)
{
	char *p, *s, *cursor;
	char *saveptr = NULL;

	if (!mntopts)
		return 0;

	s = strdup(mntopts);
	if (!s) {
		SYSERROR("Failed to allocate memory");
		return -ENOMEM;
	}

	*pflags = 0L;

	/* Iterate with a separate cursor so that s still points at the
	 * duplicated string when it is freed below.
	 */
	for (cursor = s; (p = strtok_r(cursor, ",", &saveptr)); cursor = NULL)
		parse_propagationopt(p, pflags);
	free(s);

	return 0;
}
1886
/* Terminate @word at its first space or tab. A string without either is
 * left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1893
/* Skip @nfields fields in @src, where a field ends at a single space or tab.
 *
 * Returns a pointer to the character following the last skipped separator,
 * or to the terminating NUL byte when @src holds fewer separators. Every
 * separator counts, so consecutive blanks delimit empty fields.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;

	while (nfields-- > 0) {
		/* Advance to the end of the current field. */
		cur += strcspn(cur, " \t");

		if (*cur == '\0')
			break;

		/* Step over the separator. */
		cur++;
	}

	return cur;
}
1912
/* Mount a single entry and apply remount and propagation options.
 *
 * @fsname:     mount source, may be NULL
 * @target:     mount target
 * @fstype:     filesystem type
 * @mountflags: mount flags; MS_REMOUNT is masked out for the initial mount
 * @pflags:     propagation flags applied to @target in a separate mount()
 *              call, 0 for none
 * @data:       filesystem specific mount data
 * @optional:   when true, failures are logged and reported as success
 * @dev:        when true, MS_NODEV found on the source is not carried over
 *              to the remount
 * @relative:   when true, @fsname is interpreted relative to @rootfs
 * @rootfs:     rootfs path passed on to safe_mount()
 *
 * Returns 0 on success (or on a failure of an optional entry) and -1
 * otherwise.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       unsigned long pflags, const char *data, bool optional,
		       bool dev, bool relative, const char *rootfs)
{
	int ret;
	char srcbuf[MAXPATHLEN];
	const char *srcpath = fsname;
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (relative) {
		ret = snprintf(srcbuf, MAXPATHLEN, "%s/%s", rootfs ? rootfs : "/", fsname ? fsname : "");
		if (ret < 0 || ret >= MAXPATHLEN) {
			ERROR("source path is too long");
			return -1;
		}
		srcpath = srcbuf;
	}

	ret = safe_mount(srcpath, target, fstype, mountflags & ~MS_REMOUNT, data,
			 rootfs);
	if (ret < 0) {
		if (optional) {
			INFO("%s - Failed to mount \"%s\" on \"%s\" "
			     "(optional)", strerror(errno),
			     srcpath ? srcpath : "(null)", target);
			return 0;
		}

		SYSERROR("Failed to mount \"%s\" on \"%s\"",
			 srcpath ? srcpath : "(null)", target);
		return -1;
	}

	if ((mountflags & MS_REMOUNT) || (mountflags & MS_BIND)) {
		unsigned long rqd_flags = 0;

		DEBUG("Remounting \"%s\" on \"%s\" to respect bind or remount "
		      "options", srcpath ? srcpath : "(none)", target ? target : "(none)");

		if (mountflags & MS_RDONLY)
			rqd_flags |= MS_RDONLY;
#ifdef HAVE_STATVFS
		if (srcpath && statvfs(srcpath, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			/* Carry over the restrictive flags of the source. */
			if (sb.f_flag & MS_NOSUID)
				required_flags |= MS_NOSUID;

			if (sb.f_flag & MS_NODEV && !dev)
				required_flags |= MS_NODEV;

			if (sb.f_flag & MS_RDONLY)
				required_flags |= MS_RDONLY;

			if (sb.f_flag & MS_NOEXEC)
				required_flags |= MS_NOEXEC;

			DEBUG("Flags for \"%s\" were %lu, required extra flags "
			      "are %lu", srcpath, sb.f_flag, required_flags);

			/* If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount.
			 */
			if (!(mountflags & MS_REMOUNT)) {
				if (!(required_flags & ~mountflags) &&
				    rqd_flags == 0) {
					DEBUG("Mountflags already were %lu, "
					      "skipping remount", mountflags);
					goto skipremount;
				}
			}

			mountflags |= required_flags;
		}
#endif

		ret = mount(srcpath, target, fstype, mountflags | MS_REMOUNT, data);
		if (ret < 0) {
			if (optional) {
				INFO("Failed to mount \"%s\" on \"%s\" "
				     "(optional): %s",
				     srcpath ? srcpath : "(null)", target,
				     strerror(errno));
				return 0;
			}

			SYSERROR("Failed to mount \"%s\" on \"%s\"",
				 srcpath ? srcpath : "(null)", target);
			return -1;
		}
	}

	/* The label sits in front of the propagation step on purpose:
	 * skipping the remount must not skip the requested propagation
	 * change as well.
	 */
#ifdef HAVE_STATVFS
skipremount:
#endif
	if (pflags) {
		ret = mount(NULL, target, NULL, pflags, NULL);
		if (ret < 0) {
			if (optional) {
				INFO("%s - Failed to change mount propagation "
				     "for \"%s\" (optional)", strerror(errno), target);
				return 0;
			}

			SYSERROR("Failed to change mount propagation "
				 "for \"%s\"", target);
			return -1;
		}
		DEBUG("Changed mount propagation for \"%s\"", target);
	}

	DEBUG("Mounted \"%s\" on \"%s\" with filesystem type \"%s\"",
	      srcpath ? srcpath : "(null)", target, fstype ? fstype : "(null)");

	return 0;
}
2034
/* Strip the LXC-only mount options "create=dir", "create=file", "optional"
 * and "relative" from mntent->mnt_opts, editing the string in place.
 *
 * Only the first occurrence of each option is removed. An option that is
 * followed by a comma is cut out together with that comma; an option that is
 * the last one in the string is removed by truncating the string where the
 * option starts (a preceding comma, if any, is left in place).
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const lxc_only_opts[] = {
		"create=dir",
		"create=file",
		"optional",
		"relative",
	};
	const size_t nr_opts = sizeof(lxc_only_opts) / sizeof(lxc_only_opts[0]);
	size_t idx;

	for (idx = 0; idx < nr_opts; idx++) {
		char *match, *comma;

		match = strstr(mntent->mnt_opts, lxc_only_opts[idx]);
		if (!match)
			continue;

		comma = strchr(match, ',');
		if (comma) {
			char *rest = comma + 1;

			/* Slide the remaining options (and the NUL) over the match. */
			memmove(match, rest, strlen(rest) + 1);
		} else {
			/* Last option in the list: just chop the string here. */
			*match = '\0';
		}
	}
}
2064
/* Create the mount target requested through the "create=dir" and
 * "create=file" mount options before the entry gets mounted.
 *
 * - For filesystem types starting with "overlay", ovl_mkdir() is called first.
 * - With "create=dir", @path is created via mkdir_p() (mode 0755).
 * - With "create=file", and if @path does not exist yet, the parent directory
 *   of @path is created via mkdir_p() and @path itself is created as an empty
 *   file (mode 0644).
 *
 * Returns 0 on success or when nothing had to be created, -1 on failure.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char *path,
				       const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	int fd, ret;
	char *path_copy, *parent;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		ret = ovl_mkdir(mntent, rootfs, lxc_name, lxc_path);
		if (ret < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		ret = mkdir_p(path, 0755);
		if (ret < 0 && errno != EEXIST) {
			SYSERROR("Failed to create directory \"%s\"", path);
			return -1;
		}
	}

	if (!hasmntopt(mntent, "create=file"))
		return 0;

	ret = access(path, F_OK);
	if (ret == 0)
		return 0;

	/* dirname() may modify its argument, so operate on a private copy. */
	path_copy = strdup(path);
	if (!path_copy)
		return -1;

	parent = dirname(path_copy);

	ret = mkdir_p(parent, 0755);
	if (ret < 0 && errno != EEXIST) {
		/* Inspect errno and log before free(): free() is not guaranteed
		 * to preserve errno on every libc, and parent may point into
		 * path_copy. Report the directory that actually failed.
		 */
		SYSERROR("Failed to create directory \"%s\"", parent);
		free(path_copy);
		return -1;
	}
	free(path_copy);

	fd = open(path, O_CREAT, 0644);
	if (fd < 0)
		return -1;
	close(fd);

	return 0;
}
2114
ec50007f
CB
2115/* rootfs, lxc_name, and lxc_path can be NULL when the container is created
2116 * without a rootfs. */
db4aba38 2117static inline int mount_entry_on_generic(struct mntent *mntent,
d8b712bc
CB
2118 const char *path,
2119 const struct lxc_rootfs *rootfs,
2120 const char *lxc_name,
2121 const char *lxc_path)
4d5b72a1 2122{
d8b712bc 2123 int ret;
949d0338 2124 unsigned long mntflags;
4d5b72a1 2125 char *mntdata;
181437fd 2126 bool dev, optional, relative;
949d0338 2127 unsigned long pflags = 0;
ec50007f 2128 char *rootfs_path = NULL;
d8b712bc
CB
2129
2130 optional = hasmntopt(mntent, "optional") != NULL;
2131 dev = hasmntopt(mntent, "dev") != NULL;
181437fd 2132 relative = hasmntopt(mntent, "relative") != NULL;
d8b712bc 2133
ec50007f
CB
2134 if (rootfs && rootfs->path)
2135 rootfs_path = rootfs->mount;
2136
d8b712bc
CB
2137 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name,
2138 lxc_path);
2139 if (ret < 0) {
2140 if (optional)
2141 return 0;
608e3567 2142
d8b712bc
CB
2143 return -1;
2144 }
4e4ca161
SH
2145 cull_mntent_opt(mntent);
2146
d840039e
YT
2147 ret = parse_propagationopts(mntent->mnt_opts, &pflags);
2148 if (ret < 0)
2149 return -1;
2150
d8b712bc
CB
2151 ret = parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata);
2152 if (ret < 0)
a17b1e65 2153 return -1;
a17b1e65 2154
6e46cc0d 2155 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
d840039e 2156 pflags, mntdata, optional, dev, relative, rootfs_path);
68c152ef 2157
911324ef 2158 free(mntdata);
911324ef
DL
2159 return ret;
2160}
2161
db4aba38
NC
/* Mount an entry for a container created without a rootfs.
 *
 * In that case every mount target is treated as an absolute path on the
 * host, so a leading '/' is prepended when mnt_dir does not start with one.
 * Returns -1 if the resulting path does not fit into MAXPATHLEN, otherwise
 * the result of mount_entry_on_generic().
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	char path[MAXPATHLEN];
	const char *dir = mntent->mnt_dir;
	int len;

	if (dir[0] == '/')
		len = snprintf(path, sizeof(path), "%s", dir);
	else
		len = snprintf(path, sizeof(path), "/%s", dir);

	if (len < 0)
		return -1;

	if ((size_t)len >= sizeof(path))
		return -1;

	return mount_entry_on_generic(mntent, path, NULL, NULL, NULL);
}
2179
4e4ca161 2180static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 2181 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
2182 const char *lxc_name,
2183 const char *lxc_path)
911324ef 2184{
bdd2b34c 2185 int offset;
013bd428 2186 char *aux;
67e571de 2187 const char *lxcpath;
bdd2b34c
CB
2188 char path[MAXPATHLEN];
2189 int ret = 0;
0ad19a3f 2190
593e8478 2191 lxcpath = lxc_global_config_value("lxc.lxcpath");
bdd2b34c 2192 if (!lxcpath)
2a59a681 2193 return -1;
2a59a681 2194
bdd2b34c
CB
2195 /* If rootfs->path is a blockdev path, allow container fstab to use
2196 * <lxcpath>/<name>/rootfs" as the target prefix.
2197 */
2198 ret = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
2199 if (ret < 0 || ret >= MAXPATHLEN)
80a881b2
SH
2200 goto skipvarlib;
2201
2202 aux = strstr(mntent->mnt_dir, path);
2203 if (aux) {
2204 offset = strlen(path);
2205 goto skipabs;
2206 }
2207
2208skipvarlib:
013bd428
DL
2209 aux = strstr(mntent->mnt_dir, rootfs->path);
2210 if (!aux) {
bdd2b34c 2211 WARN("Ignoring mount point \"%s\"", mntent->mnt_dir);
db4aba38 2212 return ret;
013bd428 2213 }
80a881b2
SH
2214 offset = strlen(rootfs->path);
2215
2216skipabs:
bdd2b34c
CB
2217 ret = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount, aux + offset);
2218 if (ret < 0 || ret >= MAXPATHLEN)
a17b1e65 2219 return -1;
a17b1e65 2220
0a2dddd4 2221 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 2222}
d330fe7b 2223
4e4ca161 2224static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
2225 const struct lxc_rootfs *rootfs,
2226 const char *lxc_name,
2227 const char *lxc_path)
911324ef 2228{
911324ef 2229 int ret;
0fd73091 2230 char path[MAXPATHLEN];
d330fe7b 2231
34cfffb3 2232 /* relative to root mount point */
6e46cc0d 2233 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
0fd73091 2234 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2235 return -1;
911324ef 2236
0a2dddd4 2237 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
2238}
2239
06749971
CB
2240static int mount_file_entries(const struct lxc_conf *conf,
2241 const struct lxc_rootfs *rootfs, FILE *file,
1ae3c19f 2242 const char *lxc_name, const char *lxc_path)
911324ef 2243{
aaf901be 2244 char buf[4096];
0fd73091 2245 struct mntent mntent;
911324ef 2246 int ret = -1;
e76b8764 2247
aaf901be 2248 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
1ae3c19f
CB
2249 if (!rootfs->path)
2250 ret = mount_entry_on_systemfs(&mntent);
2251 else if (mntent.mnt_dir[0] != '/')
2252 ret = mount_entry_on_relative_rootfs(&mntent, rootfs,
2253 lxc_name, lxc_path);
2254 else
2255 ret = mount_entry_on_absolute_rootfs(&mntent, rootfs,
2256 lxc_name, lxc_path);
2257 if (ret < 0)
2258 return -1;
0ad19a3f 2259 }
2260 ret = 0;
cd54d859 2261
0fd73091 2262 INFO("Finished setting up mounts");
e7938e9e
MN
2263 return ret;
2264}
2265
06749971
CB
/* Mount all entries listed in the fstab file @fstab.
 *
 * A NULL @fstab means there is nothing to do and counts as success.
 * Returns -1 if the file cannot be opened, otherwise the result of
 * mount_file_entries().
 */
static int setup_mount(const struct lxc_conf *conf,
		       const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *stream;
	int rc;

	if (fstab == NULL)
		return 0;

	stream = setmntent(fstab, "r");
	if (stream == NULL) {
		SYSERROR("Failed to open \"%s\"", fstab);
		return -1;
	}

	rc = mount_file_entries(conf, rootfs, stream, lxc_name, lxc_path);
	if (rc < 0)
		ERROR("Failed to set up mount entries");

	endmntent(stream);

	return rc;
}
2289
5ef5c9a3 2290FILE *make_anonymous_mount_file(struct lxc_list *mount)
e7938e9e 2291{
5ef5c9a3 2292 int ret;
e7938e9e 2293 char *mount_entry;
5ef5c9a3 2294 struct lxc_list *iterator;
5ef5c9a3
CB
2295 int fd = -1;
2296
0fd73091 2297 fd = memfd_create(".lxc_mount_file", MFD_CLOEXEC);
5ef5c9a3 2298 if (fd < 0) {
a324e7eb
CB
2299 char template[] = P_tmpdir "/.lxc_mount_file_XXXXXX";
2300
5ef5c9a3
CB
2301 if (errno != ENOSYS)
2302 return NULL;
a324e7eb
CB
2303
2304 fd = lxc_make_tmpfile(template, true);
0fd73091
CB
2305 if (fd < 0) {
2306 SYSERROR("Could not create temporary mount file");
2307 return NULL;
2308 }
2309
6bd04140 2310 TRACE("Created temporary mount file");
5ef5c9a3 2311 }
0fd73091
CB
2312 if (fd < 0) {
2313 SYSERROR("Could not create temporary mount file");
9fc7f8c0 2314 return NULL;
e7938e9e
MN
2315 }
2316
0fd73091
CB
2317 lxc_list_for_each (iterator, mount) {
2318 size_t len;
2319
e7938e9e 2320 mount_entry = iterator->elem;
0fd73091 2321 len = strlen(mount_entry);
5ef5c9a3 2322
489f39be 2323 ret = lxc_write_nointr(fd, mount_entry, len);
0fd73091
CB
2324 if (ret != len)
2325 goto on_error;
2326
489f39be 2327 ret = lxc_write_nointr(fd, "\n", 1);
0fd73091
CB
2328 if (ret != 1)
2329 goto on_error;
e7938e9e
MN
2330 }
2331
0fd73091
CB
2332 ret = lseek(fd, 0, SEEK_SET);
2333 if (ret < 0)
2334 goto on_error;
2335
2336 return fdopen(fd, "r+");
2337
2338on_error:
2339 SYSERROR("Failed to write mount entry to temporary mount file");
2340 close(fd);
2341 return NULL;
9fc7f8c0
TA
2342}
2343
06749971
CB
/* Mount all entries of the in-memory mount list @mount.
 *
 * The list is first serialized into an anonymous file so that it can be
 * processed by the same mntent-based code path as an fstab file.
 * Returns -1 if that file cannot be created, otherwise the result of
 * mount_file_entries().
 */
static int setup_mount_entries(const struct lxc_conf *conf,
			       const struct lxc_rootfs *rootfs,
			       struct lxc_list *mount, const char *lxc_name,
			       const char *lxc_path)
{
	int rc;
	FILE *stream = make_anonymous_mount_file(mount);

	if (stream == NULL)
		return -1;

	rc = mount_file_entries(conf, rootfs, stream, lxc_name, lxc_path);
	fclose(stream);

	return rc;
}
2361
bab88e68
CS
2362static int parse_cap(const char *cap)
2363{
84760c11 2364 size_t i;
2365 int capid = -1;
0fd73091
CB
2366 size_t end = sizeof(caps_opt) / sizeof(caps_opt[0]);
2367 char *ptr = NULL;
bab88e68 2368
0fd73091 2369 if (strcmp(cap, "none") == 0)
7035407c
DE
2370 return -2;
2371
8560cd36 2372 for (i = 0; i < end; i++) {
bab88e68
CS
2373 if (strcmp(cap, caps_opt[i].name))
2374 continue;
2375
2376 capid = caps_opt[i].value;
2377 break;
2378 }
2379
2380 if (capid < 0) {
0fd73091
CB
2381 /* Try to see if it's numeric, so the user may specify
2382 * capabilities that the running kernel knows about but we
2383 * don't
2384 */
bab88e68
CS
2385 errno = 0;
2386 capid = strtol(cap, &ptr, 10);
2387 if (!ptr || *ptr != '\0' || errno != 0)
2388 /* not a valid number */
2389 capid = -1;
2390 else if (capid > lxc_caps_last_cap())
2391 /* we have a number but it's not a valid
2392 * capability */
2393 capid = -1;
2394 }
2395
2396 return capid;
2397}
2398
0769b82a
CS
2399int in_caplist(int cap, struct lxc_list *caps)
2400{
0769b82a 2401 int capid;
0fd73091 2402 struct lxc_list *iterator;
0769b82a 2403
0fd73091 2404 lxc_list_for_each (iterator, caps) {
0769b82a
CS
2405 capid = parse_cap(iterator->elem);
2406 if (capid == cap)
2407 return 1;
2408 }
2409
2410 return 0;
2411}
2412
81810dd1
DL
2413static int setup_caps(struct lxc_list *caps)
2414{
bab88e68 2415 int capid;
0fd73091
CB
2416 char *drop_entry;
2417 struct lxc_list *iterator;
81810dd1 2418
0fd73091
CB
2419 lxc_list_for_each (iterator, caps) {
2420 int ret;
81810dd1
DL
2421
2422 drop_entry = iterator->elem;
2423
bab88e68 2424 capid = parse_cap(drop_entry);
0fd73091 2425 if (capid < 0) {
1e11be34
DL
2426 ERROR("unknown capability %s", drop_entry);
2427 return -1;
81810dd1
DL
2428 }
2429
0fd73091
CB
2430 ret = prctl(PR_CAPBSET_DROP, capid, 0, 0, 0);
2431 if (ret < 0) {
2432 SYSERROR("Failed to remove %s capability", drop_entry);
3ec1648d
SH
2433 return -1;
2434 }
0fd73091 2435 DEBUG("Dropped %s (%d) capability", drop_entry, capid);
81810dd1
DL
2436 }
2437
0fd73091 2438 DEBUG("Capabilities have been setup");
1fb86a7c
SH
2439 return 0;
2440}
2441
2442static int dropcaps_except(struct lxc_list *caps)
2443{
0fd73091 2444 int i, capid, numcaps;
1fb86a7c 2445 char *keep_entry;
0fd73091 2446 struct lxc_list *iterator;
1fb86a7c 2447
0fd73091 2448 numcaps = lxc_caps_last_cap() + 1;
2caf9a97
SH
2449 if (numcaps <= 0 || numcaps > 200)
2450 return -1;
0fd73091 2451 TRACE("Found %d capabilities", numcaps);
2caf9a97 2452
1a0e70ac 2453 /* caplist[i] is 1 if we keep capability i */
1fb86a7c
SH
2454 int *caplist = alloca(numcaps * sizeof(int));
2455 memset(caplist, 0, numcaps * sizeof(int));
2456
0fd73091 2457 lxc_list_for_each (iterator, caps) {
1fb86a7c
SH
2458 keep_entry = iterator->elem;
2459
bab88e68 2460 capid = parse_cap(keep_entry);
7035407c
DE
2461 if (capid == -2)
2462 continue;
2463
0fd73091
CB
2464 if (capid < 0) {
2465 ERROR("Unknown capability %s", keep_entry);
1fb86a7c
SH
2466 return -1;
2467 }
2468
0fd73091 2469 DEBUG("Keep capability %s (%d)", keep_entry, capid);
1fb86a7c
SH
2470 caplist[capid] = 1;
2471 }
0fd73091
CB
2472
2473 for (i = 0; i < numcaps; i++) {
2474 int ret;
2475
1fb86a7c
SH
2476 if (caplist[i])
2477 continue;
0fd73091
CB
2478
2479 ret = prctl(PR_CAPBSET_DROP, i, 0, 0, 0);
2480 if (ret < 0) {
2481 SYSERROR("Failed to remove capability %d", i);
3ec1648d
SH
2482 return -1;
2483 }
1fb86a7c
SH
2484 }
2485
0fd73091 2486 DEBUG("Capabilities have been setup");
81810dd1
DL
2487 return 0;
2488}
2489
0fd73091
CB
2490static int parse_resource(const char *res)
2491{
2492 int ret;
c6d09e15
WB
2493 size_t i;
2494 int resid = -1;
2495
0fd73091 2496 for (i = 0; i < sizeof(limit_opt) / sizeof(limit_opt[0]); ++i)
c6d09e15
WB
2497 if (strcmp(res, limit_opt[i].name) == 0)
2498 return limit_opt[i].value;
c6d09e15 2499
0fd73091 2500 /* Try to see if it's numeric, so the user may specify
c6d09e15 2501 * resources that the running kernel knows about but
0fd73091
CB
2502 * we don't.
2503 */
2504 ret = lxc_safe_int(res, &resid);
2505 if (ret < 0)
2506 return -1;
2507
2508 return resid;
c6d09e15
WB
2509}
2510
0fd73091
CB
2511int setup_resource_limits(struct lxc_list *limits, pid_t pid)
2512{
2513 int resid;
c6d09e15
WB
2514 struct lxc_list *it;
2515 struct lxc_limit *lim;
c6d09e15 2516
0fd73091 2517 lxc_list_for_each (it, limits) {
c6d09e15
WB
2518 lim = it->elem;
2519
2520 resid = parse_resource(lim->resource);
2521 if (resid < 0) {
0fd73091 2522 ERROR("Unknown resource %s", lim->resource);
c6d09e15
WB
2523 return -1;
2524 }
2525
f48b5fd8 2526#if HAVE_PRLIMIT || HAVE_PRLIMIT64
c6d09e15 2527 if (prlimit(pid, resid, &lim->limit, NULL) != 0) {
0fd73091
CB
2528 ERROR("Failed to set limit %s: %s", lim->resource,
2529 strerror(errno));
c6d09e15
WB
2530 return -1;
2531 }
f48b5fd8
FF
2532#else
2533 ERROR("Cannot set limit %s as prlimit is missing", lim->resource);
2534 return -1;
2535#endif
c6d09e15 2536 }
0fd73091 2537
c6d09e15
WB
2538 return 0;
2539}
2540
7edd0540
L
2541int setup_sysctl_parameters(struct lxc_list *sysctls)
2542{
2543 struct lxc_list *it;
2544 struct lxc_sysctl *elem;
0fd73091 2545 int ret = 0;
7edd0540
L
2546 char *tmp = NULL;
2547 char filename[MAXPATHLEN] = {0};
7edd0540 2548
0fd73091 2549 lxc_list_for_each (it, sysctls) {
7edd0540
L
2550 elem = it->elem;
2551 tmp = lxc_string_replace(".", "/", elem->key);
2552 if (!tmp) {
2553 ERROR("Failed to replace key %s", elem->key);
2554 return -1;
2555 }
2556
2557 ret = snprintf(filename, sizeof(filename), "/proc/sys/%s", tmp);
2558 free(tmp);
2559 if (ret < 0 || (size_t)ret >= sizeof(filename)) {
2560 ERROR("Error setting up sysctl parameters path");
2561 return -1;
2562 }
2563
0fd73091 2564 ret = lxc_write_to_file(filename, elem->value,
7cea5905 2565 strlen(elem->value), false, 0666);
7edd0540 2566 if (ret < 0) {
0fd73091
CB
2567 ERROR("Failed to setup sysctl parameters %s to %s",
2568 elem->key, elem->value);
7edd0540
L
2569 return -1;
2570 }
2571 }
0fd73091 2572
7edd0540
L
2573 return 0;
2574}
2575
61d7a733
YT
2576int setup_proc_filesystem(struct lxc_list *procs, pid_t pid)
2577{
2578 struct lxc_list *it;
2579 struct lxc_proc *elem;
0fd73091 2580 int ret = 0;
61d7a733
YT
2581 char *tmp = NULL;
2582 char filename[MAXPATHLEN] = {0};
61d7a733 2583
0fd73091 2584 lxc_list_for_each (it, procs) {
61d7a733
YT
2585 elem = it->elem;
2586 tmp = lxc_string_replace(".", "/", elem->filename);
2587 if (!tmp) {
2588 ERROR("Failed to replace key %s", elem->filename);
2589 return -1;
2590 }
2591
2592 ret = snprintf(filename, sizeof(filename), "/proc/%d/%s", pid, tmp);
2593 free(tmp);
2594 if (ret < 0 || (size_t)ret >= sizeof(filename)) {
2595 ERROR("Error setting up proc filesystem path");
2596 return -1;
2597 }
2598
0fd73091 2599 ret = lxc_write_to_file(filename, elem->value,
7cea5905 2600 strlen(elem->value), false, 0666);
61d7a733 2601 if (ret < 0) {
0fd73091
CB
2602 ERROR("Failed to setup proc filesystem %s to %s",
2603 elem->filename, elem->value);
61d7a733
YT
2604 return -1;
2605 }
2606 }
0fd73091 2607
61d7a733
YT
2608 return 0;
2609}
2610
ae9242c8
SH
2611static char *default_rootfs_mount = LXCROOTFSMOUNT;
2612
7b379ab3 2613struct lxc_conf *lxc_conf_init(void)
089cd8b8 2614{
26ddeedd 2615 int i;
0fd73091 2616 struct lxc_conf *new;
7b379ab3 2617
13277ec4 2618 new = malloc(sizeof(*new));
0fd73091 2619 if (!new)
7b379ab3 2620 return NULL;
7b379ab3
MN
2621 memset(new, 0, sizeof(*new));
2622
4b73005c 2623 new->loglevel = LXC_LOG_LEVEL_NOTSET;
cccc74b5 2624 new->personality = -1;
124fa0a8 2625 new->autodev = 1;
3a784510 2626 new->console.buffer_size = 0;
596a818d
DE
2627 new->console.log_path = NULL;
2628 new->console.log_fd = -1;
861813e5 2629 new->console.log_size = 0;
28a4b0e5 2630 new->console.path = NULL;
63376d7d 2631 new->console.peer = -1;
fb87aa6a
CB
2632 new->console.proxy.busy = -1;
2633 new->console.proxy.master = -1;
2634 new->console.proxy.slave = -1;
63376d7d
DL
2635 new->console.master = -1;
2636 new->console.slave = -1;
2637 new->console.name[0] = '\0';
732375f5 2638 memset(&new->console.ringbuf, 0, sizeof(struct lxc_ringbuf));
d2e30e99 2639 new->maincmd_fd = -1;
76a26f55 2640 new->nbd_idx = -1;
54c30e29 2641 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048 2642 if (!new->rootfs.mount) {
53f3f048
SH
2643 free(new);
2644 return NULL;
2645 }
858377e4 2646 new->logfd = -1;
7b379ab3 2647 lxc_list_init(&new->cgroup);
54860ed0 2648 lxc_list_init(&new->cgroup2);
7b379ab3
MN
2649 lxc_list_init(&new->network);
2650 lxc_list_init(&new->mount_list);
81810dd1 2651 lxc_list_init(&new->caps);
1fb86a7c 2652 lxc_list_init(&new->keepcaps);
f6d3e3e4 2653 lxc_list_init(&new->id_map);
46ad64ab
CB
2654 new->root_nsuid_map = NULL;
2655 new->root_nsgid_map = NULL;
f979ac15 2656 lxc_list_init(&new->includes);
4184c3e1 2657 lxc_list_init(&new->aliens);
7c661726 2658 lxc_list_init(&new->environment);
c6d09e15 2659 lxc_list_init(&new->limits);
7edd0540 2660 lxc_list_init(&new->sysctls);
61d7a733 2661 lxc_list_init(&new->procs);
44ae0fb6 2662 new->hooks_version = 0;
28d9e29e 2663 for (i = 0; i < NUM_LXC_HOOKS; i++)
26ddeedd 2664 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2665 lxc_list_init(&new->groups);
d39b10eb 2666 lxc_list_init(&new->state_clients);
fe4de9a6
DE
2667 new->lsm_aa_profile = NULL;
2668 new->lsm_se_context = NULL;
7a0bcca3 2669 new->tmp_umount_proc = false;
7b379ab3 2670
72bb04e4
PT
2671 /* if running in a new user namespace, init and COMMAND
2672 * default to running as UID/GID 0 when using lxc-execute */
2673 new->init_uid = 0;
2674 new->init_gid = 0;
43654d34 2675 memset(&new->cgroup_meta, 0, sizeof(struct lxc_cgroup));
b074bbf1 2676 memset(&new->ns_share, 0, sizeof(char *) * LXC_NS_MAX);
72bb04e4 2677
7b379ab3 2678 return new;
089cd8b8
DL
2679}
2680
344c9d81 2681int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
a19b974f 2682 size_t buf_size)
f6d3e3e4 2683{
29053180 2684 int fd, ret;
0fd73091 2685 char path[MAXPATHLEN];
f6d3e3e4 2686
a19b974f
CB
2687 if (geteuid() != 0 && idtype == ID_TYPE_GID) {
2688 size_t buflen;
2689
2690 ret = snprintf(path, MAXPATHLEN, "/proc/%d/setgroups", pid);
0fd73091 2691 if (ret < 0 || ret >= MAXPATHLEN)
a19b974f 2692 return -E2BIG;
a19b974f
CB
2693
2694 fd = open(path, O_WRONLY);
2695 if (fd < 0 && errno != ENOENT) {
2696 SYSERROR("Failed to open \"%s\"", path);
2697 return -1;
2698 }
2699
2388737b
CB
2700 if (fd >= 0) {
2701 buflen = sizeof("deny\n") - 1;
2702 errno = 0;
2703 ret = lxc_write_nointr(fd, "deny\n", buflen);
395b1a3e 2704 close(fd);
2388737b 2705 if (ret != buflen) {
0fd73091
CB
2706 SYSERROR("Failed to write \"deny\" to "
2707 "\"/proc/%d/setgroups\"", pid);
2388737b
CB
2708 return -1;
2709 }
395b1a3e 2710 TRACE("Wrote \"deny\" to \"/proc/%d/setgroups\"", pid);
a19b974f 2711 }
a19b974f
CB
2712 }
2713
29053180
CB
2714 ret = snprintf(path, MAXPATHLEN, "/proc/%d/%cid_map", pid,
2715 idtype == ID_TYPE_UID ? 'u' : 'g');
0fd73091 2716 if (ret < 0 || ret >= MAXPATHLEN)
f6d3e3e4 2717 return -E2BIG;
29053180
CB
2718
2719 fd = open(path, O_WRONLY);
2720 if (fd < 0) {
a19b974f 2721 SYSERROR("Failed to open \"%s\"", path);
29053180 2722 return -1;
f6d3e3e4 2723 }
29053180
CB
2724
2725 errno = 0;
2726 ret = lxc_write_nointr(fd, buf, buf_size);
395b1a3e 2727 close(fd);
29053180 2728 if (ret != buf_size) {
a19b974f 2729 SYSERROR("Failed to write %cid mapping to \"%s\"",
29053180 2730 idtype == ID_TYPE_UID ? 'u' : 'g', path);
29053180
CB
2731 return -1;
2732 }
29053180
CB
2733
2734 return 0;
f6d3e3e4
SH
2735}
2736
6e50e704
CB
2737/* Check whether a binary exist and has either CAP_SETUID, CAP_SETGID or both.
2738 *
2739 * @return 1 if functional binary was found
2740 * @return 0 if binary exists but is lacking privilege
2741 * @return -ENOENT if binary does not exist
2742 * @return -EINVAL if cap to check is neither CAP_SETUID nor CAP_SETGID
6e50e704 2743 */
df6a2945
CB
2744static int idmaptool_on_path_and_privileged(const char *binary, cap_value_t cap)
2745{
2746 char *path;
2747 int ret;
2748 struct stat st;
2749 int fret = 0;
2750
6e50e704
CB
2751 if (cap != CAP_SETUID && cap != CAP_SETGID)
2752 return -EINVAL;
2753
df6a2945
CB
2754 path = on_path(binary, NULL);
2755 if (!path)
2756 return -ENOENT;
2757
2758 ret = stat(path, &st);
2759 if (ret < 0) {
2760 fret = -errno;
2761 goto cleanup;
2762 }
2763
2764 /* Check if the binary is setuid. */
2765 if (st.st_mode & S_ISUID) {
0fd73091 2766 DEBUG("The binary \"%s\" does have the setuid bit set", path);
df6a2945
CB
2767 fret = 1;
2768 goto cleanup;
2769 }
2770
0fd73091 2771#if HAVE_LIBCAP && LIBCAP_SUPPORTS_FILE_CAPABILITIES
df6a2945
CB
2772 /* Check if it has the CAP_SETUID capability. */
2773 if ((cap & CAP_SETUID) &&
2774 lxc_file_cap_is_set(path, CAP_SETUID, CAP_EFFECTIVE) &&
2775 lxc_file_cap_is_set(path, CAP_SETUID, CAP_PERMITTED)) {
2776 DEBUG("The binary \"%s\" has CAP_SETUID in its CAP_EFFECTIVE "
0fd73091 2777 "and CAP_PERMITTED sets", path);
df6a2945
CB
2778 fret = 1;
2779 goto cleanup;
2780 }
2781
2782 /* Check if it has the CAP_SETGID capability. */
2783 if ((cap & CAP_SETGID) &&
2784 lxc_file_cap_is_set(path, CAP_SETGID, CAP_EFFECTIVE) &&
2785 lxc_file_cap_is_set(path, CAP_SETGID, CAP_PERMITTED)) {
2786 DEBUG("The binary \"%s\" has CAP_SETGID in its CAP_EFFECTIVE "
0fd73091 2787 "and CAP_PERMITTED sets", path);
df6a2945
CB
2788 fret = 1;
2789 goto cleanup;
2790 }
0fd73091 2791#else
69924fff
CB
2792 /* If we cannot check for file capabilities we need to give the benefit
2793 * of the doubt. Otherwise we might fail even though all the necessary
2794 * file capabilities are set.
2795 */
d6018f88 2796 DEBUG("Cannot check for file capabilites as full capability support is "
0fd73091 2797 "missing. Manual intervention needed");
d6018f88 2798 fret = 1;
0fd73091 2799#endif
df6a2945
CB
2800
2801cleanup:
2802 free(path);
2803 return fret;
2804}
2805
986ef930
CB
/* Run the command line passed in @args (a NUL-terminated string) through
 * "/bin/sh -c". Only returns, with -1, if the exec fails.
 */
int lxc_map_ids_exec_wrapper(void *args)
{
	char *cmdline = args;

	execl("/bin/sh", "sh", "-c", cmdline, (char *)NULL);

	return -1;
}
2811
f6d3e3e4
SH
2812int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
2813{
0fd73091 2814 int fill, left;
986ef930 2815 char u_or_g;
4bc3b759 2816 char *pos;
986ef930 2817 char cmd_output[MAXPATHLEN];
0fd73091
CB
2818 struct id_map *map;
2819 struct lxc_list *iterator;
2820 enum idtype type;
986ef930
CB
2821 /* strlen("new@idmap") = 9
2822 * +
2823 * strlen(" ") = 1
2824 * +
2825 * LXC_NUMSTRLEN64
2826 * +
2827 * strlen(" ") = 1
2828 *
2829 * We add some additional space to make sure that we really have
2830 * LXC_IDMAPLEN bytes available for our the {g,u]id mapping.
2831 */
0fd73091 2832 int ret = 0, gidmap = 0, uidmap = 0;
986ef930 2833 char mapbuf[9 + 1 + LXC_NUMSTRLEN64 + 1 + LXC_IDMAPLEN] = {0};
0fd73091 2834 bool had_entry = false, use_shadow = false;
c724025c
JC
2835 int hostuid, hostgid;
2836
2837 hostuid = geteuid();
2838 hostgid = getegid();
df6a2945
CB
2839
2840 /* If new{g,u}idmap exists, that is, if shadow is handing out subuid
2841 * ranges, then insist that root also reserve ranges in subuid. This
22038de5
SH
2842 * will protected it by preventing another user from being handed the
2843 * range by shadow.
2844 */
df6a2945 2845 uidmap = idmaptool_on_path_and_privileged("newuidmap", CAP_SETUID);
6e50e704
CB
2846 if (uidmap == -ENOENT)
2847 WARN("newuidmap binary is missing");
2848 else if (!uidmap)
2849 WARN("newuidmap is lacking necessary privileges");
2850
df6a2945 2851 gidmap = idmaptool_on_path_and_privileged("newgidmap", CAP_SETGID);
6e50e704
CB
2852 if (gidmap == -ENOENT)
2853 WARN("newgidmap binary is missing");
2854 else if (!gidmap)
2855 WARN("newgidmap is lacking necessary privileges");
2856
df6a2945 2857 if (uidmap > 0 && gidmap > 0) {
0fd73091 2858 DEBUG("Functional newuidmap and newgidmap binary found");
4bc3b759 2859 use_shadow = true;
df6a2945 2860 } else {
99d43365
CB
2861 /* In case unprivileged users run application containers via
2862 * execute() or a start*() there are valid cases where they may
2863 * only want to map their own {g,u}id. Let's not block them from
2864 * doing so by requiring geteuid() == 0.
2865 */
2866 DEBUG("No newuidmap and newgidmap binary found. Trying to "
c724025c
JC
2867 "write directly with euid %d", hostuid);
2868 }
2869
2870 /* Check if we really need to use newuidmap and newgidmap.
2871 * If the user is only remapping his own {g,u}id, we don't need it.
2872 */
2873 if (use_shadow && lxc_list_len(idmap) == 2) {
2874 use_shadow = false;
2875 lxc_list_for_each(iterator, idmap) {
2876 map = iterator->elem;
2877 if (map->idtype == ID_TYPE_UID && map->range == 1 &&
2878 map->nsid == hostuid && map->hostid == hostuid)
2879 continue;
2880 if (map->idtype == ID_TYPE_GID && map->range == 1 &&
2881 map->nsid == hostgid && map->hostid == hostgid)
2882 continue;
2883 use_shadow = true;
2884 break;
2885 }
0e6e3a41 2886 }
251d0d2a 2887
986ef930
CB
2888 for (type = ID_TYPE_UID, u_or_g = 'u'; type <= ID_TYPE_GID;
2889 type++, u_or_g = 'g') {
2890 pos = mapbuf;
2891
0e6e3a41 2892 if (use_shadow)
986ef930 2893 pos += sprintf(mapbuf, "new%cidmap %d", u_or_g, pid);
4f7521b4 2894
cf3ef16d 2895 lxc_list_for_each(iterator, idmap) {
251d0d2a 2896 map = iterator->elem;
cf3ef16d
SH
2897 if (map->idtype != type)
2898 continue;
2899
4bc3b759
CB
2900 had_entry = true;
2901
986ef930 2902 left = LXC_IDMAPLEN - (pos - mapbuf);
d1838f34 2903 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
4bc3b759
CB
2904 use_shadow ? " " : "", map->nsid,
2905 map->hostid, map->range,
0e6e3a41 2906 use_shadow ? "" : "\n");
a427e268
CB
2907 if (fill <= 0 || fill >= left) {
2908 /* The kernel only takes <= 4k for writes to
2909 * /proc/<pid>/{g,u}id_map
2910 */
2911 SYSERROR("Too many %cid mappings defined", u_or_g);
2912 return -1;
2913 }
4bc3b759 2914
cf3ef16d 2915 pos += fill;
251d0d2a 2916 }
cf3ef16d 2917 if (!had_entry)
4f7521b4 2918 continue;
cf3ef16d 2919
986ef930
CB
2920 /* Try to catch the ouput of new{g,u}idmap to make debugging
2921 * easier.
2922 */
2923 if (use_shadow) {
2924 ret = run_command(cmd_output, sizeof(cmd_output),
2925 lxc_map_ids_exec_wrapper,
2926 (void *)mapbuf);
2927 if (ret < 0) {
54fbbeb5
CB
2928 ERROR("new%cidmap failed to write mapping \"%s\": %s",
2929 u_or_g, cmd_output, mapbuf);
986ef930
CB
2930 return -1;
2931 }
54fbbeb5 2932 TRACE("new%cidmap wrote mapping \"%s\"", u_or_g, mapbuf);
d1838f34 2933 } else {
986ef930 2934 ret = write_id_mapping(type, pid, mapbuf, pos - mapbuf);
54fbbeb5 2935 if (ret < 0) {
da0f9977 2936 ERROR("Failed to write mapping: %s", mapbuf);
986ef930 2937 return -1;
54fbbeb5
CB
2938 }
2939 TRACE("Wrote mapping \"%s\"", mapbuf);
d1838f34 2940 }
986ef930
CB
2941
2942 memset(mapbuf, 0, sizeof(mapbuf));
f6d3e3e4 2943 }
251d0d2a 2944
986ef930 2945 return 0;
f6d3e3e4
SH
2946}
2947
0fd73091 2948/* Return the host uid/gid to which the container root is mapped in val.
0b3a6504 2949 * Return true if id was found, false otherwise.
cf3ef16d 2950 */
2a9a80cb 2951bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
4160c3a0 2952 unsigned long *val)
cf3ef16d 2953{
4160c3a0 2954 unsigned nsid;
0fd73091
CB
2955 struct id_map *map;
2956 struct lxc_list *it;
4160c3a0
CB
2957
2958 if (idtype == ID_TYPE_UID)
2959 nsid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
2960 else
2961 nsid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
cf3ef16d 2962
0fd73091 2963 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 2964 map = it->elem;
7b50c609 2965 if (map->idtype != idtype)
cf3ef16d 2966 continue;
4160c3a0 2967 if (map->nsid != nsid)
cf3ef16d 2968 continue;
2a9a80cb
SH
2969 *val = map->hostid;
2970 return true;
cf3ef16d 2971 }
4160c3a0 2972
2a9a80cb 2973 return false;
cf3ef16d
SH
2974}
2975
2133f58c 2976int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d 2977{
cf3ef16d 2978 struct id_map *map;
0fd73091
CB
2979 struct lxc_list *it;
2980
2981 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 2982 map = it->elem;
2133f58c 2983 if (map->idtype != idtype)
cf3ef16d 2984 continue;
0fd73091 2985
cf3ef16d 2986 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 2987 return (id - map->hostid) + map->nsid;
cf3ef16d 2988 }
0fd73091 2989
57d116ab 2990 return -1;
cf3ef16d
SH
2991}
2992
339efad9 2993int find_unmapped_nsid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d 2994{
cf3ef16d 2995 struct id_map *map;
0fd73091 2996 struct lxc_list *it;
2133f58c 2997 unsigned int freeid = 0;
0fd73091 2998
cf3ef16d 2999again:
0fd73091 3000 lxc_list_for_each (it, &conf->id_map) {
cf3ef16d 3001 map = it->elem;
2133f58c 3002 if (map->idtype != idtype)
cf3ef16d 3003 continue;
0fd73091 3004
cf3ef16d
SH
3005 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3006 freeid = map->nsid + map->range;
3007 goto again;
3008 }
3009 }
0fd73091 3010
cf3ef16d
SH
3011 return freeid;
3012}
3013
f4f52cb5
CB
/* Callback that replaces the current process with "lxc-usernsexec".
 *
 * args is the NULL-terminated argument vector handed to execvp().
 * Only returns (with -1) when the exec itself failed.
 */
int chown_mapped_root_exec_wrapper(void *args)
{
	char **argv = args;

	execvp("lxc-usernsexec", argv);

	/* execvp() only comes back on failure. */
	return -1;
}
3019
0fd73091 3020/* chown_mapped_root: for an unprivileged user with uid/gid X to
7b50c609
TS
3021 * chown a dir to subuid/subgid Y, he needs to run chown as root
3022 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3023 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3024 * root is privileged with respect to hostuid/hostgid X, allowing
3025 * him to do the chown.
f6d3e3e4 3026 */
41dc7155 3027int chown_mapped_root(const char *path, struct lxc_conf *conf)
f6d3e3e4 3028{
f4f52cb5 3029 uid_t rootuid, rootgid;
2a9a80cb 3030 unsigned long val;
f4f52cb5
CB
3031 int hostuid, hostgid, ret;
3032 struct stat sb;
3033 char map1[100], map2[100], map3[100], map4[100], map5[100];
3034 char ugid[100];
41dc7155 3035 const char *args1[] = {"lxc-usernsexec",
f4f52cb5
CB
3036 "-m", map1,
3037 "-m", map2,
3038 "-m", map3,
3039 "-m", map5,
3040 "--", "chown", ugid, path,
3041 NULL};
41dc7155 3042 const char *args2[] = {"lxc-usernsexec",
f4f52cb5
CB
3043 "-m", map1,
3044 "-m", map2,
3045 "-m", map3,
3046 "-m", map4,
3047 "-m", map5,
3048 "--", "chown", ugid, path,
3049 NULL};
3050 char cmd_output[MAXPATHLEN];
3051
3052 hostuid = geteuid();
3053 hostgid = getegid();
f6d3e3e4 3054
2a9a80cb 3055 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
bc80f098 3056 ERROR("No uid mapping for container root");
c4d10a05 3057 return -1;
f6d3e3e4 3058 }
f4f52cb5 3059 rootuid = (uid_t)val;
0fd73091 3060
7b50c609 3061 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
bc80f098 3062 ERROR("No gid mapping for container root");
7b50c609
TS
3063 return -1;
3064 }
f4f52cb5 3065 rootgid = (gid_t)val;
2a9a80cb 3066
f4f52cb5 3067 if (hostuid == 0) {
7b50c609 3068 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3069 ERROR("Error chowning %s", path);
3070 return -1;
3071 }
0fd73091 3072
c4d10a05
SH
3073 return 0;
3074 }
f3d7e4ca 3075
f4f52cb5 3076 if (rootuid == hostuid) {
1a0e70ac 3077 /* nothing to do */
b103ceac 3078 INFO("Container root is our uid; no need to chown");
f3d7e4ca
SH
3079 return 0;
3080 }
3081
bbdbf8f0 3082 /* save the current gid of "path" */
f4f52cb5
CB
3083 if (stat(path, &sb) < 0) {
3084 ERROR("Error stat %s", path);
f6d3e3e4
SH
3085 return -1;
3086 }
7b50c609 3087
bbdbf8f0
CB
3088 /* Update the path argument in case this was overlayfs. */
3089 args1[sizeof(args1) / sizeof(args1[0]) - 2] = path;
3090 args2[sizeof(args2) / sizeof(args2[0]) - 2] = path;
3091
f4f52cb5
CB
3092 /*
3093 * A file has to be group-owned by a gid mapped into the
3094 * container, or the container won't be privileged over it.
3095 */
3096 DEBUG("trying to chown \"%s\" to %d", path, hostgid);
3097 if (sb.st_uid == hostuid &&
3098 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3099 chown(path, -1, hostgid) < 0) {
3100 ERROR("Failed chgrping %s", path);
3101 return -1;
3102 }
f6d3e3e4 3103
1a0e70ac 3104 /* "u:0:rootuid:1" */
f4f52cb5
CB
3105 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
3106 if (ret < 0 || ret >= 100) {
3107 ERROR("Error uid printing map string");
3108 return -1;
3109 }
7b50c609 3110
1a0e70ac 3111 /* "u:hostuid:hostuid:1" */
f4f52cb5
CB
3112 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3113 if (ret < 0 || ret >= 100) {
3114 ERROR("Error uid printing map string");
3115 return -1;
3116 }
c4d10a05 3117
1a0e70ac 3118 /* "g:0:rootgid:1" */
f4f52cb5
CB
3119 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
3120 if (ret < 0 || ret >= 100) {
3121 ERROR("Error gid printing map string");
3122 return -1;
3123 }
98e5ba51 3124
1a0e70ac 3125 /* "g:pathgid:rootgid+pathgid:1" */
f4f52cb5
CB
3126 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3127 rootgid + (gid_t)sb.st_gid);
3128 if (ret < 0 || ret >= 100) {
3129 ERROR("Error gid printing map string");
3130 return -1;
3131 }
c4d10a05 3132
1a0e70ac 3133 /* "g:hostgid:hostgid:1" */
f4f52cb5
CB
3134 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3135 if (ret < 0 || ret >= 100) {
3136 ERROR("Error gid printing map string");
3137 return -1;
3138 }
7b50c609 3139
1a0e70ac 3140 /* "0:pathgid" (chown) */
f4f52cb5
CB
3141 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
3142 if (ret < 0 || ret >= 100) {
3143 ERROR("Error owner printing format string for chown");
3144 return -1;
3145 }
7b50c609 3146
f4f52cb5
CB
3147 if (hostgid == sb.st_gid)
3148 ret = run_command(cmd_output, sizeof(cmd_output),
3149 chown_mapped_root_exec_wrapper,
3150 (void *)args1);
3151 else
3152 ret = run_command(cmd_output, sizeof(cmd_output),
3153 chown_mapped_root_exec_wrapper,
3154 (void *)args2);
3155 if (ret < 0)
3156 ERROR("lxc-usernsexec failed: %s", cmd_output);
7b50c609 3157
f4f52cb5 3158 return ret;
f6d3e3e4
SH
3159}
3160
943144d9
CB
3161/* NOTE: Must not be called from inside the container namespace! */
3162int lxc_create_tmp_proc_mount(struct lxc_conf *conf)
5112cd70
SH
3163{
3164 int mounted;
3165
943144d9 3166 mounted = lxc_mount_proc_if_needed(conf->rootfs.path ? conf->rootfs.mount : "");
5112cd70 3167 if (mounted == -1) {
0fd73091 3168 SYSERROR("Failed to mount proc in the container");
01958b1f 3169 /* continue only if there is no rootfs */
943144d9 3170 if (conf->rootfs.path)
01958b1f 3171 return -1;
5112cd70 3172 } else if (mounted == 1) {
7a0bcca3 3173 conf->tmp_umount_proc = true;
5112cd70 3174 }
943144d9 3175
5112cd70
SH
3176 return 0;
3177}
3178
3179void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3180{
7a0bcca3 3181 if (!lxc_conf->tmp_umount_proc)
0fd73091
CB
3182 return;
3183
7a0bcca3
CB
3184 (void)umount2("/proc", MNT_DETACH);
3185 lxc_conf->tmp_umount_proc = false;
5112cd70
SH
3186}
3187
/* Walk /proc/self/mountinfo and change any shared entries to slave.
 *
 * The mount table is first copied into a memfd (or, when memfd_create() is
 * not available, a temporary file) and then parsed from that copy. Every
 * entry whose optional fields contain "shared" is remounted MS_SLAVE.
 * Failures are logged; a failure on one entry does not stop the walk.
 */
void remount_all_slave(void)
{
	int memfd, mntinfo_fd, ret;
	ssize_t copied;
	FILE *f;
	size_t len = 0;
	char *line = NULL;

	mntinfo_fd = open("/proc/self/mountinfo", O_RDONLY | O_CLOEXEC);
	if (mntinfo_fd < 0) {
		SYSERROR("Failed to open \"/proc/self/mountinfo\"");
		return;
	}

	memfd = memfd_create(".lxc_mountinfo", MFD_CLOEXEC);
	if (memfd < 0) {
		char template[] = P_tmpdir "/.lxc_mountinfo_XXXXXX";

		/* Only fall back to a regular temporary file when the
		 * syscall itself is missing.
		 */
		if (errno != ENOSYS) {
			SYSERROR("Failed to create temporary in-memory file");
			close(mntinfo_fd);
			return;
		}

		memfd = lxc_make_tmpfile(template, true);
		if (memfd < 0) {
			close(mntinfo_fd);
			WARN("Failed to create temporary file");
			return;
		}
	}

#define __LXC_SENDFILE_MAX 0x7ffff000 /* maximum number of bytes sendfile can handle */
	/* Retry the copy for as long as it is interrupted by a signal. */
	do {
		copied = sendfile(memfd, mntinfo_fd, NULL, __LXC_SENDFILE_MAX);
	} while (copied < 0 && errno == EINTR);

	if (copied < 0) {
		SYSERROR("Failed to copy \"/proc/self/mountinfo\"");
		close(mntinfo_fd);
		close(memfd);
		return;
	}
	close(mntinfo_fd);

	/* Rewind the copy so it can be read from the start. */
	ret = lseek(memfd, 0, SEEK_SET);
	if (ret < 0) {
		SYSERROR("Failed to reset file descriptor offset");
		close(memfd);
		return;
	}

	/* After a successful fdopen() memfd will be closed when calling
	 * fclose(f). Calling close(memfd) afterwards is undefined.
	 */
	f = fdopen(memfd, "r");
	if (!f) {
		SYSERROR("Failed to open copy of \"/proc/self/mountinfo\" to mark "
			 "all shared. Continuing");
		close(memfd);
		return;
	}

	while (getline(&line, &len, f) != -1) {
		char *mntpoint, *optional;

		mntpoint = get_field(line, 4);
		if (!mntpoint)
			continue;

		optional = get_field(mntpoint, 2);
		if (!optional)
			continue;

		null_endofword(optional);
		if (!strstr(optional, "shared"))
			continue;

		null_endofword(mntpoint);
		ret = mount(NULL, mntpoint, NULL, MS_SLAVE, NULL);
		if (ret < 0) {
			SYSERROR("Failed to make \"%s\" MS_SLAVE", mntpoint);
			ERROR("Continuing...");
			continue;
		}
		TRACE("Remounted \"%s\" as MS_SLAVE", mntpoint);
	}
	fclose(f);
	free(line);
	TRACE("Remounted all mount table entries as MS_SLAVE");
}
3282
794248d0 3283static int lxc_execute_bind_init(struct lxc_handler *handler)
2322903b
SH
3284{
3285 int ret;
794248d0
CB
3286 char *p;
3287 char path[PATH_MAX], destpath[PATH_MAX];
3288 struct lxc_conf *conf = handler->conf;
9d9c111c
SH
3289
3290 /* If init exists in the container, don't bind mount a static one */
3291 p = choose_init(conf->rootfs.mount);
3292 if (p) {
41089848
TA
3293 char *old = p;
3294
3295 p = strdup(old + strlen(conf->rootfs.mount));
3296 free(old);
3297 if (!p)
3298 return -ENOMEM;
3299
3300 INFO("Found existing init at \"%s\"", p);
3301 goto out;
9d9c111c 3302 }
2322903b
SH
3303
3304 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
0fd73091 3305 if (ret < 0 || ret >= PATH_MAX)
8353b4c9 3306 return -1;
2322903b
SH
3307
3308 if (!file_exists(path)) {
0fd73091 3309 ERROR("The file \"%s\" does not exist on host", path);
8353b4c9 3310 return -1;
2322903b
SH
3311 }
3312
794248d0 3313 ret = snprintf(destpath, PATH_MAX, "%s" P_tmpdir "%s", conf->rootfs.mount, "/.lxc-init");
0fd73091 3314 if (ret < 0 || ret >= PATH_MAX)
8353b4c9 3315 return -1;
2322903b
SH
3316
3317 if (!file_exists(destpath)) {
794248d0
CB
3318 ret = mknod(destpath, S_IFREG | 0000, 0);
3319 if (ret < 0 && errno != EEXIST) {
3320 SYSERROR("Failed to create dummy \"%s\" file as bind mount target", destpath);
8353b4c9 3321 return -1;
2322903b 3322 }
2322903b
SH
3323 }
3324
592fd47a 3325 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
8353b4c9 3326 if (ret < 0) {
0fd73091 3327 SYSERROR("Failed to bind mount lxc.init.static into container");
8353b4c9
CB
3328 return -1;
3329 }
3330
794248d0
CB
3331 p = strdup(destpath + strlen(conf->rootfs.mount));
3332 if (!p)
3333 return -ENOMEM;
794248d0 3334
8353b4c9 3335 INFO("Bind mounted lxc.init.static into container at \"%s\"", path);
41089848 3336out:
4b5b3a2a 3337 ((struct execute_args *)handler->data)->init_fd = -1;
41089848 3338 ((struct execute_args *)handler->data)->init_path = p;
8353b4c9 3339 return 0;
2322903b
SH
3340}
3341
0fd73091
CB
3342/* This does the work of remounting / if it is shared, calling the container
3343 * pre-mount hooks, and mounting the rootfs.
35120d9c
SH
3344 */
3345int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3346{
0fd73091
CB
3347 int ret;
3348
35120d9c 3349 if (conf->rootfs_setup) {
35120d9c 3350 const char *path = conf->rootfs.mount;
0fd73091
CB
3351
3352 /* The rootfs was set up in another namespace. bind-mount it to
3353 * give us a mount in our own ns so we can pivot_root to it
3354 */
3355 ret = mount(path, path, "rootfs", MS_BIND, NULL);
3356 if (ret < 0) {
3357 ERROR("Failed to bind mount container / onto itself");
145832ba 3358 return -1;
35120d9c 3359 }
0fd73091
CB
3360
3361 TRACE("Bind mounted container / onto itself");
145832ba 3362 return 0;
35120d9c 3363 }
d4ef7c50 3364
e995d7a2
SH
3365 remount_all_slave();
3366
0fd73091
CB
3367 ret = run_lxc_hooks(name, "pre-mount", conf, NULL);
3368 if (ret < 0) {
3369 ERROR("Failed to run pre-mount hooks");
35120d9c
SH
3370 return -1;
3371 }
3372
0fd73091
CB
3373 ret = lxc_setup_rootfs(conf);
3374 if (ret < 0) {
3375 ERROR("Failed to setup rootfs for");
35120d9c
SH
3376 return -1;
3377 }
3378
3379 conf->rootfs_setup = true;
3380 return 0;
3381}
3382
1c1c7051
SH
3383static bool verify_start_hooks(struct lxc_conf *conf)
3384{
1c1c7051 3385 char path[MAXPATHLEN];
0fd73091
CB
3386 struct lxc_list *it;
3387
3388 lxc_list_for_each (it, &conf->hooks[LXCHOOK_START]) {
1c1c7051 3389 int ret;
0fd73091
CB
3390 struct stat st;
3391 char *hookname = it->elem;
1c1c7051
SH
3392
3393 ret = snprintf(path, MAXPATHLEN, "%s%s",
0fd73091
CB
3394 conf->rootfs.path ? conf->rootfs.mount : "",
3395 hookname);
1c1c7051
SH
3396 if (ret < 0 || ret >= MAXPATHLEN)
3397 return false;
0fd73091 3398
1c1c7051 3399 ret = stat(path, &st);
0fd73091 3400 if (ret < 0) {
7b6753e7 3401 SYSERROR("Start hook %s not found in container",
0fd73091 3402 hookname);
1c1c7051
SH
3403 return false;
3404 }
0fd73091 3405
6a0c909a 3406 return true;
1c1c7051
SH
3407 }
3408
3409 return true;
3410}
3411
4b5b3a2a
TA
/* Probe whether the running kernel implements the execveat() syscall.
 *
 * Returns false when the syscall number is unknown at build time or the
 * probe call fails with ENOSYS, true otherwise.
 */
static bool execveat_supported(void)
{
#ifdef __NR_execveat
	long ret;

	/*
	 * We use the syscall here, because it was introduced in kernel 3.19,
	 * while glibc got support for using the syscall much later, in 2.27.
	 * We don't want to use glibc because it falls back to /proc, and the
	 * container may not have /proc mounted depending on its configuration.
	 */
	ret = syscall(__NR_execveat, -1, "", NULL, NULL, AT_EMPTY_PATH);

	/* errno is only meaningful when the call actually failed; without
	 * this check a stale ENOSYS from an earlier call could be misread.
	 */
	if (ret < 0 && errno == ENOSYS)
		return false;

	return true;
#else
	return false;
#endif
}
3430
3b988b33 3431int lxc_setup(struct lxc_handler *handler)
35120d9c 3432{
2187efd3 3433 int ret;
0fd73091 3434 const char *lxcpath = handler->lxcpath, *name = handler->name;
35120d9c 3435 struct lxc_conf *lxc_conf = handler->conf;
35120d9c 3436
8353b4c9
CB
3437 ret = do_rootfs_setup(lxc_conf, name, lxcpath);
3438 if (ret < 0) {
3439 ERROR("Failed to setup rootfs");
35120d9c
SH
3440 return -1;
3441 }
3442
28d9e29e 3443 if (handler->nsfd[LXC_NS_UTS] == -1) {
8353b4c9
CB
3444 ret = setup_utsname(lxc_conf->utsname);
3445 if (ret < 0) {
0fd73091 3446 ERROR("Failed to setup the utsname %s", name);
6c544cb3
MM
3447 return -1;
3448 }
0ad19a3f 3449 }
3450
8353b4c9
CB
3451 ret = lxc_setup_network_in_child_namespaces(lxc_conf, &lxc_conf->network);
3452 if (ret < 0) {
3453 ERROR("Failed to setup network");
95b5ffaf 3454 return -1;
0ad19a3f 3455 }
3456
8353b4c9
CB
3457 ret = lxc_network_send_name_and_ifindex_to_parent(handler);
3458 if (ret < 0) {
3459 ERROR("Failed to send network device names and ifindices to parent");
790255cf
CB
3460 return -1;
3461 }
3462
bc6928ff 3463 if (lxc_conf->autodev > 0) {
8353b4c9
CB
3464 ret = mount_autodev(name, &lxc_conf->rootfs, lxcpath);
3465 if (ret < 0) {
3466 ERROR("Failed to mount \"/dev\"");
c6883f38
SH
3467 return -1;
3468 }
3469 }
3470
8353b4c9
CB
3471 /* Do automatic mounts (mainly /proc and /sys), but exclude those that
3472 * need to wait until other stuff has finished.
368bbc02 3473 */
8353b4c9
CB
3474 ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler);
3475 if (ret < 0) {
3476 ERROR("Failed to setup first automatic mounts");
368bbc02
CS
3477 return -1;
3478 }
3479
8353b4c9
CB
3480 ret = setup_mount(lxc_conf, &lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath);
3481 if (ret < 0) {
3482 ERROR("Failed to setup mounts");
95b5ffaf 3483 return -1;
576f946d 3484 }
3485
7b6753e7 3486 /* Make sure any start hooks are in the container */
1c1c7051
SH
3487 if (!verify_start_hooks(lxc_conf))
3488 return -1;
3489
8353b4c9 3490 if (lxc_conf->is_execute) {
4b5b3a2a
TA
3491 if (execveat_supported()) {
3492 int fd;
3493 char path[PATH_MAX];
3494
3495 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3496 if (ret < 0 || ret >= PATH_MAX) {
3497 ERROR("Path to init.lxc.static too long");
3498 return -1;
3499 }
3500
3501 fd = open(path, O_PATH | O_CLOEXEC);
3502 if (fd < 0) {
3503 SYSERROR("Unable to open lxc.init.static");
3504 return -1;
3505 }
3506
3507 ((struct execute_args *)handler->data)->init_fd = fd;
3508 ((struct execute_args *)handler->data)->init_path = NULL;
3509 } else {
3510 ret = lxc_execute_bind_init(handler);
3511 if (ret < 0) {
3512 ERROR("Failed to bind-mount the lxc init system");
3513 return -1;
3514 }
8353b4c9
CB
3515 }
3516 }
2322903b 3517
8353b4c9
CB
3518 /* Now mount only cgroups, if wanted. Before, /sys could not have been
3519 * mounted. It is guaranteed to be mounted now either through
3520 * automatically or via fstab entries.
368bbc02 3521 */
8353b4c9
CB
3522 ret = lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler);
3523 if (ret < 0) {
3524 ERROR("Failed to setup remaining automatic mounts");
368bbc02
CS
3525 return -1;
3526 }
3527
8353b4c9 3528 ret = run_lxc_hooks(name, "mount", lxc_conf, NULL);
1a2cf89d 3529 if (ret < 0) {
8353b4c9 3530 ERROR("Failed to run mount hooks");
773fb9ca
SH
3531 return -1;
3532 }
3533
bc6928ff 3534 if (lxc_conf->autodev > 0) {
8353b4c9
CB
3535 ret = run_lxc_hooks(name, "autodev", lxc_conf, NULL);
3536 if (ret < 0) {
3537 ERROR("Failed to run autodev hooks");
f7bee6c6
MW
3538 return -1;
3539 }
06749971 3540
8353b4c9
CB
3541 ret = lxc_fill_autodev(&lxc_conf->rootfs);
3542 if (ret < 0) {
3543 ERROR("Failed to populate \"/dev\"");
91c3830e
SH
3544 return -1;
3545 }
3546 }
368bbc02 3547
8353b4c9
CB
3548 if (!lxc_list_empty(&lxc_conf->mount_list)) {
3549 ret = setup_mount_entries(lxc_conf, &lxc_conf->rootfs,
3550 &lxc_conf->mount_list, name, lxcpath);
3551 if (ret < 0) {
3552 ERROR("Failed to setup mount entries");
3553 return -1;
3554 }
181437fd
YT
3555 }
3556
ed8704d0 3557 ret = lxc_setup_console(&lxc_conf->rootfs, &lxc_conf->console,
885766f5 3558 lxc_conf->ttys.dir);
ed8704d0
CB
3559 if (ret < 0) {
3560 ERROR("Failed to setup console");
95b5ffaf 3561 return -1;
6e590161 3562 }
3563
ed8704d0
CB
3564 ret = lxc_setup_dev_symlinks(&lxc_conf->rootfs);
3565 if (ret < 0) {
8353b4c9 3566 ERROR("Failed to setup \"/dev\" symlinks");
69aa6655
DE
3567 return -1;
3568 }
3569
8353b4c9
CB
3570 ret = lxc_create_tmp_proc_mount(lxc_conf);
3571 if (ret < 0) {
3572 ERROR("Failed to \"/proc\" LSMs");
e075f5d9 3573 return -1;
e075f5d9 3574 }
e075f5d9 3575
8353b4c9
CB
3576 ret = setup_pivot_root(&lxc_conf->rootfs);
3577 if (ret < 0) {
3578 ERROR("Failed to pivot root into rootfs");
95b5ffaf 3579 return -1;
ed502555 3580 }
3581
8353b4c9
CB
3582 ret = lxc_setup_devpts(lxc_conf);
3583 if (ret < 0) {
3584 ERROR("Failed to setup new devpts instance");
95b5ffaf 3585 return -1;
3c26f34e 3586 }
3587
2187efd3
CB
3588 ret = lxc_create_ttys(handler);
3589 if (ret < 0)
e8bd4e43 3590 return -1;
e8bd4e43 3591
8353b4c9
CB
3592 ret = setup_personality(lxc_conf->personality);
3593 if (ret < 0) {
3594 ERROR("Failed to set personality");
cccc74b5
DL
3595 return -1;
3596 }
3597
8353b4c9
CB
3598 /* Set sysctl value to a path under /proc/sys as determined from the
3599 * key. For e.g. net.ipv4.ip_forward translated to
3600 * /proc/sys/net/ipv4/ip_forward.
7edd0540
L
3601 */
3602 if (!lxc_list_empty(&lxc_conf->sysctls)) {
3603 ret = setup_sysctl_parameters(&lxc_conf->sysctls);
8353b4c9
CB
3604 if (ret < 0) {
3605 ERROR("Failed to setup sysctl parameters");
7edd0540 3606 return -1;
8353b4c9 3607 }
7edd0540
L
3608 }
3609
97a8f74f
SG
3610 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3611 if (!lxc_list_empty(&lxc_conf->caps)) {
8353b4c9
CB
3612 ERROR("Container requests lxc.cap.drop and "
3613 "lxc.cap.keep: either use lxc.cap.drop or "
3614 "lxc.cap.keep, not both");
f6d3e3e4
SH
3615 return -1;
3616 }
8353b4c9 3617
97a8f74f 3618 if (dropcaps_except(&lxc_conf->keepcaps)) {
8353b4c9 3619 ERROR("Failed to keep capabilities");
97a8f74f
SG
3620 return -1;
3621 }
3622 } else if (setup_caps(&lxc_conf->caps)) {
8353b4c9 3623 ERROR("Failed to drop capabilities");
97a8f74f 3624 return -1;
81810dd1
DL
3625 }
3626
8353b4c9 3627 NOTICE("The container \"%s\" is set up", name);
cd54d859 3628
0ad19a3f 3629 return 0;
3630}
26ddeedd 3631
3f60c2f7 3632int run_lxc_hooks(const char *name, char *hookname, struct lxc_conf *conf,
14a7b0f9 3633 char *argv[])
26ddeedd 3634{
26ddeedd 3635 struct lxc_list *it;
3f60c2f7 3636 int which = -1;
26ddeedd 3637
3f60c2f7 3638 if (strcmp(hookname, "pre-start") == 0)
26ddeedd 3639 which = LXCHOOK_PRESTART;
3f60c2f7 3640 else if (strcmp(hookname, "start-host") == 0)
08dd2805 3641 which = LXCHOOK_START_HOST;
3f60c2f7 3642 else if (strcmp(hookname, "pre-mount") == 0)
5ea6163a 3643 which = LXCHOOK_PREMOUNT;
3f60c2f7 3644 else if (strcmp(hookname, "mount") == 0)
26ddeedd 3645 which = LXCHOOK_MOUNT;
3f60c2f7 3646 else if (strcmp(hookname, "autodev") == 0)
f7bee6c6 3647 which = LXCHOOK_AUTODEV;
3f60c2f7 3648 else if (strcmp(hookname, "start") == 0)
26ddeedd 3649 which = LXCHOOK_START;
3f60c2f7 3650 else if (strcmp(hookname, "stop") == 0)
52492063 3651 which = LXCHOOK_STOP;
3f60c2f7 3652 else if (strcmp(hookname, "post-stop") == 0)
26ddeedd 3653 which = LXCHOOK_POSTSTOP;
3f60c2f7 3654 else if (strcmp(hookname, "clone") == 0)
148e91f5 3655 which = LXCHOOK_CLONE;
3f60c2f7 3656 else if (strcmp(hookname, "destroy") == 0)
37cf711b 3657 which = LXCHOOK_DESTROY;
26ddeedd
SH
3658 else
3659 return -1;
3f60c2f7 3660
0fd73091 3661 lxc_list_for_each (it, &conf->hooks[which]) {
26ddeedd 3662 int ret;
3f60c2f7
CB
3663 char *hook = it->elem;
3664
3665 ret = run_script_argv(name, conf->hooks_version, "lxc", hook,
14a7b0f9 3666 hookname, argv);
3f60c2f7
CB
3667 if (ret < 0)
3668 return -1;
26ddeedd 3669 }
3f60c2f7 3670
26ddeedd
SH
3671 return 0;
3672}
72d0e1cb 3673
72d0e1cb
SG
3674int lxc_clear_config_caps(struct lxc_conf *c)
3675{
1a0e70ac 3676 struct lxc_list *it, *next;
72d0e1cb 3677
0fd73091 3678 lxc_list_for_each_safe (it, &c->caps, next) {
72d0e1cb
SG
3679 lxc_list_del(it);
3680 free(it->elem);
3681 free(it);
3682 }
0fd73091 3683
72d0e1cb
SG
3684 return 0;
3685}
3686
c7e345ae
CB
3687static int lxc_free_idmap(struct lxc_list *id_map)
3688{
27c27d73
SH
3689 struct lxc_list *it, *next;
3690
0fd73091 3691 lxc_list_for_each_safe (it, id_map, next) {
27c27d73
SH
3692 lxc_list_del(it);
3693 free(it->elem);
3694 free(it);
3695 }
c7e345ae 3696
27c27d73
SH
3697 return 0;
3698}
3699
4355ab5f
SH
3700int lxc_clear_idmaps(struct lxc_conf *c)
3701{
3702 return lxc_free_idmap(&c->id_map);
3703}
3704
1fb86a7c
SH
3705int lxc_clear_config_keepcaps(struct lxc_conf *c)
3706{
0fd73091 3707 struct lxc_list *it, *next;
1fb86a7c 3708
0fd73091 3709 lxc_list_for_each_safe (it, &c->keepcaps, next) {
1fb86a7c
SH
3710 lxc_list_del(it);
3711 free(it->elem);
3712 free(it);
3713 }
0fd73091 3714
1fb86a7c
SH
3715 return 0;
3716}
3717
54860ed0 3718int lxc_clear_cgroups(struct lxc_conf *c, const char *key, int version)
72d0e1cb 3719{
54860ed0 3720 char *global_token, *namespaced_token;
ab1a6cac 3721 size_t namespaced_token_len;
54860ed0 3722 struct lxc_list *it, *next, *list;
ab1a6cac 3723 const char *k = key;
54860ed0 3724 bool all = false;
72d0e1cb 3725
54860ed0
CB
3726 if (version == CGROUP2_SUPER_MAGIC) {
3727 global_token = "lxc.cgroup2";
3728 namespaced_token = "lxc.cgroup2.";
0fd73091 3729 namespaced_token_len = sizeof("lxc.cgroup2.") - 1;
54860ed0
CB
3730 list = &c->cgroup2;
3731 } else if (version == CGROUP_SUPER_MAGIC) {
3732 global_token = "lxc.cgroup";
3733 namespaced_token = "lxc.cgroup.";
0fd73091 3734 namespaced_token_len = sizeof("lxc.cgroup.") - 1;
54860ed0
CB
3735 list = &c->cgroup;
3736 } else {
ab1a6cac 3737 return -EINVAL;
54860ed0
CB
3738 }
3739
3740 if (strcmp(key, global_token) == 0)
72d0e1cb 3741 all = true;
54860ed0 3742 else if (strncmp(key, namespaced_token, sizeof(namespaced_token) - 1) == 0)
ab1a6cac 3743 k += namespaced_token_len;
a6390f01 3744 else
ab1a6cac 3745 return -EINVAL;
72d0e1cb 3746
0fd73091 3747 lxc_list_for_each_safe (it, list, next) {
72d0e1cb 3748 struct lxc_cgroup *cg = it->elem;
54860ed0 3749
72d0e1cb
SG
3750 if (!all && strcmp(cg->subsystem, k) != 0)
3751 continue;
54860ed0 3752
72d0e1cb
SG
3753 lxc_list_del(it);
3754 free(cg->subsystem);
3755 free(cg->value);
3756 free(cg);
3757 free(it);
3758 }
e409b214 3759
72d0e1cb
SG
3760 return 0;
3761}
3762
c6d09e15
WB
3763int lxc_clear_limits(struct lxc_conf *c, const char *key)
3764{
3765 struct lxc_list *it, *next;
c6d09e15 3766 const char *k = NULL;
0fd73091 3767 bool all = false;
c6d09e15 3768
b668653c 3769 if (strcmp(key, "lxc.limit") == 0 || strcmp(key, "lxc.prlimit") == 0)
c6d09e15 3770 all = true;
b668653c
CB
3771 else if (strncmp(key, "lxc.limit.", sizeof("lxc.limit.") - 1) == 0)
3772 k = key + sizeof("lxc.limit.") - 1;
3773 else if (strncmp(key, "lxc.prlimit.", sizeof("lxc.prlimit.") - 1) == 0)
3774 k = key + sizeof("lxc.prlimit.") - 1;
c6d09e15
WB
3775 else
3776 return -1;
3777
0fd73091 3778 lxc_list_for_each_safe (it, &c->limits, next) {
c6d09e15 3779 struct lxc_limit *lim = it->elem;
0fd73091 3780
c6d09e15
WB
3781 if (!all && strcmp(lim->resource, k) != 0)
3782 continue;
0fd73091 3783
c6d09e15
WB
3784 lxc_list_del(it);
3785 free(lim->resource);
3786 free(lim);
3787 free(it);
3788 }
b668653c 3789
c6d09e15
WB
3790 return 0;
3791}
3792
7edd0540
L
3793int lxc_clear_sysctls(struct lxc_conf *c, const char *key)
3794{
3795 struct lxc_list *it, *next;
7edd0540 3796 const char *k = NULL;
0fd73091 3797 bool all = false;
7edd0540
L
3798
3799 if (strcmp(key, "lxc.sysctl") == 0)
3800 all = true;
3801 else if (strncmp(key, "lxc.sysctl.", sizeof("lxc.sysctl.") - 1) == 0)
3802 k = key + sizeof("lxc.sysctl.") - 1;
3803 else
3804 return -1;
3805
0fd73091 3806 lxc_list_for_each_safe (it, &c->sysctls, next) {
7edd0540 3807 struct lxc_sysctl *elem = it->elem;
0fd73091 3808
7edd0540
L
3809 if (!all && strcmp(elem->key, k) != 0)
3810 continue;
0fd73091 3811
7edd0540
L
3812 lxc_list_del(it);
3813 free(elem->key);
3814 free(elem->value);
3815 free(elem);
3816 free(it);
3817 }
0fd73091 3818
7edd0540
L
3819 return 0;
3820}
3821
61d7a733
YT
3822int lxc_clear_procs(struct lxc_conf *c, const char *key)
3823{
0fd73091 3824 struct lxc_list *it, *next;
61d7a733 3825 const char *k = NULL;
0fd73091 3826 bool all = false;
61d7a733
YT
3827
3828 if (strcmp(key, "lxc.proc") == 0)
3829 all = true;
3830 else if (strncmp(key, "lxc.proc.", sizeof("lxc.proc.") - 1) == 0)
3831 k = key + sizeof("lxc.proc.") - 1;
3832 else
3833 return -1;
3834
0fd73091 3835 lxc_list_for_each_safe (it, &c->procs, next) {
61d7a733 3836 struct lxc_proc *proc = it->elem;
0fd73091 3837
61d7a733
YT
3838 if (!all && strcmp(proc->filename, k) != 0)
3839 continue;
0fd73091 3840
61d7a733
YT
3841 lxc_list_del(it);
3842 free(proc->filename);
3843 free(proc->value);
3844 free(proc);
3845 free(it);
3846 }
3847
3848 return 0;
3849}
3850
ee1e7aa0
SG
3851int lxc_clear_groups(struct lxc_conf *c)
3852{
0fd73091 3853 struct lxc_list *it, *next;
ee1e7aa0 3854
0fd73091 3855 lxc_list_for_each_safe (it, &c->groups, next) {
ee1e7aa0
SG
3856 lxc_list_del(it);
3857 free(it->elem);
3858 free(it);
3859 }
0fd73091 3860
ee1e7aa0
SG
3861 return 0;
3862}
3863
ab799c0b
SG
3864int lxc_clear_environment(struct lxc_conf *c)
3865{
0fd73091 3866 struct lxc_list *it, *next;
ab799c0b 3867
0fd73091 3868 lxc_list_for_each_safe (it, &c->environment, next) {
ab799c0b
SG
3869 lxc_list_del(it);
3870 free(it->elem);
3871 free(it);
3872 }
0fd73091 3873
ab799c0b
SG
3874 return 0;
3875}
3876
72d0e1cb
SG
3877int lxc_clear_mount_entries(struct lxc_conf *c)
3878{
0fd73091 3879 struct lxc_list *it, *next;
72d0e1cb 3880
0fd73091 3881 lxc_list_for_each_safe (it, &c->mount_list, next) {
72d0e1cb
SG
3882 lxc_list_del(it);
3883 free(it->elem);
3884 free(it);
3885 }
0fd73091 3886
72d0e1cb
SG
3887 return 0;
3888}
3889
b099e9e9
SH
3890int lxc_clear_automounts(struct lxc_conf *c)
3891{
3892 c->auto_mounts = 0;
3893 return 0;
3894}
3895
12a50cc6 3896int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3897{
72d0e1cb 3898 int i;
0fd73091
CB
3899 struct lxc_list *it, *next;
3900 const char *k = NULL;
3901 bool all = false, done = false;
72d0e1cb 3902
17ed13a3
SH
3903 if (strcmp(key, "lxc.hook") == 0)
3904 all = true;
0fd73091
CB
3905 else if (strncmp(key, "lxc.hook.", sizeof("lxc.hook.") - 1) == 0)
3906 k = key + sizeof("lxc.hook.") - 1;
a6390f01
WB
3907 else
3908 return -1;
17ed13a3 3909
0fd73091 3910 for (i = 0; i < NUM_LXC_HOOKS; i++) {
17ed13a3 3911 if (all || strcmp(k, lxchook_names[i]) == 0) {
0fd73091 3912 lxc_list_for_each_safe (it, &c->hooks[i], next) {
17ed13a3
SH
3913 lxc_list_del(it);
3914 free(it->elem);
3915 free(it);
3916 }
0fd73091 3917
17ed13a3 3918 done = true;
72d0e1cb
SG
3919 }
3920 }
17ed13a3
SH
3921
3922 if (!done) {
3923 ERROR("Invalid hook key: %s", key);
3924 return -1;
3925 }
0fd73091 3926
72d0e1cb
SG
3927 return 0;
3928}
8eb5694b 3929
4184c3e1
SH
3930static inline void lxc_clear_aliens(struct lxc_conf *conf)
3931{
0fd73091 3932 struct lxc_list *it, *next;
4184c3e1 3933
0fd73091 3934 lxc_list_for_each_safe (it, &conf->aliens, next) {
4184c3e1
SH
3935 lxc_list_del(it);
3936 free(it->elem);
3937 free(it);
3938 }
3939}
3940
c7b15d1e 3941void lxc_clear_includes(struct lxc_conf *conf)
f979ac15 3942{
0fd73091 3943 struct lxc_list *it, *next;
f979ac15 3944
0fd73091 3945 lxc_list_for_each_safe (it, &conf->includes, next) {
f979ac15
SH
3946 lxc_list_del(it);
3947 free(it->elem);
3948 free(it);
3949 }
3950}
3951
8eb5694b
SH
3952void lxc_conf_free(struct lxc_conf *conf)
3953{
3954 if (!conf)
3955 return;
0fd73091 3956
858377e4
SH
3957 if (current_config == conf)
3958 current_config = NULL;
aed105d5 3959 lxc_terminal_conf_free(&conf->console);
f10fad2f 3960 free(conf->rootfs.mount);
b3b8c97f 3961 free(conf->rootfs.bdev_type);
f10fad2f
ME
3962 free(conf->rootfs.options);
3963 free(conf->rootfs.path);
f10fad2f 3964 free(conf->logfile);
858377e4
SH
3965 if (conf->logfd != -1)
3966 close(conf->logfd);
f10fad2f 3967 free(conf->utsname);
885766f5
CB
3968 free(conf->ttys.dir);
3969 free(conf->ttys.tty_names);
f10fad2f
ME
3970 free(conf->fstab);
3971 free(conf->rcfile);
5cda27c1 3972 free(conf->execute_cmd);
f10fad2f 3973 free(conf->init_cmd);
3c491553 3974 free(conf->init_cwd);
6b0d5538 3975 free(conf->unexpanded_config);
76d0127f 3976 free(conf->syslog);
c302b476 3977 lxc_free_networks(&conf->network);
f10fad2f
ME
3978 free(conf->lsm_aa_profile);
3979 free(conf->lsm_se_context);
769872f9 3980 lxc_seccomp_free(conf);
8eb5694b 3981 lxc_clear_config_caps(conf);
1fb86a7c 3982 lxc_clear_config_keepcaps(conf);
54860ed0
CB
3983 lxc_clear_cgroups(conf, "lxc.cgroup", CGROUP_SUPER_MAGIC);
3984 lxc_clear_cgroups(conf, "lxc.cgroup2", CGROUP2_SUPER_MAGIC);
17ed13a3 3985 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3986 lxc_clear_mount_entries(conf);
27c27d73 3987 lxc_clear_idmaps(conf);
ee1e7aa0 3988 lxc_clear_groups(conf);
f979ac15 3989 lxc_clear_includes(conf);
761d81ca 3990 lxc_clear_aliens(conf);
ab799c0b 3991 lxc_clear_environment(conf);
240d4b74 3992 lxc_clear_limits(conf, "lxc.prlimit");
7edd0540 3993 lxc_clear_sysctls(conf, "lxc.sysctl");
61d7a733 3994 lxc_clear_procs(conf, "lxc.proc");
43654d34
CB
3995 free(conf->cgroup_meta.dir);
3996 free(conf->cgroup_meta.controllers);
8eb5694b
SH
3997 free(conf);
3998}
4355ab5f
SH
3999
/* Arguments handed to run_userns_fn(). */
struct userns_fn_data {
	/* Function to call once the parent has signalled readiness. */
	int (*fn)(void *);
	/* Optional name of fn, only used for trace logging; may be NULL. */
	const char *fn_name;
	/* Opaque argument passed to fn. */
	void *arg;
	/* Synchronization pipe: p[0] is the read end, p[1] the write end. */
	int p[2];
};
4006
4007static int run_userns_fn(void *data)
4008{
4355ab5f 4009 char c;
0fd73091 4010 struct userns_fn_data *d = data;
4355ab5f 4011
f8aa4bf3 4012 /* Close write end of the pipe. */
4355ab5f 4013 close(d->p[1]);
f8aa4bf3
CB
4014
4015 /* Wait for parent to finish establishing a new mapping in the user
4016 * namespace we are executing in.
4017 */
489f39be 4018 if (lxc_read_nointr(d->p[0], &c, 1) != 1)
4355ab5f 4019 return -1;
f8aa4bf3
CB
4020
4021 /* Close read end of the pipe. */
4355ab5f 4022 close(d->p[0]);
f8aa4bf3 4023
c9b7c33e
CB
4024 if (d->fn_name)
4025 TRACE("calling function \"%s\"", d->fn_name);
0fd73091 4026
f8aa4bf3 4027 /* Call function to run. */
4355ab5f
SH
4028 return d->fn(d->arg);
4029}
4030
db7cfe23
CB
4031static struct id_map *mapped_nsid_add(struct lxc_conf *conf, unsigned id,
4032 enum idtype idtype)
4033{
5173b710
CB
4034 const struct id_map *map;
4035 struct id_map *retmap;
db7cfe23
CB
4036
4037 map = find_mapped_nsid_entry(conf, id, idtype);
4038 if (!map)
4039 return NULL;
4040
4041 retmap = malloc(sizeof(*retmap));
4042 if (!retmap)
4043 return NULL;
4044
4045 memcpy(retmap, map, sizeof(*retmap));
4046 return retmap;
4047}
4048
c4333195
CB
4049static struct id_map *find_mapped_hostid_entry(struct lxc_conf *conf,
4050 unsigned id, enum idtype idtype)
f8aa4bf3 4051{
f8aa4bf3 4052 struct id_map *map;
0fd73091 4053 struct lxc_list *it;
f8aa4bf3
CB
4054 struct id_map *retmap = NULL;
4055
0fd73091 4056 lxc_list_for_each (it, &conf->id_map) {
f8aa4bf3
CB
4057 map = it->elem;
4058 if (map->idtype != idtype)
4059 continue;
4060
4061 if (id >= map->hostid && id < map->hostid + map->range) {
4062 retmap = map;
4063 break;
4064 }
4065 }
4066
f8aa4bf3
CB
4067 return retmap;
4068}
4069
0fd73091 4070/* Allocate a new {g,u}id mapping for the given {g,u}id. Re-use an already
f8aa4bf3 4071 * existing one or establish a new one.
4355ab5f 4072 */
0fd73091
CB
4073static struct id_map *mapped_hostid_add(struct lxc_conf *conf, uid_t id,
4074 enum idtype type)
4355ab5f 4075{
28a2d9e7 4076 int hostid_mapped;
c4333195
CB
4077 struct id_map *entry = NULL, *tmp = NULL;
4078
4079 entry = malloc(sizeof(*entry));
4080 if (!entry)
4081 return NULL;
f8aa4bf3 4082
28a2d9e7 4083 /* Reuse existing mapping. */
c4333195
CB
4084 tmp = find_mapped_hostid_entry(conf, id, type);
4085 if (tmp)
4086 return memcpy(entry, tmp, sizeof(*entry));
f8aa4bf3 4087
28a2d9e7
CB
4088 /* Find new mapping. */
4089 hostid_mapped = find_unmapped_nsid(conf, type);
4090 if (hostid_mapped < 0) {
c4333195
CB
4091 DEBUG("Failed to find free mapping for id %d", id);
4092 free(entry);
28a2d9e7 4093 return NULL;
f8aa4bf3 4094 }
f8aa4bf3 4095
28a2d9e7
CB
4096 entry->idtype = type;
4097 entry->nsid = hostid_mapped;
4098 entry->hostid = (unsigned long)id;
4099 entry->range = 1;
4355ab5f 4100
28a2d9e7 4101 return entry;
4355ab5f
SH
4102}
4103
dcf0ffdf 4104struct lxc_list *get_minimal_idmap(struct lxc_conf *conf)
4355ab5f 4105{
f8aa4bf3 4106 uid_t euid, egid;
4160c3a0
CB
4107 uid_t nsuid = (conf->root_nsuid_map != NULL) ? 0 : conf->init_uid;
4108 gid_t nsgid = (conf->root_nsgid_map != NULL) ? 0 : conf->init_gid;
f8aa4bf3 4109 struct lxc_list *idmap = NULL, *tmplist = NULL;
28a2d9e7
CB
4110 struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
4111 *host_uid_map = NULL, *host_gid_map = NULL;
4355ab5f 4112
db7cfe23 4113 /* Find container root mappings. */
4160c3a0 4114 container_root_uid = mapped_nsid_add(conf, nsuid, ID_TYPE_UID);
db7cfe23 4115 if (!container_root_uid) {
dcf0ffdf 4116 DEBUG("Failed to find mapping for namespace uid %d", 0);
db7cfe23 4117 goto on_error;
f8aa4bf3 4118 }
dcf0ffdf
CB
4119 euid = geteuid();
4120 if (euid >= container_root_uid->hostid &&
4121 euid < (container_root_uid->hostid + container_root_uid->range))
db7cfe23 4122 host_uid_map = container_root_uid;
f8aa4bf3 4123
4160c3a0 4124 container_root_gid = mapped_nsid_add(conf, nsgid, ID_TYPE_GID);
db7cfe23 4125 if (!container_root_gid) {
dcf0ffdf 4126 DEBUG("Failed to find mapping for namespace gid %d", 0);
f8aa4bf3
CB
4127 goto on_error;
4128 }
dcf0ffdf
CB
4129 egid = getegid();
4130 if (egid >= container_root_gid->hostid &&
4131 egid < (container_root_gid->hostid + container_root_gid->range))
db7cfe23 4132 host_gid_map = container_root_gid;
f8aa4bf3
CB
4133
4134 /* Check whether the {g,u}id of the user has a mapping. */
954b7d9b 4135 if (!host_uid_map)
c4333195 4136 host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
28a2d9e7 4137 if (!host_uid_map) {
db7cfe23 4138 DEBUG("Failed to find mapping for uid %d", euid);
f8aa4bf3
CB
4139 goto on_error;
4140 }
4141
dcf0ffdf
CB
4142 if (!host_gid_map)
4143 host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
28a2d9e7 4144 if (!host_gid_map) {
db7cfe23 4145 DEBUG("Failed to find mapping for gid %d", egid);
28a2d9e7
CB
4146 goto on_error;
4147 }
4148
4149 /* Allocate new {g,u}id map list. */
4150 idmap = malloc(sizeof(*idmap));
4151 if (!idmap)
4152 goto on_error;
4153 lxc_list_init(idmap);
4154
f8aa4bf3
CB
4155 /* Add container root to the map. */
4156 tmplist = malloc(sizeof(*tmplist));
4157 if (!tmplist)
4158 goto on_error;
4159 lxc_list_add_elem(tmplist, container_root_uid);
4160 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4161
1d90e064 4162 if (host_uid_map && (host_uid_map != container_root_uid)) {
28a2d9e7
CB
4163 /* idmap will now keep track of that memory. */
4164 container_root_uid = NULL;
4165
4166 /* Add container root to the map. */
4167 tmplist = malloc(sizeof(*tmplist));
4168 if (!tmplist)
4169 goto on_error;
4170 lxc_list_add_elem(tmplist, host_uid_map);
4171 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4172 }
1d90e064
CB
4173 /* idmap will now keep track of that memory. */
4174 container_root_uid = NULL;
4175 /* idmap will now keep track of that memory. */
4176 host_uid_map = NULL;
f8aa4bf3
CB
4177
4178 tmplist = malloc(sizeof(*tmplist));
4179 if (!tmplist)
4180 goto on_error;
4181 lxc_list_add_elem(tmplist, container_root_gid);
4182 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4183
1d90e064 4184 if (host_gid_map && (host_gid_map != container_root_gid)) {
28a2d9e7
CB
4185 /* idmap will now keep track of that memory. */
4186 container_root_gid = NULL;
4187
4188 tmplist = malloc(sizeof(*tmplist));
4189 if (!tmplist)
4190 goto on_error;
4191 lxc_list_add_elem(tmplist, host_gid_map);
4192 lxc_list_add_tail(idmap, tmplist);
28a2d9e7 4193 }
1d90e064
CB
4194 /* idmap will now keep track of that memory. */
4195 container_root_gid = NULL;
4196 /* idmap will now keep track of that memory. */
4197 host_gid_map = NULL;
f8aa4bf3 4198
dcf0ffdf
CB
4199 TRACE("Allocated minimal idmapping");
4200 return idmap;
4201
4202on_error:
4dc41f99 4203 if (idmap) {
dcf0ffdf 4204 lxc_free_idmap(idmap);
4dc41f99
SX
4205 free(idmap);
4206 }
dcf0ffdf
CB
4207 if (container_root_uid)
4208 free(container_root_uid);
4209 if (container_root_gid)
4210 free(container_root_gid);
4211 if (host_uid_map && (host_uid_map != container_root_uid))
4212 free(host_uid_map);
4213 if (host_gid_map && (host_gid_map != container_root_gid))
4214 free(host_gid_map);
4215
4216 return NULL;
4217}
4218
4219/* Run a function in a new user namespace.
4220 * The caller's euid/egid will be mapped if it is not already.
4221 * Afaict, userns_exec_1() is only used to operate based on privileges for the
4222 * user's own {g,u}id on the host and for the container root's unmapped {g,u}id.
4223 * This means we require only to establish a mapping from:
4224 * - the container root {g,u}id as seen from the host > user's host {g,u}id
4225 * - the container root -> some sub{g,u}id
4226 * The former we add, if the user did not specifiy a mapping. The latter we
4227 * retrieve from the ontainer's configured {g,u}id mappings as it must have been
4228 * there to start the container in the first place.
4229 */
4230int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data,
4231 const char *fn_name)
4232{
4233 pid_t pid;
dcf0ffdf 4234 int p[2];
0fd73091 4235 struct userns_fn_data d;
dcf0ffdf 4236 struct lxc_list *idmap;
0fd73091
CB
4237 int ret = -1, status = -1;
4238 char c = '1';
dcf0ffdf 4239
2b2655a8
CB
4240 if (!conf)
4241 return -EINVAL;
4242
dcf0ffdf
CB
4243 idmap = get_minimal_idmap(conf);
4244 if (!idmap)
4245 return -1;
4246
4247 ret = pipe(p);
4248 if (ret < 0) {
4249 SYSERROR("Failed to create pipe");
4250 return -1;
4251 }
4252 d.fn = fn;
4253 d.fn_name = fn_name;
4254 d.arg = data;
4255 d.p[0] = p[0];
4256 d.p[1] = p[1];
4257
4258 /* Clone child in new user namespace. */
4259 pid = lxc_raw_clone_cb(run_userns_fn, &d, CLONE_NEWUSER);
4260 if (pid < 0) {
0fd73091 4261 ERROR("Failed to clone process in new user namespace");
dcf0ffdf
CB
4262 goto on_error;
4263 }
4264
4265 close(p[0]);
4266 p[0] = -1;
4267
4b73005c
CB
4268 if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
4269 conf->loglevel == LXC_LOG_LEVEL_TRACE) {
dcf0ffdf 4270 struct id_map *map;
0fd73091 4271 struct lxc_list *it;
dcf0ffdf 4272
0fd73091 4273 lxc_list_for_each (it, idmap) {
f8aa4bf3 4274 map = it->elem;
dcf0ffdf 4275 TRACE("Establishing %cid mapping for \"%d\" in new "
f8aa4bf3 4276 "user namespace: nsuid %lu - hostid %lu - range "
0fd73091
CB
4277 "%lu",
4278 (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
4279 map->nsid, map->hostid, map->range);
f8aa4bf3 4280 }
4355ab5f
SH
4281 }
4282
f8aa4bf3 4283 /* Set up {g,u}id mapping for user namespace of child process. */
4355ab5f 4284 ret = lxc_map_ids(idmap, pid);
f8aa4bf3 4285 if (ret < 0) {
0fd73091 4286 ERROR("Error setting up {g,u}id mappings for child process \"%d\"", pid);
f8aa4bf3 4287 goto on_error;
4355ab5f
SH
4288 }
4289
f8aa4bf3 4290 /* Tell child to proceed. */
489f39be 4291 if (lxc_write_nointr(p[1], &c, 1) != 1) {
dcf0ffdf 4292 SYSERROR("Failed telling child process \"%d\" to proceed", pid);
f8aa4bf3 4293 goto on_error;
4355ab5f
SH
4294 }
4295
686dd5d1 4296on_error:
4355ab5f
SH
4297 if (p[0] != -1)
4298 close(p[0]);
4299 close(p[1]);
f8aa4bf3 4300
ee1b16bc
TA
4301 /* Wait for child to finish. */
4302 if (pid > 0)
4303 status = wait_for_pid(pid);
4304
686dd5d1
CB
4305 if (status < 0)
4306 ret = -1;
4307
f8aa4bf3 4308 return ret;
4355ab5f 4309}
97e9cfa0 4310
415a8851
CB
4311int userns_exec_full(struct lxc_conf *conf, int (*fn)(void *), void *data,
4312 const char *fn_name)
4313{
4314 pid_t pid;
4315 uid_t euid, egid;
415a8851
CB
4316 int p[2];
4317 struct id_map *map;
4318 struct lxc_list *cur;
0fd73091 4319 struct userns_fn_data d;
415a8851 4320 int ret = -1;
0fd73091 4321 char c = '1';
415a8851
CB
4322 struct lxc_list *idmap = NULL, *tmplist = NULL;
4323 struct id_map *container_root_uid = NULL, *container_root_gid = NULL,
4324 *host_uid_map = NULL, *host_gid_map = NULL;
4325
2b2655a8
CB
4326 if (!conf)
4327 return -EINVAL;
4328
415a8851
CB
4329 ret = pipe(p);
4330 if (ret < 0) {
4331 SYSERROR("opening pipe");
4332 return -1;
4333 }
4334 d.fn = fn;
4335 d.fn_name = fn_name;
4336 d.arg = data;
4337 d.p[0] = p[0];
4338 d.p[1] = p[1];
4339
4340 /* Clone child in new user namespace. */
4341 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4342 if (pid < 0) {
0fd73091 4343 ERROR("Failed to clone process in new user namespace");
415a8851
CB
4344 goto on_error;
4345 }
4346
4347 close(p[0]);
4348 p[0] = -1;
4349
4350 euid = geteuid();
4351 egid = getegid();
4352
4353 /* Allocate new {g,u}id map list. */
4354 idmap = malloc(sizeof(*idmap));
4355 if (!idmap)
4356 goto on_error;
4357 lxc_list_init(idmap);
4358
4359 /* Find container root. */
0fd73091 4360 lxc_list_for_each (cur, &conf->id_map) {
415a8851
CB
4361 struct id_map *tmpmap;
4362
4363 tmplist = malloc(sizeof(*tmplist));
4364 if (!tmplist)
4365 goto on_error;
4366
4367 tmpmap = malloc(sizeof(*tmpmap));
4368 if (!tmpmap) {
4369 free(tmplist);
4370 goto on_error;
4371 }
4372
4373 memset(tmpmap, 0, sizeof(*tmpmap));
4374 memcpy(tmpmap, cur->elem, sizeof(*tmpmap));
4375 tmplist->elem = tmpmap;
4376
4377 lxc_list_add_tail(idmap, tmplist);
4378
4379 map = cur->elem;
4380
4381 if (map->idtype == ID_TYPE_UID)
4382 if (euid >= map->hostid && euid < map->hostid + map->range)
4383 host_uid_map = map;
4384
4385 if (map->idtype == ID_TYPE_GID)
4386 if (egid >= map->hostid && egid < map->hostid + map->range)
4387 host_gid_map = map;
4388
4389 if (map->nsid != 0)
4390 continue;
4391
4392 if (map->idtype == ID_TYPE_UID)
4393 if (container_root_uid == NULL)
4394 container_root_uid = map;
4395
4396 if (map->idtype == ID_TYPE_GID)
4397 if (container_root_gid == NULL)
4398 container_root_gid = map;
4399 }
4400
4401 if (!container_root_uid || !container_root_gid) {
4402 ERROR("No mapping for container root found");
4403 goto on_error;
4404 }
4405
4406 /* Check whether the {g,u}id of the user has a mapping. */
4407 if (!host_uid_map)
c4333195 4408 host_uid_map = mapped_hostid_add(conf, euid, ID_TYPE_UID);
415a8851
CB
4409 else
4410 host_uid_map = container_root_uid;
4411
4412 if (!host_gid_map)
c4333195 4413 host_gid_map = mapped_hostid_add(conf, egid, ID_TYPE_GID);
415a8851
CB
4414 else
4415 host_gid_map = container_root_gid;
4416
4417 if (!host_uid_map) {
4418 DEBUG("Failed to find mapping for uid %d", euid);
4419 goto on_error;
4420 }
4421
4422 if (!host_gid_map) {
4423 DEBUG("Failed to find mapping for gid %d", egid);
4424 goto on_error;
4425 }
4426
4427 if (host_uid_map && (host_uid_map != container_root_uid)) {
4428 /* Add container root to the map. */
4429 tmplist = malloc(sizeof(*tmplist));
4430 if (!tmplist)
4431 goto on_error;
4432 lxc_list_add_elem(tmplist, host_uid_map);
4433 lxc_list_add_tail(idmap, tmplist);
4434 }
4435 /* idmap will now keep track of that memory. */
4436 host_uid_map = NULL;
4437
4438 if (host_gid_map && (host_gid_map != container_root_gid)) {
4439 tmplist = malloc(sizeof(*tmplist));
4440 if (!tmplist)
4441 goto on_error;
4442 lxc_list_add_elem(tmplist, host_gid_map);
4443 lxc_list_add_tail(idmap, tmplist);
4444 }
4445 /* idmap will now keep track of that memory. */
4446 host_gid_map = NULL;
4447
4448 if (lxc_log_get_level() == LXC_LOG_LEVEL_TRACE ||
4449 conf->loglevel == LXC_LOG_LEVEL_TRACE) {
0fd73091 4450 lxc_list_for_each (cur, idmap) {
415a8851
CB
4451 map = cur->elem;
4452 TRACE("establishing %cid mapping for \"%d\" in new "
4453 "user namespace: nsuid %lu - hostid %lu - range "
4454 "%lu",
4455 (map->idtype == ID_TYPE_UID) ? 'u' : 'g', pid,
4456 map->nsid, map->hostid, map->range);
4457 }
4458 }
4459
4460 /* Set up {g,u}id mapping for user namespace of child process. */
4461 ret = lxc_map_ids(idmap, pid);
4462 if (ret < 0) {
0fd73091 4463 ERROR("error setting up {g,u}id mappings for child process \"%d\"", pid);
415a8851
CB
4464 goto on_error;
4465 }
4466
4467 /* Tell child to proceed. */
489f39be 4468 if (lxc_write_nointr(p[1], &c, 1) != 1) {
0fd73091 4469 SYSERROR("Failed telling child process \"%d\" to proceed", pid);
415a8851
CB
4470 goto on_error;
4471 }
4472
686dd5d1 4473on_error:
ee1b16bc
TA
4474 if (p[0] != -1)
4475 close(p[0]);
4476 close(p[1]);
4477
415a8851 4478 /* Wait for child to finish. */
686dd5d1
CB
4479 if (pid > 0)
4480 ret = wait_for_pid(pid);
415a8851 4481
80758b4b 4482 if (idmap) {
415a8851 4483 lxc_free_idmap(idmap);
80758b4b
DJ
4484 free(idmap);
4485 }
4486
415a8851
CB
4487 if (host_uid_map && (host_uid_map != container_root_uid))
4488 free(host_uid_map);
4489 if (host_gid_map && (host_gid_map != container_root_gid))
4490 free(host_gid_map);
4491
415a8851
CB
4492 return ret;
4493}
4494
/* Look up the login name belonging to the effective uid of the caller.
 *
 * Returns a strdup()ed name that the caller must free, or NULL if the scratch
 * buffer cannot be allocated or no password record is found.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd entry;
	struct passwd *match = NULL;
	char *scratch, *name;
	size_t scratch_len;
	int err;

	/* Fall back to a fixed size when the system gives no limit. */
	scratch_len = sysconf(_SC_GETPW_R_SIZE_MAX);
	if (scratch_len == (size_t)-1)
		scratch_len = 1024;

	scratch = malloc(scratch_len);
	if (scratch == NULL)
		return NULL;

	err = getpwuid_r(geteuid(), &entry, scratch, scratch_len, &match);
	if (match != NULL) {
		/* The record's strings live in scratch; copy before freeing. */
		name = strdup(entry.pw_name);
		free(scratch);
		return name;
	}

	if (err == 0)
		WARN("Could not find matched password record.");

	ERROR("Failed to get password record - %u", geteuid());
	free(scratch);
	return NULL;
}
4528
/* Look up the group name belonging to the effective gid of the caller.
 *
 * Returns a strdup()ed name that the caller must free, or NULL if the scratch
 * buffer cannot be allocated or no group record is found.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group entry;
	struct group *match = NULL;
	char *scratch, *name;
	size_t scratch_len;
	int err;

	/* Fall back to a fixed size when the system gives no limit. */
	scratch_len = sysconf(_SC_GETGR_R_SIZE_MAX);
	if (scratch_len == (size_t)-1)
		scratch_len = 1024;

	scratch = malloc(scratch_len);
	if (scratch == NULL)
		return NULL;

	err = getgrgid_r(getegid(), &entry, scratch, scratch_len, &match);
	if (match != NULL) {
		/* The record's strings live in scratch; copy before freeing. */
		name = strdup(entry.gr_name);
		free(scratch);
		return name;
	}

	if (err == 0)
		WARN("Could not find matched group record");

	ERROR("Failed to get group record - %u", getegid());
	free(scratch);
	return NULL;
}
4562
a96a8e8c 4563/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4564void suggest_default_idmap(void)
4565{
0fd73091 4566 char *uname, *gname;
97e9cfa0
SH
4567 FILE *f;
4568 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
97e9cfa0 4569 size_t len = 0;
0fd73091 4570 char *line = NULL;
97e9cfa0 4571
0fd73091
CB
4572 uname = getuname();
4573 if (!uname)
97e9cfa0
SH
4574 return;
4575
0fd73091
CB
4576 gname = getgname();
4577 if (!gname) {
97e9cfa0
SH
4578 free(uname);
4579 return;
4580 }
4581
4582 f = fopen(subuidfile, "r");
4583 if (!f) {
4584 ERROR("Your system is not configured with subuids");
4585 free(gname);
4586 free(uname);
4587 return;
4588 }
0fd73091 4589
97e9cfa0 4590 while (getline(&line, &len, f) != -1) {
0fd73091 4591 char *p, *p2;
b7930180 4592 size_t no_newline = 0;
0fd73091
CB
4593
4594 p = strchr(line, ':');
97e9cfa0
SH
4595 if (*line == '#')
4596 continue;
4597 if (!p)
4598 continue;
4599 *p = '\0';
4600 p++;
0fd73091 4601
97e9cfa0
SH
4602 if (strcmp(line, uname))
4603 continue;
0fd73091 4604
97e9cfa0
SH
4605 p2 = strchr(p, ':');
4606 if (!p2)
4607 continue;
4608 *p2 = '\0';
4609 p2++;
4610 if (!*p2)
4611 continue;
b7930180
CB
4612 no_newline = strcspn(p2, "\n");
4613 p2[no_newline] = '\0';
4614
b7b2fde4 4615 if (lxc_safe_uint(p, &uid) < 0)
0fd73091 4616 WARN("Could not parse UID");
b7b2fde4 4617 if (lxc_safe_uint(p2, &urange) < 0)
0fd73091 4618 WARN("Could not parse UID range");
97e9cfa0
SH
4619 }
4620 fclose(f);
4621
6be7389a 4622 f = fopen(subgidfile, "r");
97e9cfa0
SH
4623 if (!f) {
4624 ERROR("Your system is not configured with subgids");
4625 free(gname);
4626 free(uname);
4627 return;
4628 }
0fd73091 4629
97e9cfa0 4630 while (getline(&line, &len, f) != -1) {
0fd73091 4631 char *p, *p2;
b7930180 4632 size_t no_newline = 0;
0fd73091
CB
4633
4634 p = strchr(line, ':');
97e9cfa0
SH
4635 if (*line == '#')
4636 continue;
4637 if (!p)
4638 continue;
4639 *p = '\0';
4640 p++;
0fd73091 4641
97e9cfa0
SH
4642 if (strcmp(line, uname))
4643 continue;
0fd73091 4644
97e9cfa0
SH
4645 p2 = strchr(p, ':');
4646 if (!p2)
4647 continue;
4648 *p2 = '\0';
4649 p2++;
4650 if (!*p2)
4651 continue;
b7930180
CB
4652 no_newline = strcspn(p2, "\n");
4653 p2[no_newline] = '\0';
4654
b7b2fde4 4655 if (lxc_safe_uint(p, &gid) < 0)
0fd73091 4656 WARN("Could not parse GID");
b7b2fde4 4657 if (lxc_safe_uint(p2, &grange) < 0)
0fd73091 4658 WARN("Could not parse GID range");
97e9cfa0
SH
4659 }
4660 fclose(f);
4661
f10fad2f 4662 free(line);
97e9cfa0
SH
4663
4664 if (!urange || !grange) {
4665 ERROR("You do not have subuids or subgids allocated");
4666 ERROR("Unprivileged containers require subuids and subgids");
fbd4a4d1 4667 free(uname);
1e7cd2f7 4668 free(gname);
97e9cfa0
SH
4669 return;
4670 }
4671
4672 ERROR("You must either run as root, or define uid mappings");
4673 ERROR("To pass uid mappings to lxc-create, you could create");
4674 ERROR("~/.config/lxc/default.conf:");
4675 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
bdcbb6b3
CB
4676 ERROR("lxc.idmap = u 0 %u %u", uid, urange);
4677 ERROR("lxc.idmap = g 0 %u %u", gid, grange);
97e9cfa0
SH
4678
4679 free(gname);
4680 free(uname);
4681}
aaf26830 4682
a7307747
SH
4683static void free_cgroup_settings(struct lxc_list *result)
4684{
4685 struct lxc_list *iterator, *next;
4686
0fd73091 4687 lxc_list_for_each_safe (iterator, result, next) {
a7307747
SH
4688 lxc_list_del(iterator);
4689 free(iterator);
4690 }
4691 free(result);
4692}
4693
0fd73091 4694/* Return the list of cgroup_settings sorted according to the following rules
aaf26830
KT
4695 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4696 */
0fd73091 4697struct lxc_list *sort_cgroup_settings(struct lxc_list *cgroup_settings)
aaf26830
KT
4698{
4699 struct lxc_list *result;
aaf26830 4700 struct lxc_cgroup *cg = NULL;
0fd73091 4701 struct lxc_list *it = NULL, *item = NULL, *memsw_limit = NULL;
aaf26830
KT
4702
4703 result = malloc(sizeof(*result));
0fd73091 4704 if (!result)
fac7c663 4705 return NULL;
aaf26830
KT
4706 lxc_list_init(result);
4707
0fd73091
CB
4708 /* Iterate over the cgroup settings and copy them to the output list. */
4709 lxc_list_for_each (it, cgroup_settings) {
aaf26830 4710 item = malloc(sizeof(*item));
fac7c663 4711 if (!item) {
a7307747 4712 free_cgroup_settings(result);
fac7c663
KT
4713 return NULL;
4714 }
0fd73091 4715
aaf26830
KT
4716 item->elem = it->elem;
4717 cg = it->elem;
4718 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4719 /* Store the memsw_limit location */
4720 memsw_limit = item;
0fd73091
CB
4721 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 &&
4722 memsw_limit != NULL) {
4723 /* lxc.cgroup.memory.memsw.limit_in_bytes is found
4724 * before lxc.cgroup.memory.limit_in_bytes, swap these
4725 * two items */
aaf26830
KT
4726 item->elem = memsw_limit->elem;
4727 memsw_limit->elem = it->elem;
4728 }
4729 lxc_list_add_tail(result, item);
4730 }
4731
4732 return result;
a7307747 4733}