]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
doc: Add the description of lxc.hook.stop to Japanese lxc.container.conf(5)
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
e8bd4e43 68#include "af_unix.h"
b2718c72 69#include "parse.h"
1b09f2c0
DL
70#include "utils.h"
71#include "conf.h"
72#include "log.h"
d55bc1ad 73#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 74#include "bdev.h"
368bbc02 75#include "cgroup.h"
025ed0f3 76#include "lxclock.h"
4355ab5f 77#include "namespace.h"
fe4de9a6 78#include "lsm/lsm.h"
d0a36f2c 79
495d2046
SG
80#if HAVE_SYS_CAPABILITY_H
81#include <sys/capability.h>
82#endif
83
6ff05e18
SG
84#if HAVE_SYS_PERSONALITY_H
85#include <sys/personality.h>
86#endif
87
edaf8b1b
SG
88#if IS_BIONIC
89#include <../include/lxcmntent.h>
90#else
91#include <mntent.h>
92#endif
93
769872f9
SH
94#include "lxcseccomp.h"
95
36eb9bde 96lxc_log_define(lxc_conf, lxc);
e5bda9ee 97
87da4ec3 98#define LINELEN 4096
0ad19a3f 99
495d2046 100#if HAVE_SYS_CAPABILITY_H
b09094da
MN
101#ifndef CAP_SETFCAP
102#define CAP_SETFCAP 31
103#endif
104
105#ifndef CAP_MAC_OVERRIDE
106#define CAP_MAC_OVERRIDE 32
107#endif
108
109#ifndef CAP_MAC_ADMIN
110#define CAP_MAC_ADMIN 33
111#endif
495d2046 112#endif
b09094da
MN
113
114#ifndef PR_CAPBSET_DROP
115#define PR_CAPBSET_DROP 24
116#endif
117
9818cae4
SG
118#ifndef LO_FLAGS_AUTOCLEAR
119#define LO_FLAGS_AUTOCLEAR 4
120#endif
121
0769b82a
CS
122/* needed for cgroup automount checks, regardless of whether we
123 * have included linux/capability.h or not */
124#ifndef CAP_SYS_ADMIN
125#define CAP_SYS_ADMIN 21
126#endif
127
2d76d1d7
SG
/*
 * pivot_root() compatibility shim.
 *
 * When HAVE_PIVOT_ROOT is defined the C library's implementation is used and
 * only declared here. Otherwise a local fallback issues the raw system call,
 * or fails with ENOSYS when the syscall number is not known at build time.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
142
/*
 * sethostname() compatibility shim, compiled only when HAVE_SETHOSTNAME is
 * not defined: issue the raw system call, or fail with ENOSYS when the
 * syscall number is not known at build time.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
155
72f919c4
SG
156/* Define __S_ISTYPE if missing from the C library */
157#ifndef __S_ISTYPE
158#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
159#endif
160
ecec0126
SG
161#ifndef MS_PRIVATE
162#define MS_PRIVATE (1<<18)
163#endif
164
72d0e1cb 165char *lxchook_names[NUM_LXC_HOOKS] = {
52492063 166 "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" };
72d0e1cb 167
a589434e 168typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 169
998ac676
RT
/* One mount option keyword together with the mount flag it refers to. */
struct mount_opt {
	/* option keyword, e.g. "ro" or "nosuid" */
	char *name;
	/*
	 * NOTE(review): presumably non-zero means the keyword clears `flag`
	 * rather than setting it (the table pairs "rw" with MS_RDONLY and
	 * clear == 1) - confirm against the option parser.
	 */
	int clear;
	/* MS_* flag bit(s) associated with the keyword */
	int flag;
};
175
81810dd1
DL
/* Maps a capability keyword to its numeric CAP_* value. */
struct caps_opt {
	/* capability keyword, e.g. "sys_admin" */
	char *name;
	/* matching CAP_* constant */
	int value;
};
180
858377e4
SH
/*
 * Configuration of the container that the current API call is working on.
 * The error-reporting calls use it. It is declared thread-local when
 * HAVE_TLS is defined and as a plain global otherwise.
 */
#ifndef HAVE_TLS
struct lxc_conf *current_config;
#else
__thread struct lxc_conf *current_config;
#endif
191
0769b82a
CS
192/* Declare this here, since we don't want to reshuffle the whole file. */
193static int in_caplist(int cap, struct lxc_list *caps);
194
a589434e
JN
195static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
196static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
197static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
198static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
199static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
200static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
201
202static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
203 [LXC_NET_VETH] = instantiate_veth,
204 [LXC_NET_MACVLAN] = instantiate_macvlan,
205 [LXC_NET_VLAN] = instantiate_vlan,
206 [LXC_NET_PHYS] = instantiate_phys,
207 [LXC_NET_EMPTY] = instantiate_empty,
208 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 209};
210
74a2b586
JK
211static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
212static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
213static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
214static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
215static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 216static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 217
a589434e 218static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
219 [LXC_NET_VETH] = shutdown_veth,
220 [LXC_NET_MACVLAN] = shutdown_macvlan,
221 [LXC_NET_VLAN] = shutdown_vlan,
222 [LXC_NET_PHYS] = shutdown_phys,
223 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 224 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
225};
226
998ac676 227static struct mount_opt mount_opt[] = {
88d413d5
SW
228 { "defaults", 0, 0 },
229 { "ro", 0, MS_RDONLY },
230 { "rw", 1, MS_RDONLY },
231 { "suid", 1, MS_NOSUID },
232 { "nosuid", 0, MS_NOSUID },
233 { "dev", 1, MS_NODEV },
234 { "nodev", 0, MS_NODEV },
235 { "exec", 1, MS_NOEXEC },
236 { "noexec", 0, MS_NOEXEC },
237 { "sync", 0, MS_SYNCHRONOUS },
238 { "async", 1, MS_SYNCHRONOUS },
239 { "dirsync", 0, MS_DIRSYNC },
240 { "remount", 0, MS_REMOUNT },
241 { "mand", 0, MS_MANDLOCK },
242 { "nomand", 1, MS_MANDLOCK },
243 { "atime", 1, MS_NOATIME },
244 { "noatime", 0, MS_NOATIME },
245 { "diratime", 1, MS_NODIRATIME },
246 { "nodiratime", 0, MS_NODIRATIME },
247 { "bind", 0, MS_BIND },
248 { "rbind", 0, MS_BIND|MS_REC },
249 { "relatime", 0, MS_RELATIME },
250 { "norelatime", 1, MS_RELATIME },
251 { "strictatime", 0, MS_STRICTATIME },
252 { "nostrictatime", 1, MS_STRICTATIME },
253 { NULL, 0, 0 },
998ac676
RT
254};
255
495d2046 256#if HAVE_SYS_CAPABILITY_H
81810dd1 257static struct caps_opt caps_opt[] = {
a6afdde9 258 { "chown", CAP_CHOWN },
1e11be34
DL
259 { "dac_override", CAP_DAC_OVERRIDE },
260 { "dac_read_search", CAP_DAC_READ_SEARCH },
261 { "fowner", CAP_FOWNER },
262 { "fsetid", CAP_FSETID },
81810dd1
DL
263 { "kill", CAP_KILL },
264 { "setgid", CAP_SETGID },
265 { "setuid", CAP_SETUID },
266 { "setpcap", CAP_SETPCAP },
267 { "linux_immutable", CAP_LINUX_IMMUTABLE },
268 { "net_bind_service", CAP_NET_BIND_SERVICE },
269 { "net_broadcast", CAP_NET_BROADCAST },
270 { "net_admin", CAP_NET_ADMIN },
271 { "net_raw", CAP_NET_RAW },
272 { "ipc_lock", CAP_IPC_LOCK },
273 { "ipc_owner", CAP_IPC_OWNER },
274 { "sys_module", CAP_SYS_MODULE },
275 { "sys_rawio", CAP_SYS_RAWIO },
276 { "sys_chroot", CAP_SYS_CHROOT },
277 { "sys_ptrace", CAP_SYS_PTRACE },
278 { "sys_pacct", CAP_SYS_PACCT },
279 { "sys_admin", CAP_SYS_ADMIN },
280 { "sys_boot", CAP_SYS_BOOT },
281 { "sys_nice", CAP_SYS_NICE },
282 { "sys_resource", CAP_SYS_RESOURCE },
283 { "sys_time", CAP_SYS_TIME },
284 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
285 { "mknod", CAP_MKNOD },
286 { "lease", CAP_LEASE },
57b837e2
CB
287#ifdef CAP_AUDIT_READ
288 { "audit_read", CAP_AUDIT_READ },
289#endif
9527e566 290#ifdef CAP_AUDIT_WRITE
81810dd1 291 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
292#endif
293#ifdef CAP_AUDIT_CONTROL
81810dd1 294 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 295#endif
81810dd1
DL
296 { "setfcap", CAP_SETFCAP },
297 { "mac_override", CAP_MAC_OVERRIDE },
298 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
299#ifdef CAP_SYSLOG
300 { "syslog", CAP_SYSLOG },
301#endif
302#ifdef CAP_WAKE_ALARM
303 { "wake_alarm", CAP_WAKE_ALARM },
304#endif
2b54359b
CB
305#ifdef CAP_BLOCK_SUSPEND
306 { "block_suspend", CAP_BLOCK_SUSPEND },
307#endif
81810dd1 308};
495d2046
SG
309#else
310static struct caps_opt caps_opt[] = {};
311#endif
81810dd1 312
91c3830e
SH
313static int run_buffer(char *buffer)
314{
ebec9176 315 struct lxc_popen_FILE *f;
91c3830e 316 char *output;
8e7da691 317 int ret;
91c3830e 318
ebec9176 319 f = lxc_popen(buffer);
91c3830e
SH
320 if (!f) {
321 SYSERROR("popen failed");
322 return -1;
323 }
324
325 output = malloc(LXC_LOG_BUFFER_SIZE);
326 if (!output) {
327 ERROR("failed to allocate memory for script output");
ebec9176 328 lxc_pclose(f);
91c3830e
SH
329 return -1;
330 }
331
ebec9176 332 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
333 DEBUG("script output: %s", output);
334
335 free(output);
336
ebec9176 337 ret = lxc_pclose(f);
8e7da691 338 if (ret == -1) {
91c3830e
SH
339 SYSERROR("Script exited on error");
340 return -1;
8e7da691
DE
341 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
342 ERROR("Script exited with status %d", WEXITSTATUS(ret));
343 return -1;
344 } else if (WIFSIGNALED(ret)) {
345 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
346 strsignal(WTERMSIG(ret)));
347 return -1;
91c3830e
SH
348 }
349
350 return 0;
351}
352
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and run
 * it via run_buffer().
 *
 * @name:    container name (second word of the command line)
 * @section: config section (third word)
 * @script:  script to execute (first word)
 * @hook:    hook name (fourth word)
 * @lxcpath: not used by this function; kept for the callers' interface
 * @argsin:  optional NULL-terminated array of additional arguments
 *
 * The command line lives on the heap rather than in an alloca() buffer: its
 * length is derived from the arguments and is only bounded by INT_MAX, which
 * is far more than a stack frame can safely hold.
 *
 * NOTE(review): the words are joined with single spaces and are not quoted.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not be
 * built.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook,
			   const char *lxcpath, char **argsin)
{
	int ret, i;
	int result = -1;
	char *buffer;
	size_t size = 0;

	(void)lxcpath;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* Each extra argument needs room for itself and a leading space. */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		ret += rc;
	}

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
402
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and run it via
 * run_buffer().
 *
 * @name:    container name (second word of the command line)
 * @section: config section (third word)
 * @script:  script to execute (first word)
 * @...:     additional `char *` arguments; the list MUST end with a NULL
 *           pointer
 *
 * The command line lives on the heap rather than in an alloca() buffer
 * because its length is only bounded by INT_MAX, and va_end() is reached on
 * every path that called va_start().
 *
 * Returns the result of run_buffer(), or -1 if the command line could not be
 * built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	int result = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* First pass: each extra argument needs itself plus a leading space. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* Second pass: append the extra arguments. */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out;
		}
		ret += rc;
	}
	va_end(ap);

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
454
/*
 * Per-line callback used while scanning a list of filesystem types: treat the
 * trimmed line in @buffer as a filesystem type and try to mount the rootfs
 * with it.
 *
 * @buffer: one line of the file being scanned; it is modified in place
 * @data:   points to a structure holding the rootfs source, the mount target
 *          and the mount option string
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped or the
 * mount attempt failed, and -1 when the mount options could not be parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Initialised here so the free() calls below are safe even if
	 * parse_mntopts() fails before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* Strip surrounding whitespace from the line. */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
499
a17b1e65
SG
500static int mount_unknown_fs(const char *rootfs, const char *target,
501 const char *options)
78ae2fcc 502{
a6afdde9 503 int i;
78ae2fcc 504
505 struct cbarg {
506 const char *rootfs;
a6afdde9 507 const char *target;
a17b1e65 508 const char *options;
78ae2fcc 509 } cbarg = {
510 .rootfs = rootfs,
a6afdde9 511 .target = target,
a17b1e65 512 .options = options,
78ae2fcc 513 };
514
a6afdde9
DL
515 /*
516 * find the filesystem type with brute force:
517 * first we check with /etc/filesystems, in case the modules
78ae2fcc 518 * are auto-loaded and fall back to the supported kernel fs
519 */
520 char *fsfile[] = {
521 "/etc/filesystems",
522 "/proc/filesystems",
523 };
524
a6afdde9
DL
525 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
526
527 int ret;
528
529 if (access(fsfile[i], F_OK))
530 continue;
531
532 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
533 if (ret < 0) {
534 ERROR("failed to parse '%s'", fsfile[i]);
535 return -1;
536 }
537
538 if (ret)
539 return 0;
78ae2fcc 540 }
541
a6afdde9
DL
542 ERROR("failed to determine fs type for '%s'", rootfs);
543 return -1;
544}
545
a17b1e65
SG
/*
 * Mount a directory rootfs by recursively bind-mounting @rootfs on @target,
 * adding the flags and data parsed from @options.
 *
 * Returns the result of mount(2), or -1 if @options could not be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialised here so the free() on the error path is safe even if
	 * parse_mntopts() fails before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
563
/*
 * Attach the image file @rootfs to the loop device open on @fd.
 *
 * @rootfs: path of the backing file; opened read-write
 * @fd:     open descriptor of the loop device
 * @loinfo: zeroed and filled by this function; LO_FLAGS_AUTOCLEAR is
 *          requested in its flags
 *
 * If the status cannot be applied after the backing file was attached, the
 * device is detached again so it does not stay bound to the file without the
 * autoclear flag.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* Undo LOOP_SET_FD, otherwise the device stays in use. */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
595
a17b1e65
SG
/*
 * Mount a regular-file rootfs image: look through /dev for a loop device
 * that is not in use, attach @rootfs to it and mount that device on @target.
 *
 * A loop device counts as free when LOOP_GET_STATUS64 fails with ENXIO. Only
 * the first free device is tried.
 *
 * The directory is read with readdir(): readdir_r() is deprecated and its
 * caller-supplied struct dirent may be too small for long names.
 *
 * Returns 0 on success, -1 if /dev cannot be read, no free device was found,
 * or attaching/mounting failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((direntp = readdir(dir))) {

		/* Only "loop*" entries matter; this also skips "." and "..". */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* A status query that succeeds means the device is in use. */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
660
a17b1e65
SG
/*
 * Mount a block-device rootfs. The filesystem type is not known in advance,
 * so the work is handed to mount_unknown_fs() unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int ret = mount_unknown_fs(rootfs, target, options);

	return ret;
}
666
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing and keep it
 * open for the duration of the container run, to prevent the container from
 * marking the underlying fs read-only on shutdown. The file is unlinked
 * immediately so that no name is left behind in the rootfs.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, the path cannot
 *           be resolved, or it is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || rootfs[0] == '\0')
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	/* Reject both an output error and a truncated path. */
	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;

	/* The open descriptor keeps the file alive; the name is not needed. */
	(void)unlink(absrootfspin);
	return fd;
}
709
e2a7e8dc
SH
/*
 * When a remount is requested, make sure the restrictions already in force
 * on the existing mount (nosuid, nodev, read-only, noexec) are carried over.
 *
 * @s:     mount source; when NULL, @d is inspected instead
 * @d:     mount destination
 * @flags: requested mount flags
 *
 * Returns @flags unchanged unless MS_REMOUNT is set and the filesystem could
 * be queried with statvfs(); in that case the restrictions found there are
 * OR-ed in. Without HAVE_STATVFS the flags are always returned unchanged.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs sb;
	size_t k;

	if ((flags & MS_REMOUNT) == 0 || path == NULL)
		return flags;

	if (statvfs(path, &sb) < 0)
		return flags;

	for (k = 0; k < sizeof(sticky) / sizeof(sticky[0]); k++) {
		if (sb.f_flag & sticky[k])
			flags |= sticky[k];
	}

	return flags;
#else
	return flags;
#endif
}
746
4fb3cba5 747static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 748{
368bbc02 749 int r;
80e80c40 750 int i;
b06b8511
CS
751 static struct {
752 int match_mask;
753 int match_flag;
754 const char *source;
755 const char *destination;
756 const char *fstype;
757 unsigned long flags;
758 const char *options;
759 } default_mounts[] = {
760 /* Read-only bind-mounting... In older kernels, doing that required
761 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
762 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
763 * kernel 2.6.26 onwards. However, this apparently does not work on
764 * kernel 3.8. Unfortunately, on that very same kernel, doing the
765 * same trick as above doesn't seem to work either, there one needs
766 * to ALSO specify MS_BIND for the remount, otherwise the entire
767 * fs is remounted read-only or the mount fails because it's busy...
768 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
769 * 2.6.32...
368bbc02 770 */
f24a52d5 771 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
772 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
773 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
774 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
775 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 776 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
777 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
778 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
779 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
780 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
781 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
782 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
783 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
784 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
785 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
786 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
787 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
788 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 789 };
368bbc02 790
b06b8511
CS
791 for (i = 0; default_mounts[i].match_mask; i++) {
792 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
793 char *source = NULL;
794 char *destination = NULL;
795 int saved_errno;
e2a7e8dc 796 unsigned long mflags;
b06b8511
CS
797
798 if (default_mounts[i].source) {
799 /* will act like strdup if %r is not present */
8ede5f4c 800 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
801 if (!source) {
802 SYSERROR("memory allocation error");
803 return -1;
804 }
805 }
cc4fd506
SH
806 if (!default_mounts[i].destination) {
807 ERROR("BUG: auto mounts destination %d was NULL", i);
808 return -1;
809 }
810 /* will act like strdup if %r is not present */
811 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
812 if (!destination) {
813 saved_errno = errno;
814 SYSERROR("memory allocation error");
815 free(source);
816 errno = saved_errno;
817 return -1;
b06b8511 818 }
e2a7e8dc
SH
819 mflags = add_required_remount_flags(source, destination,
820 default_mounts[i].flags);
592fd47a 821 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 822 saved_errno = errno;
b88ff9a0
SG
823 if (r < 0 && errno == ENOENT) {
824 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
825 r = 0;
826 }
827 else if (r < 0)
e2a7e8dc 828 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 829
b06b8511
CS
830 free(source);
831 free(destination);
832 if (r < 0) {
b06b8511
CS
833 errno = saved_errno;
834 return -1;
835 }
368bbc02 836 }
368bbc02
CS
837 }
838
b06b8511 839 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
840 int cg_flags;
841
842 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
843 /* If the type of cgroup mount was not specified, it depends on the
844 * container's capabilities as to what makes sense: if we have
845 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
846 * anyway, so we may as well default to read-write; then the admin
847 * will not be given a false sense of security. (And if they really
848 * want mixed r/o r/w, then they can explicitly specify :mixed.)
849 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
850 * :mixed, because then the container can't remount it read-write. */
851 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
852 int has_sys_admin = 0;
853 if (!lxc_list_empty(&conf->keepcaps)) {
854 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
855 } else {
856 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
857 }
858 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
859 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
860 } else {
861 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
862 }
863 }
864
8ede5f4c 865 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 866 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 867 return -1;
368bbc02
CS
868 }
869 }
870
368bbc02 871 return 0;
368bbc02
CS
872}
873
a17b1e65 874static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 875{
b09ef133 876 char absrootfs[MAXPATHLEN];
78ae2fcc 877 struct stat s;
a6afdde9 878 int i;
78ae2fcc 879
a17b1e65 880 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 881
882 struct rootfs_type {
883 int type;
884 rootfs_cb cb;
885 } rtfs_type[] = {
2656d231
DL
886 { S_IFDIR, mount_rootfs_dir },
887 { S_IFBLK, mount_rootfs_block },
888 { S_IFREG, mount_rootfs_file },
78ae2fcc 889 };
0ad19a3f 890
4c8ab83b 891 if (!realpath(rootfs, absrootfs)) {
36eb9bde 892 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 893 return -1;
894 }
b09ef133 895
b09ef133 896 if (access(absrootfs, F_OK)) {
36eb9bde 897 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 898 return -1;
899 }
900
78ae2fcc 901 if (stat(absrootfs, &s)) {
36eb9bde 902 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 903 return -1;
904 }
905
78ae2fcc 906 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 907
78ae2fcc 908 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
909 continue;
9b0f0477 910
a17b1e65 911 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 912 }
9b0f0477 913
36eb9bde 914 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 915 return -1;
0ad19a3f 916}
917
/*
 * Set the hostname to the nodename stored in @utsname.
 *
 * A NULL @utsname means there is nothing to configure and counts as success.
 * Returns 0 on success, -1 if sethostname() failed.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
932
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	/* what the link points to */
	const char *oldpath;
	/* link name, relative to /dev */
	const char *name;
};

/* Standard /dev links that point into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
944
945static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
946{
947 char path[MAXPATHLEN];
948 int ret,i;
09227be2 949 struct stat s;
69aa6655
DE
950
951
952 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
953 const struct dev_symlinks *d = &dev_symlinks[i];
cd2b3cfe 954 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
955 if (ret < 0 || ret >= MAXPATHLEN)
956 return -1;
09227be2
MW
957
958 /*
959 * Stat the path first. If we don't get an error
960 * accept it as is and don't try to create it
961 */
962 if (!stat(path, &s)) {
963 continue;
964 }
965
69aa6655 966 ret = symlink(d->oldpath, path);
09227be2 967
69aa6655 968 if (ret && errno != EEXIST) {
09227be2
MW
969 if ( errno == EROFS ) {
970 WARN("Warning: Read Only file system while creating %s", path);
971 } else {
972 SYSERROR("Error creating %s", path);
973 return -1;
974 }
69aa6655
DE
975 }
976 }
977 return 0;
978}
979
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * @pp:   address of the list; when *pp is NULL a new string starting with
 *        "container_ttys=" is allocated, otherwise @name is appended after a
 *        single space
 * @name: pty name to add
 *
 * Returns true on success, false if memory could not be allocated (an
 * existing list is left untouched in that case).
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t old_len;
	char *list;

	if (*pp == NULL) {
		list = malloc(prefix_len + name_len + 1);
		if (list == NULL)
			return false;
		memcpy(list, prefix, prefix_len);
		/* name_len + 1 copies the terminating NUL as well */
		memcpy(list + prefix_len, name, name_len + 1);
		*pp = list;
		return true;
	}

	old_len = strlen(*pp);
	list = realloc(*pp, old_len + name_len + 2);
	if (list == NULL)
		return false;

	list[old_len] = ' ';
	memcpy(list + old_len + 1, name, name_len + 1);
	*pp = list;
	return true;
}
1002
1003static int setup_tty(struct lxc_conf *conf)
1004{
393903d1
SH
1005 const struct lxc_tty_info *tty_info = &conf->tty_info;
1006 char *ttydir = conf->ttydir;
7c6ef2a2
SH
1007 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1008 int i, ret;
b0a33c1e 1009
e8bd4e43 1010 if (!conf->rootfs.path)
bc9bd0e3
DL
1011 return 0;
1012
b0a33c1e 1013 for (i = 0; i < tty_info->nbtty; i++) {
1014
1015 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
1016
e8bd4e43 1017 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
1018 if (ret >= sizeof(path)) {
1019 ERROR("pathname too long for ttys");
1020 return -1;
1021 }
1022 if (ttydir) {
1023 /* create dev/lxc/tty%d" */
e8bd4e43 1024 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
1025 if (ret >= sizeof(lxcpath)) {
1026 ERROR("pathname too long for ttys");
1027 return -1;
1028 }
1029 ret = creat(lxcpath, 0660);
1030 if (ret==-1 && errno != EEXIST) {
959aee9c 1031 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
1032 return -1;
1033 }
4d44e274
SH
1034 if (ret >= 0)
1035 close(ret);
7c6ef2a2
SH
1036 ret = unlink(path);
1037 if (ret && errno != ENOENT) {
959aee9c 1038 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1039 return -1;
1040 }
b0a33c1e 1041
7c6ef2a2
SH
1042 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1043 WARN("failed to mount '%s'->'%s'",
1044 pty_info->name, path);
1045 continue;
1046 }
13954cce 1047
9ba8130c
SH
1048 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1049 if (ret >= sizeof(lxcpath)) {
1050 ERROR("tty pathname too long");
1051 return -1;
1052 }
7c6ef2a2
SH
1053 ret = symlink(lxcpath, path);
1054 if (ret) {
959aee9c 1055 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1056 return -1;
1057 }
1058 } else {
c6883f38
SH
1059 /* If we populated /dev, then we need to create /dev/ttyN */
1060 if (access(path, F_OK)) {
1061 ret = creat(path, 0660);
1062 if (ret==-1) {
959aee9c 1063 SYSERROR("error creating %s", path);
c6883f38 1064 /* this isn't fatal, continue */
025ed0f3 1065 } else {
c6883f38 1066 close(ret);
025ed0f3 1067 }
c6883f38 1068 }
7c6ef2a2 1069 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 1070 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
1071 continue;
1072 }
393903d1 1073 }
e8bd4e43 1074 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
1075 ERROR("Error setting up container_ttys string");
1076 return -1;
b0a33c1e 1077 }
1078 }
1079
cd54d859
DL
1080 INFO("%d tty(s) has been setup", tty_info->nbtty);
1081
b0a33c1e 1082 return 0;
1083}
1084
bf601689 1085
2d489f9e 1086static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 1087{
2d489f9e 1088 int oldroot = -1, newroot = -1;
bf601689 1089
2d489f9e
SH
1090 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1091 if (oldroot < 0) {
1092 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1093 return -1;
1094 }
2d489f9e
SH
1095 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1096 if (newroot < 0) {
1097 SYSERROR("Error opening new-/ for fchdir");
1098 goto fail;
c08556c6 1099 }
bf601689 1100
cc6f6dd7 1101 /* change into new root fs */
2d489f9e 1102 if (fchdir(newroot)) {
cc6f6dd7 1103 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1104 goto fail;
cc6f6dd7
DL
1105 }
1106
cc6f6dd7 1107 /* pivot_root into our new root fs */
2d489f9e 1108 if (pivot_root(".", ".")) {
cc6f6dd7 1109 SYSERROR("pivot_root syscall failed");
2d489f9e 1110 goto fail;
bf601689 1111 }
cc6f6dd7 1112
2d489f9e
SH
1113 /*
1114 * at this point the old-root is mounted on top of our new-root
1115 * To unmounted it we must not be chdir'd into it, so escape back
1116 * to old-root
1117 */
1118 if (fchdir(oldroot) < 0) {
1119 SYSERROR("Error entering oldroot");
1120 goto fail;
1121 }
7981ea46 1122 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1123 SYSERROR("Error detaching old root");
1124 goto fail;
cc6f6dd7
DL
1125 }
1126
2d489f9e
SH
1127 if (fchdir(newroot) < 0) {
1128 SYSERROR("Error re-entering newroot");
1129 goto fail;
1130 }
cc6f6dd7 1131
2d489f9e
SH
1132 close(oldroot);
1133 close(newroot);
bf601689 1134
2d489f9e 1135 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1136
bf601689 1137 return 0;
2d489f9e
SH
1138
1139fail:
1140 if (oldroot != -1)
1141 close(oldroot);
1142 if (newroot != -1)
1143 close(newroot);
1144 return -1;
bf601689
MH
1145}
1146
bc6928ff 1147/*
87da4ec3
SH
1148 * Just create a path for /dev under $lxcpath/$name and in rootfs
1149 * If we hit an error, log it but don't fail yet.
91c3830e 1150 */
14221cbb 1151static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1152{
1153 int ret;
87da4ec3
SH
1154 size_t clen;
1155 char *path;
91c3830e 1156
14221cbb 1157 INFO("Mounting container /dev");
bc6928ff 1158
14221cbb
DW
1159 /* $(rootfs->mount) + "/dev/pts" + '\0' */
1160 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1161 path = alloca(clen);
bc6928ff 1162
14221cbb 1163 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
87da4ec3 1164 if (ret < 0 || ret >= clen)
91c3830e 1165 return -1;
bc6928ff 1166
87da4ec3 1167 if (!dir_exists(path)) {
14221cbb 1168 WARN("No /dev in container.");
87da4ec3
SH
1169 WARN("Proceeding without autodev setup");
1170 return 0;
bc6928ff 1171 }
87da4ec3 1172
592fd47a
SH
1173 if (safe_mount("none", path, "tmpfs", 0, "size=100000,mode=755",
1174 rootfs->path ? rootfs->mount : NULL)) {
87da4ec3
SH
1175 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1176 return false;
91c3830e 1177 }
87da4ec3
SH
1178
1179 INFO("Mounted tmpfs onto %s", path);
1180
14221cbb 1181 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
87da4ec3 1182 if (ret < 0 || ret >= clen)
91c3830e 1183 return -1;
87da4ec3 1184
bc6928ff
MW
1185 /*
1186 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1187 * If not, then create it and exit if that fails...
1188 */
87da4ec3 1189 if (!dir_exists(path)) {
bc6928ff
MW
1190 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1191 if (ret) {
1192 SYSERROR("Failed to create /dev/pts in container");
1193 return -1;
1194 }
91c3830e
SH
1195 }
1196
14221cbb 1197 INFO("Mounted container /dev");
91c3830e
SH
1198 return 0;
1199}
1200
/* One device node to be made available under the container's /dev. */
struct lxc_devs {
	const char *name;	/* node name below /dev */
	mode_t mode;		/* file type and permission bits */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes populated by fill_autodev(). */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1217
14221cbb 1218static int fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38
SH
1219{
1220 int ret;
c6883f38
SH
1221 char path[MAXPATHLEN];
1222 int i;
3a32201c 1223 mode_t cmask;
c6883f38 1224
14221cbb 1225 INFO("Creating initial consoles under container /dev");
91c3830e 1226
14221cbb 1227 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
91c3830e
SH
1228 if (ret < 0 || ret >= MAXPATHLEN) {
1229 ERROR("Error calculating container /dev location");
c6883f38 1230 return -1;
f7bee6c6 1231 }
91c3830e 1232
9cb4d183
SH
1233 if (!dir_exists(path)) // ignore, just don't try to fill in
1234 return 0;
1235
14221cbb 1236 INFO("Populating container /dev");
3a32201c 1237 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1238 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1239 const struct lxc_devs *d = &lxc_devs[i];
14221cbb 1240 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
c6883f38
SH
1241 if (ret < 0 || ret >= MAXPATHLEN)
1242 return -1;
1243 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1244 if (ret && errno != EEXIST) {
9cb4d183
SH
1245 char hostpath[MAXPATHLEN];
1246 FILE *pathfile;
1247
1248 // Unprivileged containers cannot create devices, so
1249 // bind mount the device from the host
1250 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1251 if (ret < 0 || ret >= MAXPATHLEN)
1252 return -1;
1253 pathfile = fopen(path, "wb");
1254 if (!pathfile) {
1255 SYSERROR("Failed to create device mount target '%s'", path);
1256 return -1;
1257 }
1258 fclose(pathfile);
592fd47a
SH
1259 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
1260 rootfs->path ? rootfs->mount : NULL) != 0) {
9cb4d183
SH
1261 SYSERROR("Failed bind mounting device %s from host into container",
1262 d->name);
1263 return -1;
1264 }
c6883f38
SH
1265 }
1266 }
3a32201c 1267 umask(cmask);
c6883f38 1268
14221cbb 1269 INFO("Populated container /dev");
c6883f38
SH
1270 return 0;
1271}
1272
cc28d0b0 1273static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1274{
cc28d0b0
SH
1275 const struct lxc_rootfs *rootfs = &conf->rootfs;
1276
a0f379bf
DW
1277 if (!rootfs->path) {
1278 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1279 SYSERROR("Failed to make / rslave");
1280 return -1;
1281 }
c69bd12f 1282 return 0;
a0f379bf 1283 }
0ad19a3f 1284
12297168 1285 if (access(rootfs->mount, F_OK)) {
b1789442 1286 SYSERROR("failed to access to '%s', check it is present",
12297168 1287 rootfs->mount);
b1789442
DL
1288 return -1;
1289 }
1290
9be53773 1291 // First try mounting rootfs using a bdev
76a26f55 1292 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1293 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1294 bdev_put(bdev);
9be53773
SH
1295 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1296 return 0;
1297 }
59d66af2
SH
1298 if (bdev)
1299 bdev_put(bdev);
a17b1e65 1300 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1301 ERROR("failed to mount rootfs");
c3f0a28c 1302 return -1;
1303 }
0ad19a3f 1304
12297168 1305 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1306
ac778708
DL
1307 return 0;
1308}
1309
91e93c71
AV
1310int prepare_ramfs_root(char *root)
1311{
1312 char buf[LINELEN], *p;
1313 char nroot[PATH_MAX];
1314 FILE *f;
1315 int i;
1316 char *p2;
1317
1318 if (realpath(root, nroot) == NULL)
1319 return -1;
1320
1321 if (chdir("/") == -1)
1322 return -1;
1323
1324 /*
1325 * We could use here MS_MOVE, but in userns this mount is
1326 * locked and can't be moved.
1327 */
1328 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1329 SYSERROR("Failed to move %s into /", root);
1330 return -1;
1331 }
1332
88322f77 1333 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1334 SYSERROR("Failed to make . rprivate");
1335 return -1;
1336 }
1337
1338 /*
1339 * The following code cleans up inhereted mounts which are not
1340 * required for CT.
1341 *
1342 * The mountinfo file shows not all mounts, if a few points have been
1343 * unmounted between read operations from the mountinfo. So we need to
1344 * read mountinfo a few times.
1345 *
1346 * This loop can be skipped if a container uses unserns, because all
1347 * inherited mounts are locked and we should live with all this trash.
1348 */
1349 while (1) {
1350 int progress = 0;
1351
1352 f = fopen("./proc/self/mountinfo", "r");
1353 if (!f) {
1354 SYSERROR("Unable to open /proc/self/mountinfo");
1355 return -1;
1356 }
1357 while (fgets(buf, LINELEN, f)) {
1358 for (p = buf, i=0; p && i < 4; i++)
1359 p = strchr(p+1, ' ');
1360 if (!p)
1361 continue;
1362 p2 = strchr(p+1, ' ');
1363 if (!p2)
1364 continue;
1365
1366 *p2 = '\0';
1367 *p = '.';
1368
1369 if (strcmp(p + 1, "/") == 0)
1370 continue;
1371 if (strcmp(p + 1, "/proc") == 0)
1372 continue;
1373
1374 if (umount2(p, MNT_DETACH) == 0)
1375 progress++;
1376 }
1377 fclose(f);
1378 if (!progress)
1379 break;
1380 }
1381
8bea9fae
PR
1382 /* This also can be skipped if a container uses unserns */
1383 umount2("./proc", MNT_DETACH);
91e93c71
AV
1384
1385 /* It is weird, but chdir("..") moves us in a new root */
1386 if (chdir("..") == -1) {
1387 SYSERROR("Unable to change working directory");
1388 return -1;
1389 }
1390
1391 if (chroot(".") == -1) {
1392 SYSERROR("Unable to chroot");
1393 return -1;
1394 }
1395
1396 return 0;
1397}
1398
74a3920a 1399static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1400{
ac778708
DL
1401 if (!rootfs->path)
1402 return 0;
1403
91e93c71
AV
1404 if (detect_ramfs_rootfs()) {
1405 if (prepare_ramfs_root(rootfs->mount))
1406 return -1;
1407 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1408 ERROR("failed to setup pivot root");
25368b52 1409 return -1;
c69bd12f
DL
1410 }
1411
25368b52 1412 return 0;
0ad19a3f 1413}
1414
d852c78c 1415static int setup_pts(int pts)
3c26f34e 1416{
77890c6d
SW
1417 char target[PATH_MAX];
1418
d852c78c
DL
1419 if (!pts)
1420 return 0;
3c26f34e 1421
1422 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1423 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1424 return -1;
1425 }
1426
7e40254a
JTLB
1427 if (mkdir("/dev/pts", 0755)) {
1428 if ( errno != EEXIST ) {
1429 SYSERROR("failed to create '/dev/pts'");
1430 return -1;
1431 }
1432 }
1433
a6afdde9 1434 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1435 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1436 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1437 return -1;
1438 }
1439
3c26f34e 1440 if (access("/dev/ptmx", F_OK)) {
1441 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1442 goto out;
36eb9bde 1443 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1444 return -1;
1445 }
1446
77890c6d
SW
1447 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1448 goto out;
1449
3c26f34e 1450 /* fallback here, /dev/pts/ptmx exists just mount bind */
1451 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1452 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1453 return -1;
1454 }
cd54d859
DL
1455
1456 INFO("created new pts instance");
d852c78c 1457
3c26f34e 1458out:
1459 return 0;
1460}
1461
cccc74b5
DL
/*
 * Apply the execution domain @persona via personality(), if the build has
 * <sys/personality.h>. A @persona of -1 means "leave unchanged".
 *
 * Returns 0 on success (or when nothing was done), -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1478
7c6ef2a2 1479static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1480 const struct lxc_console *console)
6e590161 1481{
63376d7d
DL
1482 char path[MAXPATHLEN];
1483 struct stat s;
7c6ef2a2 1484 int ret;
52e35957 1485
7c6ef2a2
SH
1486 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1487 if (ret >= sizeof(path)) {
959aee9c 1488 ERROR("console path too long");
7c6ef2a2
SH
1489 return -1;
1490 }
52e35957 1491
63376d7d 1492 if (access(path, F_OK)) {
466978b0 1493 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1494 return 0;
52e35957
DL
1495 }
1496
b5159817
DE
1497 if (console->master < 0) {
1498 INFO("no console");
f78a1f32
DL
1499 return 0;
1500 }
ed502555 1501
63376d7d
DL
1502 if (stat(path, &s)) {
1503 SYSERROR("failed to stat '%s'", path);
1504 return -1;
1505 }
1506
1507 if (chmod(console->name, s.st_mode)) {
1508 SYSERROR("failed to set mode '0%o' to '%s'",
1509 s.st_mode, console->name);
1510 return -1;
1511 }
13954cce 1512
592fd47a 1513 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1514 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1515 return -1;
1516 }
1517
63376d7d 1518 INFO("console has been setup");
7c6ef2a2
SH
1519 return 0;
1520}
1521
1522static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1523 const struct lxc_console *console,
1524 char *ttydir)
1525{
1526 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1527 int ret;
1528
1529 /* create rootfs/dev/<ttydir> directory */
1530 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1531 ttydir);
1532 if (ret >= sizeof(path))
1533 return -1;
1534 ret = mkdir(path, 0755);
1535 if (ret && errno != EEXIST) {
959aee9c 1536 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1537 return -1;
1538 }
959aee9c 1539 INFO("created %s", path);
7c6ef2a2
SH
1540
1541 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1542 rootfs->mount, ttydir);
1543 if (ret >= sizeof(lxcpath)) {
959aee9c 1544 ERROR("console path too long");
7c6ef2a2
SH
1545 return -1;
1546 }
1547
1548 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1549 ret = unlink(path);
1550 if (ret && errno != ENOENT) {
959aee9c 1551 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1552 return -1;
1553 }
1554
1555 ret = creat(lxcpath, 0660);
1556 if (ret==-1 && errno != EEXIST) {
959aee9c 1557 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1558 return -1;
1559 }
4d44e274
SH
1560 if (ret >= 0)
1561 close(ret);
7c6ef2a2 1562
b5159817
DE
1563 if (console->master < 0) {
1564 INFO("no console");
7c6ef2a2
SH
1565 return 0;
1566 }
1567
592fd47a 1568 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1569 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1570 return -1;
1571 }
1572
1573 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1574 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1575 if (ret >= sizeof(lxcpath)) {
1576 ERROR("lxc/console path too long");
1577 return -1;
1578 }
7c6ef2a2
SH
1579 ret = symlink(lxcpath, path);
1580 if (ret) {
1581 SYSERROR("failed to create symlink for console");
1582 return -1;
1583 }
1584
1585 INFO("console has been setup on %s", lxcpath);
cd54d859 1586
6e590161 1587 return 0;
1588}
1589
7c6ef2a2
SH
1590static int setup_console(const struct lxc_rootfs *rootfs,
1591 const struct lxc_console *console,
1592 char *ttydir)
1593{
1594 /* We don't have a rootfs, /dev/console will be shared */
1595 if (!rootfs->path)
1596 return 0;
1597 if (!ttydir)
1598 return setup_dev_console(rootfs, console);
1599
1600 return setup_ttydir_console(rootfs, console, ttydir);
1601}
1602
1bd051a6
SH
1603static int setup_kmsg(const struct lxc_rootfs *rootfs,
1604 const struct lxc_console *console)
1605{
1606 char kpath[MAXPATHLEN];
1607 int ret;
1608
222fea5a
DE
1609 if (!rootfs->path)
1610 return 0;
1bd051a6
SH
1611 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1612 if (ret < 0 || ret >= sizeof(kpath))
1613 return -1;
1614
1615 ret = unlink(kpath);
1616 if (ret && errno != ENOENT) {
959aee9c 1617 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1618 return -1;
1619 }
1620
1621 ret = symlink("console", kpath);
1622 if (ret) {
1623 SYSERROR("failed to create symlink for kmsg");
1624 return -1;
1625 }
1626
1627 return 0;
1628}
1629
998ac676
RT
1630static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1631{
1632 struct mount_opt *mo;
1633
1634 /* If opt is found in mount_opt, set or clear flags.
1635 * Otherwise append it to data. */
1636
1637 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1638 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1639 if (mo->clear)
1640 *flags &= ~mo->flag;
1641 else
1642 *flags |= mo->flag;
1643 return;
1644 }
1645 }
1646
1647 if (strlen(*data))
1648 strcat(*data, ",");
1649 strcat(*data, opt);
1650}
1651
/*
 * Split the comma-separated option string @mntopts into mount flags and a
 * data string.
 *
 * On return *@mntflags holds the flags and *@mntdata is either NULL (no data
 * options) or a newly allocated string the caller must free. Both outputs
 * are reset before anything else, so *@mntdata is NULL on failure.
 *
 * Returns 0 on success (including @mntopts == NULL), -1 on allocation
 * failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the full option string */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	*extra = 0;

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (*extra)
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1690
6fd5e769
SH
/*
 * Terminate @word in place at its first space or tab. A string without
 * either is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1697
/*
 * Skip @nfields whitespace-delimited fields in @src.
 *
 * Each step advances to the next space or tab and then one character past
 * it. Returns a pointer to where scanning stopped, which is the terminating
 * NUL if @src has fewer separators than requested.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1715
911324ef
DL
/*
 * Mount @fsname on @target and, for bind or remount requests, follow up with
 * a remount so that the requested flags take effect.
 *
 * The first mount goes through safe_mount() with MS_REMOUNT stripped. When
 * statvfs() is available, flags already present on the source (nosuid, nodev,
 * ro, noexec) are added to the remount; a plain bind mount whose flags need
 * no change skips the remount.
 *
 * @optional: when non-zero, a failed mount is logged and reported as success.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, const char *rootfs)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif
	bool skip_remount = false;

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		unsigned long rqd_flags;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

		rqd_flags = (mountflags & MS_RDONLY) ? MS_RDONLY : 0;

#ifdef HAVE_STATVFS
		if (statvfs(fsname, &sb) == 0) {
			unsigned long required_flags = rqd_flags;

			/* carry over restrictions already set on the source */
			required_flags |= sb.f_flag & (MS_NOSUID | MS_NODEV |
						       MS_RDONLY | MS_NOEXEC);

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

			/*
			 * For a plain bind mount, skip the remount if it
			 * would not add any flag beyond those requested.
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(required_flags & ~mountflags) && rqd_flags == 0) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				skip_remount = true;
			} else {
				mountflags |= required_flags;
			}
		}
#endif

		if (!skip_remount &&
		    mount(fsname, target, fstype, mountflags | MS_REMOUNT, data) < 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}
			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1792
4e4ca161
SH
/*
 * Remove the lxc-only options 'optional', 'create=dir' and 'create=file'
 * from mntent->mnt_opts, editing the string in place.
 *
 * An option is only matched where an option can start (beginning of the
 * string or right after a comma) and is removed up to and including the
 * following comma. When the removed option is the last one, the comma in
 * front of it is dropped as well, so no dangling separator is left behind.
 * Every occurrence is removed.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
		NULL
	};
	char *opts = mntent->mnt_opts;
	int i;

	for (i = 0; culled[i]; i++) {
		size_t len = strlen(culled[i]);
		char *p = opts;

		while ((p = strstr(p, culled[i])) != NULL) {
			char *next;

			/* ignore matches inside another option's name */
			if (p != opts && p[-1] != ',') {
				p += len;
				continue;
			}

			next = strchr(p, ',');
			if (next) {
				/* pull the remaining options forward */
				memmove(p, next + 1, strlen(next + 1) + 1);
				continue;
			}

			/* last option: chop it together with its leading comma */
			if (p != opts)
				p--;
			*p = '\0';
		}
	}
}
1817
6e46cc0d
CB
1818static int mount_entry_create_overlay_dirs(const struct mntent *mntent,
1819 const struct lxc_rootfs *rootfs)
1820{
1821 char *del = NULL;
1822 char *lxcpath = NULL;
1823 char *upperdir = NULL;
1824 char *workdir = NULL;
1825 char **opts = NULL;
1826 size_t arrlen = 0;
1827 size_t dirlen = 0;
1828 size_t i;
1829 size_t len = 0;
1830 size_t rootfslen = 0;
1831
1832 if (!rootfs->path)
1833 return -1;
1834
1835 opts = lxc_string_split(mntent->mnt_opts, ',');
1836 if (opts)
1837 arrlen = lxc_array_len((void **)opts);
1838 else
1839 return -1;
1840
1841 for (i = 0; i < arrlen; i++) {
1842 if (strstr(opts[i], "upperdir=") && (strlen(opts[i]) > (len = strlen("upperdir="))))
1843 upperdir = opts[i] + len;
1844 else if (strstr(opts[i], "workdir=") && (strlen(opts[i]) > (len = strlen("workdir="))))
1845 workdir = opts[i] + len;
1846 }
1847
1848 lxcpath = strdup(rootfs->path);
1849 if (!lxcpath) {
1850 lxc_free_array((void **)opts, free);
1851 return -1;
1852 }
1853
1854 del = strstr(lxcpath, "/rootfs");
1855 if (!del) {
1856 free(lxcpath);
1857 lxc_free_array((void **)opts, free);
1858 return -1;
1859 }
1860 *del = '\0';
1861
1862 dirlen = strlen(lxcpath);
1863 rootfslen = strlen(rootfs->path);
1864
1865 /* We neither allow users to create upperdirs and workdirs outside the
1866 * containerdir nor inside the rootfs. The latter might be debatable. */
1867 if (upperdir)
1868 if ((strncmp(upperdir, lxcpath, dirlen) == 0) && (strncmp(upperdir, rootfs->path, rootfslen) != 0))
1869 if (mkdir_p(upperdir, 0755) < 0) {
1870 WARN("Failed to create upperdir");
1871 }
1872
1873
1874 if (workdir)
1875 if ((strncmp(workdir, lxcpath, dirlen) == 0) && (strncmp(workdir, rootfs->path, rootfslen) != 0))
1876 if (mkdir_p(workdir, 0755) < 0) {
1877 WARN("Failed to create workdir");
1878 }
1879
1880 free(lxcpath);
1881 lxc_free_array((void **)opts, free);
1882 return 0;
1883}
1884
1885static int mount_entry_create_aufs_dirs(const struct mntent *mntent,
1886 const struct lxc_rootfs *rootfs)
1887{
1888 char *del = NULL;
1889 char *lxcpath = NULL;
1890 char *scratch = NULL;
1891 char *tmp = NULL;
1892 char *upperdir = NULL;
1893 char **opts = NULL;
1894 size_t arrlen = 0;
1895 size_t i;
1896 size_t len = 0;
1897
1898 if (!rootfs->path)
1899 return -1;
1900
1901 opts = lxc_string_split(mntent->mnt_opts, ',');
1902 if (opts)
1903 arrlen = lxc_array_len((void **)opts);
1904 else
1905 return -1;
1906
1907 for (i = 0; i < arrlen; i++) {
1908 if (strstr(opts[i], "br=") && (strlen(opts[i]) > (len = strlen("br="))))
1909 tmp = opts[i] + len;
1910 }
1911 if (!tmp) {
1912 lxc_free_array((void **)opts, free);
1913 return -1;
1914 }
1915
1916 upperdir = strtok_r(tmp, ":=", &scratch);
1917 if (!upperdir) {
1918 lxc_free_array((void **)opts, free);
1919 return -1;
1920 }
1921
1922 lxcpath = strdup(rootfs->path);
1923 if (!lxcpath) {
1924 lxc_free_array((void **)opts, free);
1925 return -1;
1926 }
1927
1928 del = strstr(lxcpath, "/rootfs");
1929 if (!del) {
1930 free(lxcpath);
1931 lxc_free_array((void **)opts, free);
1932 return -1;
1933 }
1934 *del = '\0';
1935
1936 /* We neither allow users to create upperdirs outside the containerdir
1937 * nor inside the rootfs. The latter might be debatable. */
1938 if ((strncmp(upperdir, lxcpath, strlen(lxcpath)) == 0) && (strncmp(upperdir, rootfs->path, strlen(rootfs->path)) != 0))
1939 if (mkdir_p(upperdir, 0755) < 0) {
1940 WARN("Failed to create upperdir");
1941 }
1942
1943 free(lxcpath);
1944 lxc_free_array((void **)opts, free);
1945 return 0;
1946}
1947
/*
 * Create whatever a mount entry needs before it can be mounted on @path.
 *
 * For overlay and aufs entries the upper/work directories are created first.
 * With the 'create=dir' option @path is created as a directory; with
 * 'create=file' (and @path not existing yet) its parent directory and an
 * empty file are created.
 *
 * Returns 0 on success, -1 if the overlay/aufs preparation fails, the mount
 * target cannot be created, or memory runs out.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs)
{
	int ret = 0;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (mount_entry_create_overlay_dirs(mntent, rootfs) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (mount_entry_create_aufs_dirs(mntent, rootfs) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		char *pathcopy = strdup(path);

		if (!pathcopy) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so the copy - not the result - is freed.
		 */
		if (mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
1987
db4aba38 1988static inline int mount_entry_on_generic(struct mntent *mntent,
6e46cc0d 1989 const char* path, const struct lxc_rootfs *rootfs)
4d5b72a1
NC
1990{
1991 unsigned long mntflags;
1992 char *mntdata;
1993 int ret;
1994 bool optional = hasmntopt(mntent, "optional") != NULL;
1995
6e46cc0d 1996 ret = mount_entry_create_dir_file(mntent, path, rootfs);
34cfffb3 1997
608e3567
SH
1998 if (ret < 0)
1999 return optional ? 0 : -1;
2000
4e4ca161
SH
2001 cull_mntent_opt(mntent);
2002
a17b1e65
SG
2003 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
2004 free(mntdata);
2005 return -1;
2006 }
2007
6e46cc0d
CB
2008 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
2009 mntdata, optional,
2010 rootfs->path ? rootfs->mount : NULL);
68c152ef 2011
911324ef 2012 free(mntdata);
911324ef
DL
2013 return ret;
2014}
2015
db4aba38
NC
2016static inline int mount_entry_on_systemfs(struct mntent *mntent)
2017{
592fd47a 2018 return mount_entry_on_generic(mntent, mntent->mnt_dir, NULL);
db4aba38
NC
2019}
2020
4e4ca161 2021static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
2022 const struct lxc_rootfs *rootfs,
2023 const char *lxc_name)
911324ef 2024{
013bd428 2025 char *aux;
59760f5d 2026 char path[MAXPATHLEN];
80a881b2 2027 int r, ret = 0, offset;
67e571de 2028 const char *lxcpath;
0ad19a3f 2029
593e8478 2030 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
2031 if (!lxcpath) {
2032 ERROR("Out of memory");
2033 return -1;
2034 }
2035
80a881b2 2036 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
2037 * use $lxcpath/CN/rootfs as the target prefix */
2038 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
2039 if (r < 0 || r >= MAXPATHLEN)
2040 goto skipvarlib;
2041
2042 aux = strstr(mntent->mnt_dir, path);
2043 if (aux) {
2044 offset = strlen(path);
2045 goto skipabs;
2046 }
2047
2048skipvarlib:
013bd428
DL
2049 aux = strstr(mntent->mnt_dir, rootfs->path);
2050 if (!aux) {
2051 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 2052 return ret;
013bd428 2053 }
80a881b2
SH
2054 offset = strlen(rootfs->path);
2055
2056skipabs:
013bd428 2057
9ba8130c 2058 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
2059 aux + offset);
2060 if (r < 0 || r >= MAXPATHLEN) {
2061 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
2062 return -1;
2063 }
2064
6e46cc0d 2065 return mount_entry_on_generic(mntent, path, rootfs);
911324ef 2066}
d330fe7b 2067
4e4ca161 2068static int mount_entry_on_relative_rootfs(struct mntent *mntent,
6e46cc0d 2069 const struct lxc_rootfs *rootfs)
911324ef
DL
2070{
2071 char path[MAXPATHLEN];
911324ef 2072 int ret;
d330fe7b 2073
34cfffb3 2074 /* relative to root mount point */
6e46cc0d 2075 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
9ba8130c
SH
2076 if (ret >= sizeof(path)) {
2077 ERROR("path name too long");
2078 return -1;
2079 }
911324ef 2080
592fd47a 2081 return mount_entry_on_generic(mntent, path, rootfs);
911324ef
DL
2082}
2083
80a881b2
SH
2084static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2085 const char *lxc_name)
911324ef 2086{
aaf901be
AM
2087 struct mntent mntent;
2088 char buf[4096];
911324ef 2089 int ret = -1;
e76b8764 2090
aaf901be 2091 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2092
911324ef 2093 if (!rootfs->path) {
aaf901be 2094 if (mount_entry_on_systemfs(&mntent))
e76b8764 2095 goto out;
911324ef 2096 continue;
e76b8764
CDC
2097 }
2098
911324ef 2099 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2100 if (mntent.mnt_dir[0] != '/') {
2101 if (mount_entry_on_relative_rootfs(&mntent,
6e46cc0d 2102 rootfs))
911324ef
DL
2103 goto out;
2104 continue;
2105 }
cd54d859 2106
aaf901be 2107 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2108 goto out;
0ad19a3f 2109 }
cd54d859 2110
0ad19a3f 2111 ret = 0;
cd54d859
DL
2112
2113 INFO("mount points have been setup");
0ad19a3f 2114out:
e7938e9e
MN
2115 return ret;
2116}
2117
80a881b2
SH
/*
 * Mount every entry listed in the fstab-style file 'fstab'.
 *
 * A NULL 'fstab' means there is nothing to do and is reported as success.
 * Returns 0 on success, -1 if the file cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *table;
	int status;

	if (fstab == NULL)
		return 0;

	table = setmntent(fstab, "r");
	if (table == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	status = mount_file_entries(rootfs, table, lxc_name);
	endmntent(table);

	return status;
}
2138
9fc7f8c0 2139FILE *write_mount_file(struct lxc_list *mount)
e7938e9e
MN
2140{
2141 FILE *file;
2142 struct lxc_list *iterator;
2143 char *mount_entry;
e7938e9e
MN
2144
2145 file = tmpfile();
2146 if (!file) {
2147 ERROR("tmpfile error: %m");
9fc7f8c0 2148 return NULL;
e7938e9e
MN
2149 }
2150
2151 lxc_list_for_each(iterator, mount) {
2152 mount_entry = iterator->elem;
1d6b1976 2153 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2154 }
2155
2156 rewind(file);
9fc7f8c0
TA
2157 return file;
2158}
2159
/*
 * Mount the entries held in the in-memory list 'mount': the list is first
 * dumped to a temporary file by write_mount_file() and then processed by
 * mount_file_entries().
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
	const char *lxc_name)
{
	int status;
	FILE *table = write_mount_file(mount);

	if (table == NULL)
		return -1;

	status = mount_file_entries(rootfs, table, lxc_name);
	fclose(table);

	return status;
}
2175
bab88e68
CS
2176static int parse_cap(const char *cap)
2177{
2178 char *ptr = NULL;
2179 int i, capid = -1;
2180
7035407c
DE
2181 if (!strcmp(cap, "none"))
2182 return -2;
2183
bab88e68
CS
2184 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2185
2186 if (strcmp(cap, caps_opt[i].name))
2187 continue;
2188
2189 capid = caps_opt[i].value;
2190 break;
2191 }
2192
2193 if (capid < 0) {
2194 /* try to see if it's numeric, so the user may specify
2195 * capabilities that the running kernel knows about but
2196 * we don't */
2197 errno = 0;
2198 capid = strtol(cap, &ptr, 10);
2199 if (!ptr || *ptr != '\0' || errno != 0)
2200 /* not a valid number */
2201 capid = -1;
2202 else if (capid > lxc_caps_last_cap())
2203 /* we have a number but it's not a valid
2204 * capability */
2205 capid = -1;
2206 }
2207
2208 return capid;
2209}
2210
0769b82a
CS
2211int in_caplist(int cap, struct lxc_list *caps)
2212{
2213 struct lxc_list *iterator;
2214 int capid;
2215
2216 lxc_list_for_each(iterator, caps) {
2217 capid = parse_cap(iterator->elem);
2218 if (capid == cap)
2219 return 1;
2220 }
2221
2222 return 0;
2223}
2224
81810dd1
DL
2225static int setup_caps(struct lxc_list *caps)
2226{
2227 struct lxc_list *iterator;
2228 char *drop_entry;
bab88e68 2229 int capid;
81810dd1
DL
2230
2231 lxc_list_for_each(iterator, caps) {
2232
2233 drop_entry = iterator->elem;
2234
bab88e68 2235 capid = parse_cap(drop_entry);
d55bc1ad 2236
81810dd1 2237 if (capid < 0) {
1e11be34
DL
2238 ERROR("unknown capability %s", drop_entry);
2239 return -1;
81810dd1
DL
2240 }
2241
2242 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2243
2244 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2245 SYSERROR("failed to remove %s capability", drop_entry);
2246 return -1;
2247 }
81810dd1
DL
2248
2249 }
2250
1fb86a7c
SH
2251 DEBUG("capabilities have been setup");
2252
2253 return 0;
2254}
2255
2256static int dropcaps_except(struct lxc_list *caps)
2257{
2258 struct lxc_list *iterator;
2259 char *keep_entry;
1fb86a7c
SH
2260 int i, capid;
2261 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2262 INFO("found %d capabilities", numcaps);
1fb86a7c 2263
2caf9a97
SH
2264 if (numcaps <= 0 || numcaps > 200)
2265 return -1;
2266
1fb86a7c
SH
2267 // caplist[i] is 1 if we keep capability i
2268 int *caplist = alloca(numcaps * sizeof(int));
2269 memset(caplist, 0, numcaps * sizeof(int));
2270
2271 lxc_list_for_each(iterator, caps) {
2272
2273 keep_entry = iterator->elem;
2274
bab88e68 2275 capid = parse_cap(keep_entry);
1fb86a7c 2276
7035407c
DE
2277 if (capid == -2)
2278 continue;
2279
1fb86a7c
SH
2280 if (capid < 0) {
2281 ERROR("unknown capability %s", keep_entry);
2282 return -1;
2283 }
2284
8255688a 2285 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2286
2287 caplist[capid] = 1;
2288 }
2289 for (i=0; i<numcaps; i++) {
2290 if (caplist[i])
2291 continue;
2292 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2293 SYSERROR("failed to remove capability %d", i);
2294 return -1;
2295 }
1fb86a7c
SH
2296 }
2297
2298 DEBUG("capabilities have been setup");
81810dd1
DL
2299
2300 return 0;
2301}
2302
0ad19a3f 2303static int setup_hw_addr(char *hwaddr, const char *ifname)
2304{
2305 struct sockaddr sockaddr;
2306 struct ifreq ifr;
2307 int ret, fd;
2308
3cfc0f3a
MN
2309 ret = lxc_convert_mac(hwaddr, &sockaddr);
2310 if (ret) {
2311 ERROR("mac address '%s' conversion failed : %s",
2312 hwaddr, strerror(-ret));
0ad19a3f 2313 return -1;
2314 }
2315
2316 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2317 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2318 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2319
2320 fd = socket(AF_INET, SOCK_DGRAM, 0);
2321 if (fd < 0) {
3ab87b66 2322 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2323 return -1;
2324 }
2325
2326 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2327 close(fd);
2328 if (ret)
3ab87b66 2329 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2330
5da6aa8c 2331 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2332
0ad19a3f 2333 return ret;
2334}
2335
82d5ae15 2336static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2337{
82d5ae15
DL
2338 struct lxc_list *iterator;
2339 struct lxc_inetdev *inetdev;
3cfc0f3a 2340 int err;
0ad19a3f 2341
82d5ae15
DL
2342 lxc_list_for_each(iterator, ip) {
2343
2344 inetdev = iterator->elem;
2345
0093bb8c
DL
2346 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2347 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2348 if (err) {
2349 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2350 ifindex, strerror(-err));
82d5ae15
DL
2351 return -1;
2352 }
2353 }
2354
2355 return 0;
0ad19a3f 2356}
2357
82d5ae15 2358static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2359{
82d5ae15 2360 struct lxc_list *iterator;
7fa9074f 2361 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2362 int err;
0ad19a3f 2363
82d5ae15
DL
2364 lxc_list_for_each(iterator, ip) {
2365
2366 inet6dev = iterator->elem;
2367
b3df193c 2368 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2369 &inet6dev->mcast, &inet6dev->acast,
2370 inet6dev->prefix);
3cfc0f3a
MN
2371 if (err) {
2372 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2373 ifindex, strerror(-err));
82d5ae15 2374 return -1;
3cfc0f3a 2375 }
82d5ae15
DL
2376 }
2377
2378 return 0;
0ad19a3f 2379}
2380
82d5ae15 2381static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2382{
0ad19a3f 2383 char ifname[IFNAMSIZ];
0ad19a3f 2384 char *current_ifname = ifname;
3cfc0f3a 2385 int err;
0ad19a3f 2386
82d5ae15
DL
2387 /* empty network namespace */
2388 if (!netdev->ifindex) {
b0efbac4 2389 if (netdev->flags & IFF_UP) {
d472214b 2390 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2391 if (err) {
2392 ERROR("failed to set the loopback up : %s",
2393 strerror(-err));
82d5ae15
DL
2394 return -1;
2395 }
82d5ae15 2396 }
40790553
SH
2397 if (netdev->type != LXC_NET_VETH)
2398 return 0;
2399 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2400 }
13954cce 2401
b466dc33 2402 /* get the new ifindex in case of physical netdev */
40790553 2403 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2404 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2405 ERROR("failed to get ifindex for %s",
2406 netdev->link);
2407 return -1;
2408 }
40790553 2409 }
b466dc33 2410
82d5ae15
DL
2411 /* retrieve the name of the interface */
2412 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2413 ERROR("no interface corresponding to index '%d'",
82d5ae15 2414 netdev->ifindex);
0ad19a3f 2415 return -1;
2416 }
13954cce 2417
018ef520 2418 /* default: let the system to choose one interface name */
9d083402 2419 if (!netdev->name)
fb6d9b2f
DL
2420 netdev->name = netdev->type == LXC_NET_PHYS ?
2421 netdev->link : "eth%d";
018ef520 2422
82d5ae15 2423 /* rename the interface name */
40790553
SH
2424 if (strcmp(ifname, netdev->name) != 0) {
2425 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2426 if (err) {
2427 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2428 strerror(-err));
2429 return -1;
2430 }
018ef520
DL
2431 }
2432
2433 /* Re-read the name of the interface because its name has changed
2434 * and would be automatically allocated by the system
2435 */
82d5ae15 2436 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2437 ERROR("no interface corresponding to index '%d'",
82d5ae15 2438 netdev->ifindex);
018ef520 2439 return -1;
0ad19a3f 2440 }
2441
82d5ae15
DL
2442 /* set a mac address */
2443 if (netdev->hwaddr) {
2444 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2445 ERROR("failed to setup hw address for '%s'",
82d5ae15 2446 current_ifname);
0ad19a3f 2447 return -1;
2448 }
2449 }
2450
82d5ae15
DL
2451 /* setup ipv4 addresses on the interface */
2452 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2453 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2454 ifname);
2455 return -1;
2456 }
2457
82d5ae15
DL
2458 /* setup ipv6 addresses on the interface */
2459 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2460 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2461 ifname);
2462 return -1;
2463 }
2464
82d5ae15 2465 /* set the network device up */
b0efbac4 2466 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2467 int err;
2468
d472214b 2469 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2470 if (err) {
2471 ERROR("failed to set '%s' up : %s", current_ifname,
2472 strerror(-err));
0ad19a3f 2473 return -1;
2474 }
2475
2476 /* the network is up, make the loopback up too */
d472214b 2477 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2478 if (err) {
2479 ERROR("failed to set the loopback up : %s",
2480 strerror(-err));
0ad19a3f 2481 return -1;
2482 }
2483 }
2484
f8fee0e2
MK
2485 /* We can only set up the default routes after bringing
2486 * up the interface, sine bringing up the interface adds
2487 * the link-local routes and we can't add a default
2488 * route if the gateway is not reachable. */
2489
2490 /* setup ipv4 gateway on the interface */
2491 if (netdev->ipv4_gateway) {
2492 if (!(netdev->flags & IFF_UP)) {
2493 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2494 return -1;
2495 }
2496
2497 if (lxc_list_empty(&netdev->ipv4)) {
2498 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2499 return -1;
2500 }
2501
2502 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2503 if (err) {
fc739df5
SG
2504 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2505 if (err) {
2506 ERROR("failed to add ipv4 dest for '%s': %s",
2507 ifname, strerror(-err));
2508 }
2509
2510 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2511 if (err) {
2512 ERROR("failed to setup ipv4 gateway for '%s': %s",
2513 ifname, strerror(-err));
2514 if (netdev->ipv4_gateway_auto) {
2515 char buf[INET_ADDRSTRLEN];
2516 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2517 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2518 }
2519 return -1;
19a26f82 2520 }
f8fee0e2
MK
2521 }
2522 }
2523
2524 /* setup ipv6 gateway on the interface */
2525 if (netdev->ipv6_gateway) {
2526 if (!(netdev->flags & IFF_UP)) {
2527 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2528 return -1;
2529 }
2530
2531 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2532 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2533 return -1;
2534 }
2535
2536 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2537 if (err) {
fc739df5
SG
2538 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2539 if (err) {
2540 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2541 ifname, strerror(-err));
19a26f82 2542 }
fc739df5
SG
2543
2544 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2545 if (err) {
2546 ERROR("failed to setup ipv6 gateway for '%s': %s",
2547 ifname, strerror(-err));
2548 if (netdev->ipv6_gateway_auto) {
2549 char buf[INET6_ADDRSTRLEN];
2550 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2551 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2552 }
2553 return -1;
2554 }
f8fee0e2
MK
2555 }
2556 }
2557
cd54d859
DL
2558 DEBUG("'%s' has been setup", current_ifname);
2559
0ad19a3f 2560 return 0;
2561}
2562
5f4535a3 2563static int setup_network(struct lxc_list *network)
0ad19a3f 2564{
82d5ae15 2565 struct lxc_list *iterator;
82d5ae15 2566 struct lxc_netdev *netdev;
0ad19a3f 2567
5f4535a3 2568 lxc_list_for_each(iterator, network) {
cd54d859 2569
5f4535a3 2570 netdev = iterator->elem;
82d5ae15
DL
2571
2572 if (setup_netdev(netdev)) {
2573 ERROR("failed to setup netdev");
2574 return -1;
2575 }
2576 }
cd54d859 2577
5f4535a3
DL
2578 if (!lxc_list_empty(network))
2579 INFO("network has been setup");
cd54d859
DL
2580
2581 return 0;
0ad19a3f 2582}
2583
2af6bd1b
SH
2584/* try to move physical nics to the init netns */
2585void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2586{
2587 int i, ret, oldfd;
2588 char path[MAXPATHLEN];
2589
2590 if (netnsfd < 0)
2591 return;
2592
2593 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2594 if (ret < 0 || ret >= MAXPATHLEN) {
2595 WARN("Failed to open monitor netns fd");
2596 return;
2597 }
2598 if ((oldfd = open(path, O_RDONLY)) < 0) {
2599 SYSERROR("Failed to open monitor netns fd");
2600 return;
2601 }
2602 if (setns(netnsfd, 0) != 0) {
2603 SYSERROR("Failed to enter container netns to reset nics");
2604 close(oldfd);
2605 return;
2606 }
2607 for (i=0; i<conf->num_savednics; i++) {
2608 struct saved_nic *s = &conf->saved_nics[i];
8d357196 2609 if (lxc_netdev_move_by_index(s->ifindex, 1, NULL))
2af6bd1b
SH
2610 WARN("Error moving nic index:%d back to host netns",
2611 s->ifindex);
2612 }
2613 if (setns(oldfd, 0) != 0)
2614 SYSERROR("Failed to re-enter monitor's netns");
2615 close(oldfd);
2616}
2617
2618void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2619{
2620 int i;
2621
2af6bd1b
SH
2622 if (conf->num_savednics == 0)
2623 return;
2624
7b35f3d6 2625 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2626 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2627 for (i=0; i<conf->num_savednics; i++) {
2628 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2629 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2630 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2631 free(s->orig_name);
2632 }
2633 conf->num_savednics = 0;
7b35f3d6
SH
2634}
2635
ae9242c8
SH
2636static char *default_rootfs_mount = LXCROOTFSMOUNT;
2637
7b379ab3 2638struct lxc_conf *lxc_conf_init(void)
089cd8b8 2639{
7b379ab3 2640 struct lxc_conf *new;
26ddeedd 2641 int i;
7b379ab3
MN
2642
2643 new = malloc(sizeof(*new));
2644 if (!new) {
2645 ERROR("lxc_conf_init : %m");
2646 return NULL;
2647 }
2648 memset(new, 0, sizeof(*new));
2649
b40a606e 2650 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2651 new->personality = -1;
124fa0a8 2652 new->autodev = 1;
596a818d
DE
2653 new->console.log_path = NULL;
2654 new->console.log_fd = -1;
28a4b0e5 2655 new->console.path = NULL;
63376d7d 2656 new->console.peer = -1;
b5159817
DE
2657 new->console.peerpty.busy = -1;
2658 new->console.peerpty.master = -1;
2659 new->console.peerpty.slave = -1;
63376d7d
DL
2660 new->console.master = -1;
2661 new->console.slave = -1;
2662 new->console.name[0] = '\0';
d2e30e99 2663 new->maincmd_fd = -1;
76a26f55 2664 new->nbd_idx = -1;
54c30e29 2665 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2666 if (!new->rootfs.mount) {
2667 ERROR("lxc_conf_init : %m");
2668 free(new);
2669 return NULL;
2670 }
d89de239 2671 new->kmsg = 0;
858377e4 2672 new->logfd = -1;
7b379ab3
MN
2673 lxc_list_init(&new->cgroup);
2674 lxc_list_init(&new->network);
2675 lxc_list_init(&new->mount_list);
81810dd1 2676 lxc_list_init(&new->caps);
1fb86a7c 2677 lxc_list_init(&new->keepcaps);
f6d3e3e4 2678 lxc_list_init(&new->id_map);
f979ac15 2679 lxc_list_init(&new->includes);
4184c3e1 2680 lxc_list_init(&new->aliens);
7c661726 2681 lxc_list_init(&new->environment);
26ddeedd
SH
2682 for (i=0; i<NUM_LXC_HOOKS; i++)
2683 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2684 lxc_list_init(&new->groups);
fe4de9a6
DE
2685 new->lsm_aa_profile = NULL;
2686 new->lsm_se_context = NULL;
5112cd70 2687 new->tmp_umount_proc = 0;
7b379ab3 2688
9f30a190
MM
2689 for (i = 0; i < LXC_NS_MAX; i++)
2690 new->inherit_ns_fd[i] = -1;
2691
72bb04e4
PT
2692 /* if running in a new user namespace, init and COMMAND
2693 * default to running as UID/GID 0 when using lxc-execute */
2694 new->init_uid = 0;
2695 new->init_gid = 0;
2696
7b379ab3 2697 return new;
089cd8b8
DL
2698}
2699
a589434e 2700static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2701{
8634bc19 2702 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2703 char veth2buf[IFNAMSIZ], *veth2;
e54864d3 2704 int err, mtu = 0;
13954cce 2705
8bee8851 2706 if (netdev->priv.veth_attr.pair) {
e892973e 2707 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2708 if (handler->conf->reboot)
2709 lxc_netdev_delete_by_name(veth1);
2710 } else {
9ba8130c
SH
2711 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2712 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2713 ERROR("veth1 name too long");
2714 return -1;
2715 }
a0265685 2716 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2717 if (!veth1) {
2718 ERROR("failed to allocate a temporary name");
2719 return -1;
2720 }
74a2b586
JK
2721 /* store away for deconf */
2722 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2723 }
82d5ae15 2724
0e391e57 2725 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2726 veth2 = lxc_mkifname(veth2buf);
ad40563e 2727 if (!veth2) {
82d5ae15 2728 ERROR("failed to allocate a temporary name");
ad40563e 2729 goto out_delete;
0ad19a3f 2730 }
2731
3cfc0f3a
MN
2732 err = lxc_veth_create(veth1, veth2);
2733 if (err) {
2e2d6a7b 2734 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2735 strerror(-err));
ad40563e 2736 goto out_delete;
0ad19a3f 2737 }
13954cce 2738
49684c0b
CS
2739 /* changing the high byte of the mac address to 0xfe, the bridge interface
2740 * will always keep the host's mac address and not take the mac address
2741 * of a container */
2742 err = setup_private_host_hw_addr(veth1);
2743 if (err) {
2e2d6a7b 2744 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2745 veth1, strerror(-err));
2746 goto out_delete;
2747 }
2748
af651aa9
SN
2749 netdev->ifindex = if_nametoindex(veth2);
2750 if (!netdev->ifindex) {
2751 ERROR("failed to retrieve the index for %s", veth2);
2752 goto out_delete;
2753 }
2754
82d5ae15 2755 if (netdev->mtu) {
e54864d3
NC
2756 mtu = atoi(netdev->mtu);
2757 } else if (netdev->link) {
af651aa9 2758 mtu = netdev_get_mtu(netdev->ifindex);
e54864d3
NC
2759 }
2760
2761 if (mtu) {
2762 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2763 if (!err)
e54864d3 2764 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2765 if (err) {
e54864d3
NC
2766 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2767 mtu, veth1, veth2, strerror(-err));
eb14c10a 2768 goto out_delete;
75d09f83
DL
2769 }
2770 }
2771
3cfc0f3a
MN
2772 if (netdev->link) {
2773 err = lxc_bridge_attach(netdev->link, veth1);
2774 if (err) {
2e2d6a7b 2775 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2776 veth1, netdev->link, strerror(-err));
2777 goto out_delete;
2778 }
eb14c10a
DL
2779 }
2780
d472214b 2781 err = lxc_netdev_up(veth1);
6e35af2e
DL
2782 if (err) {
2783 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2784 goto out_delete;
0ad19a3f 2785 }
2786
e3b4c4c4 2787 if (netdev->upscript) {
751d9dcd
DL
2788 err = run_script(handler->name, "net", netdev->upscript, "up",
2789 "veth", veth1, (char*) NULL);
2790 if (err)
e3b4c4c4 2791 goto out_delete;
e3b4c4c4
ST
2792 }
2793
a589434e 2794 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2795 veth1, veth2, netdev->ifindex);
2796
6ab9ab6d 2797 return 0;
eb14c10a
DL
2798
2799out_delete:
b84f58b9 2800 lxc_netdev_delete_by_name(veth1);
f10fad2f 2801 if (!netdev->priv.veth_attr.pair)
ad40563e 2802 free(veth1);
f10fad2f 2803 free(veth2);
6ab9ab6d 2804 return -1;
13954cce 2805}
d957ae2d 2806
74a2b586
JK
2807static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2808{
2809 char *veth1;
2810 int err;
2811
2812 if (netdev->priv.veth_attr.pair)
2813 veth1 = netdev->priv.veth_attr.pair;
2814 else
2815 veth1 = netdev->priv.veth_attr.veth1;
2816
2817 if (netdev->downscript) {
2818 err = run_script(handler->name, "net", netdev->downscript,
2819 "down", "veth", veth1, (char*) NULL);
2820 if (err)
2821 return -1;
2822 }
2823 return 0;
2824}
2825
a589434e 2826static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2827{
0e391e57 2828 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2829 int err;
d957ae2d
MT
2830
2831 if (!netdev->link) {
2832 ERROR("no link specified for macvlan netdev");
2833 return -1;
2834 }
13954cce 2835
9ba8130c
SH
2836 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2837 if (err >= sizeof(peerbuf))
2838 return -1;
82d5ae15 2839
a0265685 2840 peer = lxc_mkifname(peerbuf);
ad40563e 2841 if (!peer) {
82d5ae15
DL
2842 ERROR("failed to make a temporary name");
2843 return -1;
0ad19a3f 2844 }
2845
3cfc0f3a
MN
2846 err = lxc_macvlan_create(netdev->link, peer,
2847 netdev->priv.macvlan_attr.mode);
2848 if (err) {
2849 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2850 peer, netdev->link, strerror(-err));
ad40563e 2851 goto out;
0ad19a3f 2852 }
2853
82d5ae15
DL
2854 netdev->ifindex = if_nametoindex(peer);
2855 if (!netdev->ifindex) {
36eb9bde 2856 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2857 goto out;
22ebac19 2858 }
2859
e3b4c4c4 2860 if (netdev->upscript) {
751d9dcd
DL
2861 err = run_script(handler->name, "net", netdev->upscript, "up",
2862 "macvlan", netdev->link, (char*) NULL);
2863 if (err)
ad40563e 2864 goto out;
e3b4c4c4
ST
2865 }
2866
a589434e 2867 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2868 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2869
d957ae2d 2870 return 0;
ad40563e
ÇO
2871out:
2872 lxc_netdev_delete_by_name(peer);
2873 free(peer);
2874 return -1;
0ad19a3f 2875}
2876
74a2b586
JK
2877static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2878{
2879 int err;
2880
2881 if (netdev->downscript) {
2882 err = run_script(handler->name, "net", netdev->downscript,
2883 "down", "macvlan", netdev->link,
2884 (char*) NULL);
2885 if (err)
2886 return -1;
2887 }
2888 return 0;
2889}
2890
a589434e
JN
2891/* XXX: merge with instantiate_macvlan */
2892static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2893{
2894 char peer[IFNAMSIZ];
3cfc0f3a 2895 int err;
82f58d03 2896 static uint16_t vlan_cntr = 0;
26c39028
JHS
2897
2898 if (!netdev->link) {
2899 ERROR("no link specified for vlan netdev");
2900 return -1;
2901 }
2902
82f58d03 2903 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2904 if (err >= sizeof(peer)) {
2905 ERROR("peer name too long");
2906 return -1;
2907 }
26c39028 2908
3cfc0f3a
MN
2909 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2910 if (err) {
2911 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2912 peer, netdev->link, strerror(-err));
26c39028
JHS
2913 return -1;
2914 }
2915
2916 netdev->ifindex = if_nametoindex(peer);
2917 if (!netdev->ifindex) {
2918 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2919 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2920 return -1;
2921 }
2922
a589434e 2923 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
2924 netdev->ifindex);
2925
26c39028
JHS
2926 return 0;
2927}
2928
74a2b586
JK
/*
 * Shutdown handler for vlan devices: nothing to do, always returns 0.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* both arguments are part of the handler signature but unused here */
	(void)handler;
	(void)netdev;

	return 0;
}
2933
a589434e 2934static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2935{
6168e99f
DL
2936 if (!netdev->link) {
2937 ERROR("no link specified for the physical interface");
2938 return -1;
2939 }
2940
9d083402 2941 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2942 if (!netdev->ifindex) {
9d083402 2943 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2944 return -1;
2945 }
2946
e3b4c4c4
ST
2947 if (netdev->upscript) {
2948 int err;
751d9dcd
DL
2949 err = run_script(handler->name, "net", netdev->upscript,
2950 "up", "phys", netdev->link, (char*) NULL);
2951 if (err)
e3b4c4c4 2952 return -1;
e3b4c4c4
ST
2953 }
2954
82d5ae15 2955 return 0;
0ad19a3f 2956}
2957
74a2b586
JK
2958static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2959{
2960 int err;
2961
2962 if (netdev->downscript) {
2963 err = run_script(handler->name, "net", netdev->downscript,
2964 "down", "phys", netdev->link, (char*) NULL);
2965 if (err)
2966 return -1;
2967 }
2968 return 0;
2969}
2970
a589434e 2971static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2972{
2973 netdev->ifindex = 0;
2974 return 0;
2975}
2976
a589434e 2977static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2978{
82d5ae15 2979 netdev->ifindex = 0;
e3b4c4c4
ST
2980 if (netdev->upscript) {
2981 int err;
751d9dcd
DL
2982 err = run_script(handler->name, "net", netdev->upscript,
2983 "up", "empty", (char*) NULL);
2984 if (err)
e3b4c4c4 2985 return -1;
e3b4c4c4 2986 }
82d5ae15 2987 return 0;
0ad19a3f 2988}
2989
74a2b586
JK
2990static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2991{
2992 int err;
2993
2994 if (netdev->downscript) {
2995 err = run_script(handler->name, "net", netdev->downscript,
2996 "down", "empty", (char*) NULL);
2997 if (err)
2998 return -1;
2999 }
3000 return 0;
3001}
3002
26b797f3
SH
/*
 * Shutdown handler for the "none" network type: nothing to do, always
 * returns 0.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* both arguments are part of the handler signature but unused here */
	(void)handler;
	(void)netdev;

	return 0;
}
3007
3008int lxc_requests_empty_network(struct lxc_handler *handler)
3009{
3010 struct lxc_list *network = &handler->conf->network;
3011 struct lxc_list *iterator;
3012 struct lxc_netdev *netdev;
3013 bool found_none = false, found_nic = false;
3014
3015 if (lxc_list_empty(network))
3016 return 0;
3017
3018 lxc_list_for_each(iterator, network) {
3019
3020 netdev = iterator->elem;
3021
3022 if (netdev->type == LXC_NET_NONE)
3023 found_none = true;
3024 else
3025 found_nic = true;
3026 }
3027 if (found_none && !found_nic)
3028 return 1;
3029 return 0;
3030}
3031
e3b4c4c4 3032int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 3033{
e3b4c4c4 3034 struct lxc_list *network = &handler->conf->network;
82d5ae15 3035 struct lxc_list *iterator;
82d5ae15 3036 struct lxc_netdev *netdev;
cbef6c52
SH
3037 int am_root = (getuid() == 0);
3038
3039 if (!am_root)
3040 return 0;
0ad19a3f 3041
5f4535a3 3042 lxc_list_for_each(iterator, network) {
0ad19a3f 3043
5f4535a3 3044 netdev = iterator->elem;
13954cce 3045
24654103 3046 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 3047 ERROR("invalid network configuration type '%d'",
5f4535a3 3048 netdev->type);
82d5ae15
DL
3049 return -1;
3050 }
0ad19a3f 3051
e3b4c4c4 3052 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
3053 ERROR("failed to create netdev");
3054 return -1;
3055 }
e3b4c4c4 3056
0ad19a3f 3057 }
3058
3059 return 0;
3060}
3061
74a2b586 3062void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 3063{
74a2b586 3064 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
3065 struct lxc_list *iterator;
3066 struct lxc_netdev *netdev;
3067
3068 lxc_list_for_each(iterator, network) {
3069 netdev = iterator->elem;
d472214b 3070
74a2b586 3071 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
3072 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
3073 WARN("failed to rename to the initial name the " \
3074 "netdev '%s'", netdev->link);
d472214b 3075 continue;
d8f8e352 3076 }
d472214b 3077
74a2b586
JK
3078 if (netdev_deconf[netdev->type](handler, netdev)) {
3079 WARN("failed to destroy netdev");
3080 }
3081
d8f8e352
DL
3082 /* Recent kernel remove the virtual interfaces when the network
3083 * namespace is destroyed but in case we did not moved the
3084 * interface to the network namespace, we have to destroy it
3085 */
74a2b586
JK
3086 if (netdev->ifindex != 0 &&
3087 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3088 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3089 }
3090}
3091
45e854dc
SG
3092#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3093
fe1f672f
ÇO
3094/* lxc-user-nic returns "interface_name:interface_name\n" */
3095#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 3096static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3097{
3098 pid_t child;
a7242d9a
ÇO
3099 int bytes, pipefd[2];
3100 char *token, *saveptr = NULL;
fe1f672f 3101 char buffer[MAX_BUFFER_SIZE];
cff7b5eb 3102 char netdev_link[IFNAMSIZ+1];
cbef6c52
SH
3103
3104 if (netdev->type != LXC_NET_VETH) {
3105 ERROR("nic type %d not support for unprivileged use",
3106 netdev->type);
3107 return -1;
3108 }
3109
a7242d9a
ÇO
3110 if(pipe(pipefd) < 0) {
3111 SYSERROR("pipe failed");
3112 return -1;
3113 }
3114
cbef6c52
SH
3115 if ((child = fork()) < 0) {
3116 SYSERROR("fork");
a7242d9a
ÇO
3117 close(pipefd[0]);
3118 close(pipefd[1]);
3119 return -1;
3120 }
3121
3122 if (child == 0) { // child
3123 /* close the read-end of the pipe */
3124 close(pipefd[0]);
3125 /* redirect the stdout to write-end of the pipe */
3126 dup2(pipefd[1], STDOUT_FILENO);
3127 /* close the write-end of the pipe */
fe1f672f 3128 close(pipefd[1]);
a7242d9a
ÇO
3129
3130 // Call lxc-user-nic pid type bridge
3131 char pidstr[20];
cff7b5eb
FN
3132 if (netdev->link) {
3133 strncpy(netdev_link, netdev->link, IFNAMSIZ);
3134 } else {
3135 strncpy(netdev_link, "none", IFNAMSIZ);
3136 }
3137 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev_link, netdev->name, NULL };
a7242d9a
ÇO
3138 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3139 pidstr[19] = '\0';
3140 execvp(args[0], args);
3141 SYSERROR("execvp lxc-user-nic");
3142 exit(1);
3143 }
3144
3145 /* close the write-end of the pipe */
3146 close(pipefd[1]);
3147
fe1f672f 3148 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3149 if (bytes < 0) {
3150 SYSERROR("read failed");
3151 }
3152 buffer[bytes - 1] = '\0';
3153
3154 if (wait_for_pid(child) != 0) {
3155 close(pipefd[0]);
cbef6c52
SH
3156 return -1;
3157 }
3158
a7242d9a
ÇO
3159 /* close the read-end of the pipe */
3160 close(pipefd[0]);
cbef6c52 3161
a7242d9a
ÇO
3162 /* fill netdev->name field */
3163 token = strtok_r(buffer, ":", &saveptr);
3164 if (!token)
3165 return -1;
658979c5
SH
3166 netdev->name = malloc(IFNAMSIZ+1);
3167 if (!netdev->name) {
3168 ERROR("Out of memory");
3169 return -1;
3170 }
3171 memset(netdev->name, 0, IFNAMSIZ+1);
3172 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3173
3174 /* fill netdev->veth_attr.pair field */
3175 token = strtok_r(NULL, ":", &saveptr);
3176 if (!token)
3177 return -1;
3178 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3179 if (!netdev->priv.veth_attr.pair) {
3180 ERROR("Out of memory");
3181 return -1;
3182 }
45e854dc 3183
a7242d9a 3184 return 0;
cbef6c52
SH
3185}
3186
5f4535a3 3187int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3188{
82d5ae15 3189 struct lxc_list *iterator;
82d5ae15 3190 struct lxc_netdev *netdev;
cbef6c52 3191 int am_root = (getuid() == 0);
3cfc0f3a 3192 int err;
0ad19a3f 3193
5f4535a3 3194 lxc_list_for_each(iterator, network) {
82d5ae15 3195
5f4535a3 3196 netdev = iterator->elem;
82d5ae15 3197
fbb16259 3198 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3199 if (unpriv_assign_nic(netdev, pid))
3200 return -1;
658979c5
SH
3201 // lxc-user-nic has moved the nic to the new ns.
3202 // unpriv_assign_nic() fills in netdev->name.
3203 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3204 continue;
3205 }
236087a6 3206
fbb16259
SH
3207 /* empty network namespace, nothing to move */
3208 if (!netdev->ifindex)
3209 continue;
3210
8d357196 3211 err = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3cfc0f3a
MN
3212 if (err) {
3213 ERROR("failed to move '%s' to the container : %s",
3214 netdev->link, strerror(-err));
82d5ae15
DL
3215 return -1;
3216 }
3217
c1c75c04 3218 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3219 }
3220
3221 return 0;
3222}
3223
251d0d2a
DE
3224static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3225 size_t buf_size)
f6d3e3e4
SH
3226{
3227 char path[PATH_MAX];
e4ccd113 3228 int ret, closeret;
f6d3e3e4
SH
3229 FILE *f;
3230
3231 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3232 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3233 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3234 return -E2BIG;
3235 }
3236 f = fopen(path, "w");
3237 if (!f) {
3238 perror("open");
3239 return -EINVAL;
3240 }
251d0d2a 3241 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3242 if (ret < 0)
e4ccd113
SH
3243 SYSERROR("writing id mapping");
3244 closeret = fclose(f);
3245 if (closeret)
3246 SYSERROR("writing id mapping");
3247 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3248}
3249
3250int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3251{
3252 struct lxc_list *iterator;
3253 struct id_map *map;
8afb3e61 3254 int ret = 0, use_shadow = 0;
251d0d2a 3255 enum idtype type;
8afb3e61
SG
3256 char *buf = NULL, *pos, *cmdpath = NULL;
3257
22038de5
SH
3258 /*
3259 * If newuidmap exists, that is, if shadow is handing out subuid
3260 * ranges, then insist that root also reserve ranges in subuid. This
3261 * will protected it by preventing another user from being handed the
3262 * range by shadow.
3263 */
9d9c111c 3264 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3265 if (cmdpath) {
3266 use_shadow = 1;
3267 free(cmdpath);
3268 }
3269
0e6e3a41
SG
3270 if (!use_shadow && geteuid()) {
3271 ERROR("Missing newuidmap/newgidmap");
3272 return -1;
3273 }
251d0d2a
DE
3274
3275 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3276 int left, fill;
cf3ef16d
SH
3277 int had_entry = 0;
3278 if (!buf) {
3279 buf = pos = malloc(4096);
4f7521b4
SH
3280 if (!buf)
3281 return -ENOMEM;
cf3ef16d
SH
3282 }
3283 pos = buf;
0e6e3a41 3284 if (use_shadow)
d1838f34 3285 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3286 type == ID_TYPE_UID ? 'u' : 'g',
3287 pid);
4f7521b4 3288
cf3ef16d
SH
3289 lxc_list_for_each(iterator, idmap) {
3290 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3291 map = iterator->elem;
cf3ef16d
SH
3292 if (map->idtype != type)
3293 continue;
3294
3295 had_entry = 1;
3296 left = 4096 - (pos - buf);
d1838f34 3297 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3298 use_shadow ? " " : "",
d1838f34 3299 map->nsid, map->hostid, map->range,
0e6e3a41 3300 use_shadow ? "" : "\n");
cf3ef16d
SH
3301 if (fill <= 0 || fill >= left)
3302 SYSERROR("snprintf failed, too many mappings");
3303 pos += fill;
251d0d2a 3304 }
cf3ef16d 3305 if (!had_entry)
4f7521b4 3306 continue;
cf3ef16d 3307
0e6e3a41 3308 if (!use_shadow) {
cf3ef16d 3309 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3310 } else {
3311 left = 4096 - (pos - buf);
3312 fill = snprintf(pos, left, "\n");
3313 if (fill <= 0 || fill >= left)
3314 SYSERROR("snprintf failed, too many mappings");
3315 pos += fill;
cf3ef16d 3316 ret = system(buf);
d1838f34 3317 }
cf3ef16d 3318
f6d3e3e4
SH
3319 if (ret)
3320 break;
3321 }
251d0d2a 3322
f10fad2f 3323 free(buf);
f6d3e3e4
SH
3324 return ret;
3325}
3326
cf3ef16d 3327/*
7b50c609
TS
3328 * return the host uid/gid to which the container root is mapped in
3329 * *val.
0b3a6504 3330 * Return true if id was found, false otherwise.
cf3ef16d 3331 */
2a9a80cb 3332bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3333 unsigned long *val)
cf3ef16d
SH
3334{
3335 struct lxc_list *it;
3336 struct id_map *map;
3337
3338 lxc_list_for_each(it, &conf->id_map) {
3339 map = it->elem;
7b50c609 3340 if (map->idtype != idtype)
cf3ef16d
SH
3341 continue;
3342 if (map->nsid != 0)
3343 continue;
2a9a80cb
SH
3344 *val = map->hostid;
3345 return true;
cf3ef16d 3346 }
2a9a80cb 3347 return false;
cf3ef16d
SH
3348}
3349
2133f58c 3350int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3351{
3352 struct lxc_list *it;
3353 struct id_map *map;
3354 lxc_list_for_each(it, &conf->id_map) {
3355 map = it->elem;
2133f58c 3356 if (map->idtype != idtype)
cf3ef16d
SH
3357 continue;
3358 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3359 return (id - map->hostid) + map->nsid;
cf3ef16d 3360 }
57d116ab 3361 return -1;
cf3ef16d
SH
3362}
3363
2133f58c 3364int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3365{
3366 struct lxc_list *it;
3367 struct id_map *map;
2133f58c 3368 unsigned int freeid = 0;
cf3ef16d
SH
3369again:
3370 lxc_list_for_each(it, &conf->id_map) {
3371 map = it->elem;
2133f58c 3372 if (map->idtype != idtype)
cf3ef16d
SH
3373 continue;
3374 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3375 freeid = map->nsid + map->range;
3376 goto again;
3377 }
3378 }
3379 return freeid;
3380}
3381
19a26f82
MK
3382int lxc_find_gateway_addresses(struct lxc_handler *handler)
3383{
3384 struct lxc_list *network = &handler->conf->network;
3385 struct lxc_list *iterator;
3386 struct lxc_netdev *netdev;
3387 int link_index;
3388
3389 lxc_list_for_each(iterator, network) {
3390 netdev = iterator->elem;
3391
3392 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3393 continue;
3394
3395 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3396 ERROR("gateway = auto only supported for "
3397 "veth and macvlan");
3398 return -1;
3399 }
3400
3401 if (!netdev->link) {
3402 ERROR("gateway = auto needs a link interface");
3403 return -1;
3404 }
3405
3406 link_index = if_nametoindex(netdev->link);
3407 if (!link_index)
3408 return -EINVAL;
3409
3410 if (netdev->ipv4_gateway_auto) {
3411 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3412 ERROR("failed to automatically find ipv4 gateway "
3413 "address from link interface '%s'", netdev->link);
3414 return -1;
3415 }
3416 }
3417
3418 if (netdev->ipv6_gateway_auto) {
3419 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3420 ERROR("failed to automatically find ipv6 gateway "
3421 "address from link interface '%s'", netdev->link);
3422 return -1;
3423 }
3424 }
3425 }
3426
3427 return 0;
3428}
3429
5e4a62bf 3430int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3431{
5e4a62bf 3432 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3433 int i, ret;
b0a33c1e 3434
5e4a62bf
DL
3435 /* no tty in the configuration */
3436 if (!conf->tty)
b0a33c1e 3437 return 0;
3438
13954cce 3439 tty_info->pty_info =
e4e7d59d 3440 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3441 if (!tty_info->pty_info) {
36eb9bde 3442 SYSERROR("failed to allocate pty_info");
985d15b1 3443 return -1;
b0a33c1e 3444 }
3445
985d15b1 3446 for (i = 0; i < conf->tty; i++) {
13954cce 3447
b0a33c1e 3448 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3449
025ed0f3
SH
3450 process_lock();
3451 ret = openpty(&pty_info->master, &pty_info->slave,
3452 pty_info->name, NULL, NULL);
3453 process_unlock();
3454 if (ret) {
36eb9bde 3455 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3456 tty_info->nbtty = i;
3457 lxc_delete_tty(tty_info);
3458 return -1;
b0a33c1e 3459 }
3460
5332bb84
DL
3461 DEBUG("allocated pty '%s' (%d/%d)",
3462 pty_info->name, pty_info->master, pty_info->slave);
3463
3ec1648d 3464 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3465 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3466 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3467
b0a33c1e 3468 pty_info->busy = 0;
3469 }
3470
985d15b1 3471 tty_info->nbtty = conf->tty;
1ac470c0
DL
3472
3473 INFO("tty's configured");
3474
985d15b1 3475 return 0;
b0a33c1e 3476}
3477
3478void lxc_delete_tty(struct lxc_tty_info *tty_info)
3479{
3480 int i;
3481
3482 for (i = 0; i < tty_info->nbtty; i++) {
3483 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3484
3485 close(pty_info->master);
3486 close(pty_info->slave);
3487 }
3488
3489 free(tty_info->pty_info);
3490 tty_info->nbtty = 0;
3491}
3492
f6d3e3e4 3493/*
7b50c609
TS
3494 * chown_mapped_root: for an unprivileged user with uid/gid X to
3495 * chown a dir to subuid/subgid Y, he needs to run chown as root
3496 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3497 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3498 * root is privileged with respect to hostuid/hostgid X, allowing
3499 * him to do the chown.
f6d3e3e4 3500 */
c4d10a05 3501int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3502{
7b50c609
TS
3503 uid_t rootuid;
3504 gid_t rootgid;
c4d10a05 3505 pid_t pid;
2a9a80cb 3506 unsigned long val;
a7ef8753 3507 char *chownpath = path;
f6d3e3e4 3508
2a9a80cb 3509 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3510 ERROR("No mapping for container root");
3511 return -1;
f6d3e3e4 3512 }
7b50c609
TS
3513 rootuid = (uid_t) val;
3514 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3515 ERROR("No mapping for container root");
3516 return -1;
3517 }
3518 rootgid = (gid_t) val;
2a9a80cb 3519
a7ef8753
SH
3520 /*
3521 * In case of overlay, we want only the writeable layer
3522 * to be chowned
3523 */
1f92162d 3524 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3525 chownpath = strchr(path, ':');
3526 if (!chownpath) {
3527 ERROR("Bad overlay path: %s", path);
3528 return -1;
3529 }
3530 chownpath = strchr(chownpath+1, ':');
3531 if (!chownpath) {
3532 ERROR("Bad overlay path: %s", path);
3533 return -1;
3534 }
3535 chownpath++;
3536 }
3537 path = chownpath;
c4d10a05 3538 if (geteuid() == 0) {
7b50c609 3539 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3540 ERROR("Error chowning %s", path);
3541 return -1;
3542 }
3543 return 0;
3544 }
f3d7e4ca 3545
7b50c609 3546 if (rootuid == geteuid()) {
f3d7e4ca
SH
3547 // nothing to do
3548 INFO("%s: container root is our uid; no need to chown" ,__func__);
3549 return 0;
3550 }
3551
c4d10a05
SH
3552 pid = fork();
3553 if (pid < 0) {
3554 SYSERROR("Failed forking");
f6d3e3e4
SH
3555 return -1;
3556 }
c4d10a05 3557 if (!pid) {
7b50c609
TS
3558 int hostuid = geteuid(), hostgid = getegid(), ret;
3559 struct stat sb;
3560 char map1[100], map2[100], map3[100], map4[100], map5[100];
3561 char ugid[100];
3562 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3563 "-m", map3, "-m", map5,
3564 "--", "chown", ugid, path, NULL };
3565 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3566 "-m", map3, "-m", map4, "-m", map5,
3567 "--", "chown", ugid, path, NULL };
3568
3569 // save the current gid of "path"
3570 if (stat(path, &sb) < 0) {
3571 ERROR("Error stat %s", path);
3572 return -1;
3573 }
f6d3e3e4 3574
9a7c2aba
SH
3575 /*
3576 * A file has to be group-owned by a gid mapped into the
3577 * container, or the container won't be privileged over it.
3578 */
3579 if (sb.st_uid == geteuid() &&
3580 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3581 chown(path, -1, hostgid) < 0) {
3582 ERROR("Failed chgrping %s", path);
7b50c609
TS
3583 return -1;
3584 }
3585
3586 // "u:0:rootuid:1"
3587 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3588 if (ret < 0 || ret >= 100) {
3589 ERROR("Error uid printing map string");
f6d3e3e4
SH
3590 return -1;
3591 }
c4d10a05 3592
98e5ba51
SH
3593 // "u:hostuid:hostuid:1"
3594 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3595 if (ret < 0 || ret >= 100) {
3596 ERROR("Error uid printing map string");
3597 return -1;
3598 }
3599
7b50c609
TS
3600 // "g:0:rootgid:1"
3601 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3602 if (ret < 0 || ret >= 100) {
7b50c609 3603 ERROR("Error gid printing map string");
c4d10a05
SH
3604 return -1;
3605 }
3606
7b50c609 3607 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3608 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3609 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3610 if (ret < 0 || ret >= 100) {
3611 ERROR("Error gid printing map string");
3612 return -1;
3613 }
3614
3615 // "g:hostgid:hostgid:1"
3616 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3617 if (ret < 0 || ret >= 100) {
3618 ERROR("Error gid printing map string");
3619 return -1;
3620 }
3621
3622 // "0:pathgid" (chown)
b4c1e35d 3623 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3624 if (ret < 0 || ret >= 100) {
3625 ERROR("Error owner printing format string for chown");
3626 return -1;
3627 }
3628
3629 if (hostgid == sb.st_gid)
3630 ret = execvp("lxc-usernsexec", args1);
3631 else
3632 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3633 SYSERROR("Failed executing usernsexec");
3634 exit(1);
f6d3e3e4 3635 }
c4d10a05 3636 return wait_for_pid(pid);
f6d3e3e4
SH
3637}
3638
c4d10a05 3639int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3640{
c4d10a05 3641 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3642 return 0;
c4d10a05 3643
29b10e4f 3644 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3645 ERROR("Failed to chown %s", c->console.name);
3646 return -1;
3647 }
3648
f6d3e3e4
SH
3649 return 0;
3650}
3651
5112cd70
SH
3652int tmp_proc_mount(struct lxc_conf *lxc_conf)
3653{
3654 int mounted;
3655
01958b1f 3656 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3657 if (mounted == -1) {
3658 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3659 /* continue only if there is no rootfs */
3660 if (lxc_conf->rootfs.path)
3661 return -1;
5112cd70
SH
3662 } else if (mounted == 1) {
3663 lxc_conf->tmp_umount_proc = 1;
3664 }
3665 return 0;
3666}
3667
3668void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3669{
3670 if (lxc_conf->tmp_umount_proc == 1) {
3671 umount("/proc");
3672 lxc_conf->tmp_umount_proc = 0;
3673 }
3674}
3675
/*
 * remount_all_slave: walk /proc/self/mountinfo and change every entry whose
 * options field mentions "shared" to MS_SLAVE.
 *
 * Failures are logged but never abort: the container startup continues.
 */
void remount_all_slave(void)
{
	char *entry = NULL;
	size_t cap = 0;
	FILE *mountinfo;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&entry, &cap, mountinfo) != -1) {
		char *mntpoint, *flags;

		/* NOTE(review): presumably the mount point column — confirm against get_field() */
		mntpoint = get_field(entry, 4);
		if (!mntpoint)
			continue;

		/* two fields further on; checked for the "shared" tag below */
		flags = get_field(mntpoint, 2);
		if (!flags)
			continue;

		null_endofword(flags);
		if (strstr(flags, "shared") == NULL)
			continue;

		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL) != 0) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(entry);
}
3709
2322903b
SH
3710void lxc_execute_bind_init(struct lxc_conf *conf)
3711{
3712 int ret;
9d9c111c
SH
3713 char path[PATH_MAX], destpath[PATH_MAX], *p;
3714
3715 /* If init exists in the container, don't bind mount a static one */
3716 p = choose_init(conf->rootfs.mount);
3717 if (p) {
3718 free(p);
3719 return;
3720 }
2322903b
SH
3721
3722 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3723 if (ret < 0 || ret >= PATH_MAX) {
3724 WARN("Path name too long searching for lxc.init.static");
3725 return;
3726 }
3727
3728 if (!file_exists(path)) {
3729 INFO("%s does not exist on host", path);
3730 return;
3731 }
3732
3733 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3734 if (ret < 0 || ret >= PATH_MAX) {
3735 WARN("Path name too long for container's lxc.init.static");
3736 return;
3737 }
3738
3739 if (!file_exists(destpath)) {
3740 FILE * pathfile = fopen(destpath, "wb");
3741 if (!pathfile) {
3742 SYSERROR("Failed to create mount target '%s'", destpath);
3743 return;
3744 }
3745 fclose(pathfile);
3746 }
3747
592fd47a 3748 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3749 if (ret < 0)
3750 SYSERROR("Failed to bind lxc.init.static into container");
3751 INFO("lxc.init.static bound into container at %s", path);
3752}
3753
35120d9c
SH
3754/*
3755 * This does the work of remounting / if it is shared, calling the
3756 * container pre-mount hooks, and mounting the rootfs.
3757 */
3758int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3759{
35120d9c
SH
3760 if (conf->rootfs_setup) {
3761 /*
3762 * rootfs was set up in another namespace. bind-mount it
3763 * to give us a mount in our own ns so we can pivot_root to it
3764 */
3765 const char *path = conf->rootfs.mount;
3766 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3767 ERROR("Failed to bind-mount container / onto itself");
145832ba 3768 return -1;
35120d9c 3769 }
145832ba 3770 return 0;
35120d9c 3771 }
d4ef7c50 3772
e995d7a2
SH
3773 remount_all_slave();
3774
35120d9c
SH
3775 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3776 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3777 return -1;
3778 }
3779
3780 if (setup_rootfs(conf)) {
3781 ERROR("failed to setup rootfs for '%s'", name);
3782 return -1;
3783 }
3784
3785 conf->rootfs_setup = true;
3786 return 0;
3787}
3788
1c1c7051
SH
3789static bool verify_start_hooks(struct lxc_conf *conf)
3790{
3791 struct lxc_list *it;
3792 char path[MAXPATHLEN];
3793 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3794 char *hookname = it->elem;
3795 struct stat st;
3796 int ret;
3797
3798 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3799 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3800 if (ret < 0 || ret >= MAXPATHLEN)
3801 return false;
3802 ret = stat(path, &st);
3803 if (ret) {
7b6753e7 3804 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3805 hookname);
3806 return false;
3807 }
6a0c909a 3808 return true;
1c1c7051
SH
3809 }
3810
3811 return true;
3812}
3813
e8bd4e43
SH
3814static int send_fd(int sock, int fd)
3815{
3816 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3817
3818
3819 if (ret < 0) {
3820 SYSERROR("Error sending tty fd to parent");
3821 return -1;
3822 }
3823
3824 return 0;
3825}
3826
3827static int send_ttys_to_parent(struct lxc_handler *handler)
3828{
3829 struct lxc_conf *conf = handler->conf;
3830 const struct lxc_tty_info *tty_info = &conf->tty_info;
3831 int i;
3832 int sock = handler->ttysock[0];
3833
3834 for (i = 0; i < tty_info->nbtty; i++) {
3835 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3836 if (send_fd(sock, pty_info->slave) < 0)
3837 goto bad;
3838 close(pty_info->slave);
3839 pty_info->slave = -1;
3840 if (send_fd(sock, pty_info->master) < 0)
3841 goto bad;
3842 close(pty_info->master);
3843 pty_info->master = -1;
3844 }
3845
3846 close(handler->ttysock[0]);
3847 close(handler->ttysock[1]);
3848
3849 return 0;
3850
3851bad:
3852 ERROR("Error writing tty fd to parent");
3853 return -1;
3854}
3855
35120d9c
SH
3856int lxc_setup(struct lxc_handler *handler)
3857{
3858 const char *name = handler->name;
3859 struct lxc_conf *lxc_conf = handler->conf;
3860 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3861
3862 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3863 ERROR("Error setting up rootfs mount after spawn");
3864 return -1;
3865 }
3866
6c544cb3
MM
3867 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3868 if (setup_utsname(lxc_conf->utsname)) {
3869 ERROR("failed to setup the utsname for '%s'", name);
3870 return -1;
3871 }
0ad19a3f 3872 }
3873
5f4535a3 3874 if (setup_network(&lxc_conf->network)) {
36eb9bde 3875 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3876 return -1;
0ad19a3f 3877 }
3878
bc6928ff 3879 if (lxc_conf->autodev > 0) {
14221cbb 3880 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 3881 ERROR("failed to mount /dev in the container");
c6883f38
SH
3882 return -1;
3883 }
3884 }
3885
368bbc02
CS
3886 /* do automatic mounts (mainly /proc and /sys), but exclude
3887 * those that need to wait until other stuff has finished
3888 */
4fb3cba5 3889 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3890 ERROR("failed to setup the automatic mounts for '%s'", name);
3891 return -1;
3892 }
3893
80a881b2 3894 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3895 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3896 return -1;
576f946d 3897 }
3898
c1dc38c2 3899 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3900 ERROR("failed to setup the mount entries for '%s'", name);
3901 return -1;
3902 }
3903
7b6753e7 3904 /* Make sure any start hooks are in the container */
1c1c7051
SH
3905 if (!verify_start_hooks(lxc_conf))
3906 return -1;
3907
2322903b
SH
3908 if (lxc_conf->is_execute)
3909 lxc_execute_bind_init(lxc_conf);
3910
368bbc02
CS
3911 /* now mount only cgroup, if wanted;
3912 * before, /sys could not have been mounted
3913 * (is either mounted automatically or via fstab entries)
3914 */
4fb3cba5 3915 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3916 ERROR("failed to setup the automatic mounts for '%s'", name);
3917 return -1;
3918 }
3919
283678ed 3920 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3921 ERROR("failed to run mount hooks for container '%s'.", name);
3922 return -1;
3923 }
3924
bc6928ff 3925 if (lxc_conf->autodev > 0) {
283678ed 3926 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3927 ERROR("failed to run autodev hooks for container '%s'.", name);
3928 return -1;
3929 }
14221cbb 3930 if (fill_autodev(&lxc_conf->rootfs)) {
91c3830e
SH
3931 ERROR("failed to populate /dev in the container");
3932 return -1;
3933 }
3934 }
368bbc02 3935
37903589 3936 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3937 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3938 return -1;
6e590161 3939 }
3940
7e0e1d94
AV
3941 if (lxc_conf->kmsg) {
3942 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3943 ERROR("failed to setup kmsg for '%s'", name);
3944 }
1bd051a6 3945
69aa6655
DE
3946 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3947 ERROR("failed to setup /dev symlinks for '%s'", name);
3948 return -1;
3949 }
3950
5112cd70
SH
3951 /* mount /proc if it's not already there */
3952 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3953 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3954 return -1;
e075f5d9 3955 }
e075f5d9 3956
ac778708 3957 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3958 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3959 return -1;
ed502555 3960 }
3961
571e6ec8 3962 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3963 ERROR("failed to setup the new pts instance");
95b5ffaf 3964 return -1;
3c26f34e 3965 }
3966
e8bd4e43
SH
3967 if (lxc_create_tty(name, lxc_conf)) {
3968 ERROR("failed to create the ttys");
3969 return -1;
3970 }
3971
3972 if (send_ttys_to_parent(handler) < 0) {
3973 ERROR("failure sending console info to parent");
3974 return -1;
3975 }
3976
3977
3978 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3979 ERROR("failed to setup the ttys for '%s'", name);
3980 return -1;
3981 }
3982
3983 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3984 SYSERROR("failed to set environment variable for container ptys");
3985
3986
cccc74b5
DL
3987 if (setup_personality(lxc_conf->personality)) {
3988 ERROR("failed to setup personality");
3989 return -1;
3990 }
3991
97a8f74f
SG
3992 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3993 if (!lxc_list_empty(&lxc_conf->caps)) {
3994 ERROR("Simultaneously requested dropping and keeping caps");
f6d3e3e4
SH
3995 return -1;
3996 }
97a8f74f
SG
3997 if (dropcaps_except(&lxc_conf->keepcaps)) {
3998 ERROR("failed to keep requested caps");
3999 return -1;
4000 }
4001 } else if (setup_caps(&lxc_conf->caps)) {
4002 ERROR("failed to drop capabilities");
4003 return -1;
81810dd1
DL
4004 }
4005
cd54d859
DL
4006 NOTICE("'%s' is setup.", name);
4007
0ad19a3f 4008 return 0;
4009}
26ddeedd 4010
283678ed
SH
4011int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
4012 const char *lxcpath, char *argv[])
26ddeedd
SH
4013{
4014 int which = -1;
4015 struct lxc_list *it;
4016
4017 if (strcmp(hook, "pre-start") == 0)
4018 which = LXCHOOK_PRESTART;
5ea6163a
SH
4019 else if (strcmp(hook, "pre-mount") == 0)
4020 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4021 else if (strcmp(hook, "mount") == 0)
4022 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4023 else if (strcmp(hook, "autodev") == 0)
4024 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4025 else if (strcmp(hook, "start") == 0)
4026 which = LXCHOOK_START;
52492063
WB
4027 else if (strcmp(hook, "stop") == 0)
4028 which = LXCHOOK_STOP;
26ddeedd
SH
4029 else if (strcmp(hook, "post-stop") == 0)
4030 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4031 else if (strcmp(hook, "clone") == 0)
4032 which = LXCHOOK_CLONE;
37cf711b
SY
4033 else if (strcmp(hook, "destroy") == 0)
4034 which = LXCHOOK_DESTROY;
26ddeedd
SH
4035 else
4036 return -1;
4037 lxc_list_for_each(it, &conf->hooks[which]) {
4038 int ret;
4039 char *hookname = it->elem;
283678ed 4040 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4041 if (ret)
4042 return ret;
4043 }
4044 return 0;
4045}
72d0e1cb 4046
427b3a21 4047static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4048{
4049 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4050 struct lxc_list *it2,*next;
72d0e1cb
SG
4051
4052 lxc_list_del(it);
4053
f10fad2f
ME
4054 free(netdev->link);
4055 free(netdev->name);
4056 if (netdev->type == LXC_NET_VETH)
c9bb9a85 4057 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
4058 free(netdev->upscript);
4059 free(netdev->hwaddr);
4060 free(netdev->mtu);
4061 free(netdev->ipv4_gateway);
4062 free(netdev->ipv6_gateway);
9ebb03ad 4063 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4064 lxc_list_del(it2);
4065 free(it2->elem);
4066 free(it2);
4067 }
9ebb03ad 4068 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4069 lxc_list_del(it2);
4070 free(it2->elem);
4071 free(it2);
4072 }
d95db067 4073 free(netdev);
72d0e1cb
SG
4074 free(it);
4075}
4076
4077/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4078int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4079{
4080 char *p1;
4081 int ret, idx, i;
4082 struct lxc_list *it;
4083 struct lxc_netdev *netdev;
4084
46cd2845 4085 p1 = strchr(key, '.');
72d0e1cb
SG
4086 if (!p1 || *(p1+1) == '\0')
4087 p1 = NULL;
4088
4089 ret = sscanf(key, "%d", &idx);
4090 if (ret != 1) return -1;
4091 if (idx < 0)
4092 return -1;
4093
4094 i = 0;
4095 lxc_list_for_each(it, &c->network) {
4096 if (i == idx)
4097 break;
4098 i++;
4099 }
4100 if (i < idx) // we don't have that many nics defined
4101 return -1;
4102
4103 if (!it || !it->elem)
4104 return -1;
4105
4106 netdev = it->elem;
4107
4108 if (!p1) {
4109 lxc_remove_nic(it);
52d21d40 4110 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4111 struct lxc_list *it2,*next;
4112 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4113 lxc_list_del(it2);
4114 free(it2->elem);
4115 free(it2);
4116 }
52d21d40 4117 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4118 struct lxc_list *it2,*next;
4119 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4120 lxc_list_del(it2);
4121 free(it2->elem);
4122 free(it2);
4123 }
72d0e1cb
SG
4124 }
4125 else return -1;
4126
4127 return 0;
4128}
4129
4130int lxc_clear_config_network(struct lxc_conf *c)
4131{
9ebb03ad
DE
4132 struct lxc_list *it,*next;
4133 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4134 lxc_remove_nic(it);
4135 }
4136 return 0;
4137}
4138
4139int lxc_clear_config_caps(struct lxc_conf *c)
4140{
9ebb03ad 4141 struct lxc_list *it,*next;
72d0e1cb 4142
9ebb03ad 4143 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4144 lxc_list_del(it);
4145 free(it->elem);
4146 free(it);
4147 }
4148 return 0;
4149}
4150
74a3920a 4151static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4152 struct lxc_list *it, *next;
4153
4355ab5f 4154 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4155 lxc_list_del(it);
4156 free(it->elem);
4157 free(it);
4158 }
4159 return 0;
4160}
4161
4355ab5f
SH
4162int lxc_clear_idmaps(struct lxc_conf *c)
4163{
4164 return lxc_free_idmap(&c->id_map);
4165}
4166
1fb86a7c
SH
4167int lxc_clear_config_keepcaps(struct lxc_conf *c)
4168{
4169 struct lxc_list *it,*next;
4170
4171 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4172 lxc_list_del(it);
4173 free(it->elem);
4174 free(it);
4175 }
4176 return 0;
4177}
4178
12a50cc6 4179int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4180{
9ebb03ad 4181 struct lxc_list *it,*next;
72d0e1cb 4182 bool all = false;
12a50cc6 4183 const char *k = key + 11;
72d0e1cb
SG
4184
4185 if (strcmp(key, "lxc.cgroup") == 0)
4186 all = true;
4187
9ebb03ad 4188 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4189 struct lxc_cgroup *cg = it->elem;
4190 if (!all && strcmp(cg->subsystem, k) != 0)
4191 continue;
4192 lxc_list_del(it);
4193 free(cg->subsystem);
4194 free(cg->value);
4195 free(cg);
4196 free(it);
4197 }
4198 return 0;
4199}
4200
ee1e7aa0
SG
4201int lxc_clear_groups(struct lxc_conf *c)
4202{
4203 struct lxc_list *it,*next;
4204
4205 lxc_list_for_each_safe(it, &c->groups, next) {
4206 lxc_list_del(it);
4207 free(it->elem);
4208 free(it);
4209 }
4210 return 0;
4211}
4212
ab799c0b
SG
4213int lxc_clear_environment(struct lxc_conf *c)
4214{
4215 struct lxc_list *it,*next;
4216
4217 lxc_list_for_each_safe(it, &c->environment, next) {
4218 lxc_list_del(it);
4219 free(it->elem);
4220 free(it);
4221 }
4222 return 0;
4223}
4224
4225
72d0e1cb
SG
4226int lxc_clear_mount_entries(struct lxc_conf *c)
4227{
9ebb03ad 4228 struct lxc_list *it,*next;
72d0e1cb 4229
9ebb03ad 4230 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4231 lxc_list_del(it);
4232 free(it->elem);
4233 free(it);
4234 }
4235 return 0;
4236}
4237
b099e9e9
SH
4238int lxc_clear_automounts(struct lxc_conf *c)
4239{
4240 c->auto_mounts = 0;
4241 return 0;
4242}
4243
12a50cc6 4244int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4245{
9ebb03ad 4246 struct lxc_list *it,*next;
17ed13a3 4247 bool all = false, done = false;
12a50cc6 4248 const char *k = key + 9;
72d0e1cb
SG
4249 int i;
4250
17ed13a3
SH
4251 if (strcmp(key, "lxc.hook") == 0)
4252 all = true;
4253
72d0e1cb 4254 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4255 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4256 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4257 lxc_list_del(it);
4258 free(it->elem);
4259 free(it);
4260 }
4261 done = true;
72d0e1cb
SG
4262 }
4263 }
17ed13a3
SH
4264
4265 if (!done) {
4266 ERROR("Invalid hook key: %s", key);
4267 return -1;
4268 }
72d0e1cb
SG
4269 return 0;
4270}
8eb5694b 4271
74a3920a 4272static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4273{
4274 int i;
4275
0cf45501 4276 if (!conf->saved_nics)
7b35f3d6
SH
4277 return;
4278 for (i=0; i < conf->num_savednics; i++)
4279 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4280 free(conf->saved_nics);
4281}
4282
4184c3e1
SH
4283static inline void lxc_clear_aliens(struct lxc_conf *conf)
4284{
4285 struct lxc_list *it,*next;
4286
4287 lxc_list_for_each_safe(it, &conf->aliens, next) {
4288 lxc_list_del(it);
4289 free(it->elem);
4290 free(it);
4291 }
4292}
4293
f979ac15
SH
4294static inline void lxc_clear_includes(struct lxc_conf *conf)
4295{
4296 struct lxc_list *it,*next;
4297
4298 lxc_list_for_each_safe(it, &conf->includes, next) {
4299 lxc_list_del(it);
4300 free(it->elem);
4301 free(it);
4302 }
4303}
4304
8eb5694b
SH
4305void lxc_conf_free(struct lxc_conf *conf)
4306{
4307 if (!conf)
4308 return;
858377e4
SH
4309 if (current_config == conf)
4310 current_config = NULL;
f10fad2f
ME
4311 free(conf->console.log_path);
4312 free(conf->console.path);
4313 free(conf->rootfs.mount);
4314 free(conf->rootfs.options);
4315 free(conf->rootfs.path);
4316 free(conf->rootfs.pivot);
4317 free(conf->logfile);
858377e4
SH
4318 if (conf->logfd != -1)
4319 close(conf->logfd);
f10fad2f
ME
4320 free(conf->utsname);
4321 free(conf->ttydir);
4322 free(conf->fstab);
4323 free(conf->rcfile);
4324 free(conf->init_cmd);
6b0d5538 4325 free(conf->unexpanded_config);
393903d1 4326 free(conf->pty_names);
8eb5694b 4327 lxc_clear_config_network(conf);
f10fad2f
ME
4328 free(conf->lsm_aa_profile);
4329 free(conf->lsm_se_context);
769872f9 4330 lxc_seccomp_free(conf);
8eb5694b 4331 lxc_clear_config_caps(conf);
1fb86a7c 4332 lxc_clear_config_keepcaps(conf);
8eb5694b 4333 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4334 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4335 lxc_clear_mount_entries(conf);
7b35f3d6 4336 lxc_clear_saved_nics(conf);
27c27d73 4337 lxc_clear_idmaps(conf);
ee1e7aa0 4338 lxc_clear_groups(conf);
f979ac15 4339 lxc_clear_includes(conf);
761d81ca 4340 lxc_clear_aliens(conf);
ab799c0b 4341 lxc_clear_environment(conf);
8eb5694b
SH
4342 free(conf);
4343}
4355ab5f
SH
4344
/*
 * Argument bundle passed to run_userns_fn().
 */
struct userns_fn_data {
	int (*fn)(void *);	/* callback to invoke once released */
	void *arg;		/* argument handed to fn */
	int p[2];		/* pipe fds: p[0] is read from, p[1] is closed */
};

/*
 * Wait for a one-byte "go" signal on the pipe, then run the callback.
 *
 * Closes p[1], reads exactly one byte from p[0] and, if that succeeds,
 * closes p[0] and returns the result of fn(arg). Returns -1 without
 * calling fn when the byte could not be read.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *ctx = data;
	char token;
	ssize_t got;

	/* we're not sharing with the parent any more, if it was a thread */
	close(ctx->p[1]);

	got = read(ctx->p[0], &token, 1);
	if (got != 1)
		return -1;

	close(ctx->p[0]);

	return ctx->fn(ctx->arg);
}
4363
4364/*
8b227008
TS
4365 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4366 * if they are not already there.
4355ab5f 4367 */
8b227008
TS
4368static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4369 uid_t uid, gid_t gid)
4355ab5f 4370{
8b227008
TS
4371 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4372 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4373 struct lxc_list *new = NULL, *tmp, *it, *next;
4374 struct id_map *entry;
4375
3ec1648d
SH
4376 new = malloc(sizeof(*new));
4377 if (!new) {
4378 ERROR("Out of memory building id map");
4379 return NULL;
4380 }
4381 lxc_list_init(new);
4382
8b227008
TS
4383 if (hostuid_mapped < 0) {
4384 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4385 if (hostuid_mapped < 0)
3ec1648d
SH
4386 goto err;
4387 tmp = malloc(sizeof(*tmp));
4388 if (!tmp)
4389 goto err;
4355ab5f
SH
4390 entry = malloc(sizeof(*entry));
4391 if (!entry) {
3ec1648d
SH
4392 free(tmp);
4393 goto err;
4355ab5f 4394 }
3ec1648d 4395 tmp->elem = entry;
4355ab5f 4396 entry->idtype = ID_TYPE_UID;
8b227008
TS
4397 entry->nsid = hostuid_mapped;
4398 entry->hostid = (unsigned long) uid;
4399 entry->range = 1;
4400 lxc_list_add_tail(new, tmp);
4401 }
4402 if (hostgid_mapped < 0) {
4403 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4404 if (hostgid_mapped < 0)
4405 goto err;
4406 tmp = malloc(sizeof(*tmp));
4407 if (!tmp)
4408 goto err;
4409 entry = malloc(sizeof(*entry));
4410 if (!entry) {
4411 free(tmp);
4412 goto err;
4413 }
4414 tmp->elem = entry;
4415 entry->idtype = ID_TYPE_GID;
4416 entry->nsid = hostgid_mapped;
4417 entry->hostid = (unsigned long) gid;
4355ab5f 4418 entry->range = 1;
3ec1648d 4419 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4420 }
4421 lxc_list_for_each_safe(it, &conf->id_map, next) {
4422 tmp = malloc(sizeof(*tmp));
4423 if (!tmp)
4424 goto err;
4425 entry = malloc(sizeof(*entry));
4426 if (!entry) {
4427 free(tmp);
4428 goto err;
4429 }
4430 memset(entry, 0, sizeof(*entry));
4431 memcpy(entry, it->elem, sizeof(*entry));
4432 tmp->elem = entry;
3ec1648d 4433 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4434 }
4435
4436 return new;
4437
4438err:
8b227008 4439 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4440 if (new)
4441 lxc_free_idmap(new);
c30ac545 4442 free(new);
4355ab5f
SH
4443 return NULL;
4444}
4445
4446/*
4447 * Run a function in a new user namespace.
8b227008 4448 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4449 */
4450int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4451{
4452 int ret, pid;
4453 struct userns_fn_data d;
4454 char c = '1';
4455 int p[2];
4456 struct lxc_list *idmap;
4457
4355ab5f 4458 ret = pipe(p);
4355ab5f
SH
4459 if (ret < 0) {
4460 SYSERROR("opening pipe");
4461 return -1;
4462 }
4463 d.fn = fn;
4464 d.arg = data;
4465 d.p[0] = p[0];
4466 d.p[1] = p[1];
4467 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4468 if (pid < 0)
4469 goto err;
4355ab5f 4470 close(p[0]);
4355ab5f
SH
4471 p[0] = -1;
4472
8b227008
TS
4473 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4474 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4475 goto err;
4476 }
4477
4478 ret = lxc_map_ids(idmap, pid);
4479 lxc_free_idmap(idmap);
88dd66fc 4480 free(idmap);
565e571c 4481 if (ret) {
4355ab5f
SH
4482 ERROR("Error setting up child mappings");
4483 goto err;
4484 }
4485
4486 // kick the child
4487 if (write(p[1], &c, 1) != 1) {
4488 SYSERROR("writing to pipe to child");
4489 goto err;
4490 }
4491
3139aead
SG
4492 ret = wait_for_pid(pid);
4493
4494 close(p[1]);
4495 return ret;
4496
4355ab5f 4497err:
4355ab5f
SH
4498 if (p[0] != -1)
4499 close(p[0]);
4500 close(p[1]);
4355ab5f
SH
4501 return -1;
4502}
97e9cfa0 4503
/*
 * Return a malloc'ed copy of the name of the effective user, or NULL if
 * the lookup (or the copy) failed. The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4515
/*
 * Return a malloc'ed copy of the name of the effective group, or NULL if
 * the lookup (or the copy) failed. The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4527
a96a8e8c 4528/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4529void suggest_default_idmap(void)
4530{
4531 FILE *f;
4532 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4533 char *line = NULL;
4534 char *uname, *gname;
4535 size_t len = 0;
4536
4537 if (!(uname = getuname()))
4538 return;
4539
4540 if (!(gname = getgname())) {
4541 free(uname);
4542 return;
4543 }
4544
4545 f = fopen(subuidfile, "r");
4546 if (!f) {
4547 ERROR("Your system is not configured with subuids");
4548 free(gname);
4549 free(uname);
4550 return;
4551 }
4552 while (getline(&line, &len, f) != -1) {
4553 char *p = strchr(line, ':'), *p2;
4554 if (*line == '#')
4555 continue;
4556 if (!p)
4557 continue;
4558 *p = '\0';
4559 p++;
4560 if (strcmp(line, uname))
4561 continue;
4562 p2 = strchr(p, ':');
4563 if (!p2)
4564 continue;
4565 *p2 = '\0';
4566 p2++;
4567 if (!*p2)
4568 continue;
4569 uid = atoi(p);
4570 urange = atoi(p2);
4571 }
4572 fclose(f);
4573
4574 f = fopen(subuidfile, "r");
4575 if (!f) {
4576 ERROR("Your system is not configured with subgids");
4577 free(gname);
4578 free(uname);
4579 return;
4580 }
4581 while (getline(&line, &len, f) != -1) {
4582 char *p = strchr(line, ':'), *p2;
4583 if (*line == '#')
4584 continue;
4585 if (!p)
4586 continue;
4587 *p = '\0';
4588 p++;
4589 if (strcmp(line, uname))
4590 continue;
4591 p2 = strchr(p, ':');
4592 if (!p2)
4593 continue;
4594 *p2 = '\0';
4595 p2++;
4596 if (!*p2)
4597 continue;
4598 gid = atoi(p);
4599 grange = atoi(p2);
4600 }
4601 fclose(f);
4602
f10fad2f 4603 free(line);
97e9cfa0
SH
4604
4605 if (!urange || !grange) {
4606 ERROR("You do not have subuids or subgids allocated");
4607 ERROR("Unprivileged containers require subuids and subgids");
4608 return;
4609 }
4610
4611 ERROR("You must either run as root, or define uid mappings");
4612 ERROR("To pass uid mappings to lxc-create, you could create");
4613 ERROR("~/.config/lxc/default.conf:");
4614 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4615 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4616 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4617
4618 free(gname);
4619 free(uname);
4620}
aaf26830 4621
a7307747
SH
4622static void free_cgroup_settings(struct lxc_list *result)
4623{
4624 struct lxc_list *iterator, *next;
4625
4626 lxc_list_for_each_safe(iterator, result, next) {
4627 lxc_list_del(iterator);
4628 free(iterator);
4629 }
4630 free(result);
4631}
4632
aaf26830
KT
4633/*
4634 * Return the list of cgroup_settings sorted according to the following rules
4635 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4636 */
4637struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4638{
4639 struct lxc_list *result;
4640 struct lxc_list *memsw_limit = NULL;
4641 struct lxc_list *it = NULL;
4642 struct lxc_cgroup *cg = NULL;
4643 struct lxc_list *item = NULL;
4644
4645 result = malloc(sizeof(*result));
fac7c663
KT
4646 if (!result) {
4647 ERROR("failed to allocate memory to sort cgroup settings");
4648 return NULL;
4649 }
aaf26830
KT
4650 lxc_list_init(result);
4651
4652 /*Iterate over the cgroup settings and copy them to the output list*/
4653 lxc_list_for_each(it, cgroup_settings) {
4654 item = malloc(sizeof(*item));
fac7c663
KT
4655 if (!item) {
4656 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4657 free_cgroup_settings(result);
fac7c663
KT
4658 return NULL;
4659 }
aaf26830
KT
4660 item->elem = it->elem;
4661 cg = it->elem;
4662 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4663 /* Store the memsw_limit location */
4664 memsw_limit = item;
4665 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4666 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4667 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4668 item->elem = memsw_limit->elem;
4669 memsw_limit->elem = it->elem;
4670 }
4671 lxc_list_add_tail(result, item);
4672 }
4673
4674 return result;
a7307747 4675}