]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
Split bdev into modules: lxclvm
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
e8bd4e43 68#include "af_unix.h"
b2718c72 69#include "parse.h"
1b09f2c0
DL
70#include "utils.h"
71#include "conf.h"
72#include "log.h"
d55bc1ad 73#include "caps.h" /* for lxc_caps_last_cap() */
4ec31c52 74#include "bdev/bdev.h"
3c16d0cb 75#include "bdev/lxcoverlay.h"
368bbc02 76#include "cgroup.h"
025ed0f3 77#include "lxclock.h"
4355ab5f 78#include "namespace.h"
fe4de9a6 79#include "lsm/lsm.h"
d0a36f2c 80
495d2046
SG
81#if HAVE_SYS_CAPABILITY_H
82#include <sys/capability.h>
83#endif
84
6ff05e18
SG
85#if HAVE_SYS_PERSONALITY_H
86#include <sys/personality.h>
87#endif
88
edaf8b1b
SG
89#if IS_BIONIC
90#include <../include/lxcmntent.h>
91#else
92#include <mntent.h>
93#endif
94
769872f9
SH
95#include "lxcseccomp.h"
96
36eb9bde 97lxc_log_define(lxc_conf, lxc);
e5bda9ee 98
87da4ec3 99#define LINELEN 4096
0ad19a3f 100
495d2046 101#if HAVE_SYS_CAPABILITY_H
b09094da
MN
102#ifndef CAP_SETFCAP
103#define CAP_SETFCAP 31
104#endif
105
106#ifndef CAP_MAC_OVERRIDE
107#define CAP_MAC_OVERRIDE 32
108#endif
109
110#ifndef CAP_MAC_ADMIN
111#define CAP_MAC_ADMIN 33
112#endif
495d2046 113#endif
b09094da
MN
114
115#ifndef PR_CAPBSET_DROP
116#define PR_CAPBSET_DROP 24
117#endif
118
9818cae4
SG
119#ifndef LO_FLAGS_AUTOCLEAR
120#define LO_FLAGS_AUTOCLEAR 4
121#endif
122
0769b82a
CS
123/* needed for cgroup automount checks, regardless of whether we
124 * have included linux/capability.h or not */
125#ifndef CAP_SYS_ADMIN
126#define CAP_SYS_ADMIN 21
127#endif
128
2d76d1d7
SG
/*
 * pivot_root() fallback for C libraries that ship no wrapper: issue the raw
 * syscall when its number is known, otherwise fail with ENOSYS.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
143
/*
 * sethostname() fallback for C libraries that ship no wrapper: issue the raw
 * syscall when its number is known, otherwise fail with ENOSYS.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
156
72f919c4
SG
157/* Define __S_ISTYPE if missing from the C library */
158#ifndef __S_ISTYPE
159#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
160#endif
161
ecec0126
SG
162#ifndef MS_PRIVATE
163#define MS_PRIVATE (1<<18)
164#endif
165
72d0e1cb 166char *lxchook_names[NUM_LXC_HOOKS] = {
52492063 167 "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" };
72d0e1cb 168
a589434e 169typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 170
998ac676
RT
/*
 * One entry of the mount option keyword table.
 * NOTE(review): judging by the table contents, `clear` != 0 presumably means
 * the keyword removes `flag` instead of setting it -- confirm against the
 * option parser.
 */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* 0 or 1, see note above */
	int flag;	/* MS_* bit(s) associated with the keyword */
};

/* Maps a capability name to its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* capability name without the "CAP_" prefix */
	int value;	/* matching CAP_* constant */
};
181
858377e4
SH
/*
 * The lxc_conf of the container the current API call is working on.
 * It is used by the error calls. The variable is thread-local when
 * HAVE_TLS is defined and a plain global otherwise.
 */
#ifdef HAVE_TLS
__thread
#endif
struct lxc_conf *current_config;
192
0769b82a
CS
193/* Declare this here, since we don't want to reshuffle the whole file. */
194static int in_caplist(int cap, struct lxc_list *caps);
195
a589434e
JN
196static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
197static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
198static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
199static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
200static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
201static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
202
203static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
204 [LXC_NET_VETH] = instantiate_veth,
205 [LXC_NET_MACVLAN] = instantiate_macvlan,
206 [LXC_NET_VLAN] = instantiate_vlan,
207 [LXC_NET_PHYS] = instantiate_phys,
208 [LXC_NET_EMPTY] = instantiate_empty,
209 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 210};
211
74a2b586
JK
212static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
213static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
214static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
215static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
216static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 217static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 218
a589434e 219static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
220 [LXC_NET_VETH] = shutdown_veth,
221 [LXC_NET_MACVLAN] = shutdown_macvlan,
222 [LXC_NET_VLAN] = shutdown_vlan,
223 [LXC_NET_PHYS] = shutdown_phys,
224 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 225 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
226};
227
998ac676 228static struct mount_opt mount_opt[] = {
88d413d5
SW
229 { "defaults", 0, 0 },
230 { "ro", 0, MS_RDONLY },
231 { "rw", 1, MS_RDONLY },
232 { "suid", 1, MS_NOSUID },
233 { "nosuid", 0, MS_NOSUID },
234 { "dev", 1, MS_NODEV },
235 { "nodev", 0, MS_NODEV },
236 { "exec", 1, MS_NOEXEC },
237 { "noexec", 0, MS_NOEXEC },
238 { "sync", 0, MS_SYNCHRONOUS },
239 { "async", 1, MS_SYNCHRONOUS },
240 { "dirsync", 0, MS_DIRSYNC },
241 { "remount", 0, MS_REMOUNT },
242 { "mand", 0, MS_MANDLOCK },
243 { "nomand", 1, MS_MANDLOCK },
244 { "atime", 1, MS_NOATIME },
245 { "noatime", 0, MS_NOATIME },
246 { "diratime", 1, MS_NODIRATIME },
247 { "nodiratime", 0, MS_NODIRATIME },
248 { "bind", 0, MS_BIND },
249 { "rbind", 0, MS_BIND|MS_REC },
250 { "relatime", 0, MS_RELATIME },
251 { "norelatime", 1, MS_RELATIME },
252 { "strictatime", 0, MS_STRICTATIME },
253 { "nostrictatime", 1, MS_STRICTATIME },
254 { NULL, 0, 0 },
998ac676
RT
255};
256
495d2046 257#if HAVE_SYS_CAPABILITY_H
81810dd1 258static struct caps_opt caps_opt[] = {
a6afdde9 259 { "chown", CAP_CHOWN },
1e11be34
DL
260 { "dac_override", CAP_DAC_OVERRIDE },
261 { "dac_read_search", CAP_DAC_READ_SEARCH },
262 { "fowner", CAP_FOWNER },
263 { "fsetid", CAP_FSETID },
81810dd1
DL
264 { "kill", CAP_KILL },
265 { "setgid", CAP_SETGID },
266 { "setuid", CAP_SETUID },
267 { "setpcap", CAP_SETPCAP },
268 { "linux_immutable", CAP_LINUX_IMMUTABLE },
269 { "net_bind_service", CAP_NET_BIND_SERVICE },
270 { "net_broadcast", CAP_NET_BROADCAST },
271 { "net_admin", CAP_NET_ADMIN },
272 { "net_raw", CAP_NET_RAW },
273 { "ipc_lock", CAP_IPC_LOCK },
274 { "ipc_owner", CAP_IPC_OWNER },
275 { "sys_module", CAP_SYS_MODULE },
276 { "sys_rawio", CAP_SYS_RAWIO },
277 { "sys_chroot", CAP_SYS_CHROOT },
278 { "sys_ptrace", CAP_SYS_PTRACE },
279 { "sys_pacct", CAP_SYS_PACCT },
280 { "sys_admin", CAP_SYS_ADMIN },
281 { "sys_boot", CAP_SYS_BOOT },
282 { "sys_nice", CAP_SYS_NICE },
283 { "sys_resource", CAP_SYS_RESOURCE },
284 { "sys_time", CAP_SYS_TIME },
285 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
286 { "mknod", CAP_MKNOD },
287 { "lease", CAP_LEASE },
57b837e2
CB
288#ifdef CAP_AUDIT_READ
289 { "audit_read", CAP_AUDIT_READ },
290#endif
9527e566 291#ifdef CAP_AUDIT_WRITE
81810dd1 292 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
293#endif
294#ifdef CAP_AUDIT_CONTROL
81810dd1 295 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 296#endif
81810dd1
DL
297 { "setfcap", CAP_SETFCAP },
298 { "mac_override", CAP_MAC_OVERRIDE },
299 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
300#ifdef CAP_SYSLOG
301 { "syslog", CAP_SYSLOG },
302#endif
303#ifdef CAP_WAKE_ALARM
304 { "wake_alarm", CAP_WAKE_ALARM },
305#endif
2b54359b
CB
306#ifdef CAP_BLOCK_SUSPEND
307 { "block_suspend", CAP_BLOCK_SUSPEND },
308#endif
81810dd1 309};
495d2046
SG
310#else
311static struct caps_opt caps_opt[] = {};
312#endif
81810dd1 313
91c3830e
SH
314static int run_buffer(char *buffer)
315{
ebec9176 316 struct lxc_popen_FILE *f;
91c3830e 317 char *output;
8e7da691 318 int ret;
91c3830e 319
ebec9176 320 f = lxc_popen(buffer);
91c3830e
SH
321 if (!f) {
322 SYSERROR("popen failed");
323 return -1;
324 }
325
326 output = malloc(LXC_LOG_BUFFER_SIZE);
327 if (!output) {
328 ERROR("failed to allocate memory for script output");
ebec9176 329 lxc_pclose(f);
91c3830e
SH
330 return -1;
331 }
332
ebec9176 333 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
334 DEBUG("script output: %s", output);
335
336 free(output);
337
ebec9176 338 ret = lxc_pclose(f);
8e7da691 339 if (ret == -1) {
91c3830e
SH
340 SYSERROR("Script exited on error");
341 return -1;
8e7da691
DE
342 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
343 ERROR("Script exited with status %d", WEXITSTATUS(ret));
344 return -1;
345 } else if (WIFSIGNALED(ret)) {
346 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
347 strsignal(WTERMSIG(ret)));
348 return -1;
91c3830e
SH
349 }
350
351 return 0;
352}
353
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * @name:    container name, passed as the first script argument
 * @section: config section, passed as the second script argument
 * @script:  command to run
 * @hook:    hook name, passed as the third script argument
 * @lxcpath: not used by this function
 * @argsin:  optional NULL-terminated array of extra arguments, may be NULL
 *
 * The command line is heap-allocated: its length comes from configuration
 * supplied strings and is only bounded by INT_MAX, which is far too large
 * for a stack allocation.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret, i;
	int result = -1;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two more separators plus the terminating NUL */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t len = size - (size_t)ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		ret += rc;
	}

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
403
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it with run_buffer().
 *
 * @name:    container name, passed as the first script argument
 * @section: config section, passed as the second script argument
 * @script:  command to run
 * @...:     extra `char *` arguments; the list MUST end with a NULL pointer
 *
 * The command line is heap-allocated because its length comes from caller
 * supplied strings and is only bounded by INT_MAX. Every va_start() is
 * paired with a va_end() on all paths, including the error return inside
 * the argument loop.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	int result = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: every extra argument is preceded by one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two separators plus the terminating NUL */
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - (size_t)ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out;
		}
		ret += rc;
	}
	va_end(ap);

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
455
a17b1e65
SG
/*
 * Recursively bind-mount the directory @rootfs onto @target.
 *
 * @options is parsed with parse_mntopts(); the resulting flags are OR-ed
 * into MS_BIND | MS_REC and the remaining data string is handed to mount().
 *
 * Returns the result of mount(), or -1 if the options could not be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialised here so the free() on the failure path is well defined
	 * even if parse_mntopts() bails out before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
473
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed, marked LO_FLAGS_AUTOCLEAR and applied with
 * LOOP_SET_STATUS64. If applying the status fails after the backing file
 * was already attached, the device is detached again with LOOP_CLR_FD;
 * otherwise it would stay bound to the file because the autoclear flag
 * never took effect.
 *
 * Returns 0 on success, -1 on failure. @fd is left open in both cases.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD so the device does not stay occupied */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
505
a17b1e65
SG
/*
 * Mount the image file @rootfs on @target through a loop device.
 *
 * /dev is scanned for entries whose name starts with "loop". The first one
 * that can be opened and for which LOOP_GET_STATUS64 fails with ENXIO is
 * treated as free: the image is attached to it with setup_lodev() and the
 * device is mounted with mount_unknown_fs(). Only that first free device is
 * tried.
 *
 * readdir() is used instead of the deprecated readdir_r(); the directory
 * stream is private to this call.
 *
 * Returns 0 on success, -1 if /dev cannot be opened, no free loop device
 * was found, or attaching/mounting failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((direntp = readdir(dir))) {

		/* also filters out "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a status could be read, so the device is already in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
570
a17b1e65
SG
/*
 * Mount the block device @rootfs on @target. The work is delegated to
 * mount_unknown_fs(), whose result is returned unchanged.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc;

	rc = mount_unknown_fs(rootfs, target, options);
	return rc;
}
576
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing for the
 * duration of the container run, to prevent the container from marking the
 * underlying fs readonly on shutdown. The file is unlinked immediately so
 * that no name pollution happens.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, path cannot be
 *           resolved, or rootfs is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || rootfs[0] == '\0')
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject output errors as well as truncation */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the fs busy; the name is not needed */
	(void)unlink(absrootfspin);
	return fd;
}
619
e2a7e8dc
SH
/*
 * When a remount is requested, carry over the restrictive flags that the
 * existing mount already has (nosuid, nodev, read-only, noexec) so that the
 * remount honors them.
 *
 * @s:     mount source; if NULL, @d is inspected instead
 * @d:     mount destination
 * @flags: requested mount flags
 *
 * Returns @flags unchanged when MS_REMOUNT is not set, when neither path is
 * given, when statvfs() fails, or when built without statvfs() support.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
		unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long inherited[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *probe = s ? s : d;
	struct statvfs vfs;
	size_t k;

	if (!(flags & MS_REMOUNT) || !probe)
		return flags;

	if (statvfs(probe, &vfs) < 0)
		return flags;

	for (k = 0; k < sizeof(inherited) / sizeof(inherited[0]); k++) {
		if (vfs.f_flag & inherited[k])
			flags |= inherited[k];
	}

	return flags;
#else
	return flags;
#endif
}
656
4fb3cba5 657static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 658{
368bbc02 659 int r;
80e80c40 660 int i;
b06b8511
CS
661 static struct {
662 int match_mask;
663 int match_flag;
664 const char *source;
665 const char *destination;
666 const char *fstype;
667 unsigned long flags;
668 const char *options;
669 } default_mounts[] = {
670 /* Read-only bind-mounting... In older kernels, doing that required
671 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
672 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
673 * kernel 2.6.26 onwards. However, this apparently does not work on
674 * kernel 3.8. Unfortunately, on that very same kernel, doing the
675 * same trick as above doesn't seem to work either, there one needs
676 * to ALSO specify MS_BIND for the remount, otherwise the entire
677 * fs is remounted read-only or the mount fails because it's busy...
678 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
679 * 2.6.32...
368bbc02 680 */
f24a52d5 681 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
682 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
683 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
684 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
685 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 686 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
687 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
688 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
689 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
690 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
691 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
692 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
693 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
694 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
695 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
696 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
697 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
698 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 699 };
368bbc02 700
b06b8511
CS
701 for (i = 0; default_mounts[i].match_mask; i++) {
702 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
703 char *source = NULL;
704 char *destination = NULL;
705 int saved_errno;
e2a7e8dc 706 unsigned long mflags;
b06b8511
CS
707
708 if (default_mounts[i].source) {
709 /* will act like strdup if %r is not present */
8ede5f4c 710 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
711 if (!source) {
712 SYSERROR("memory allocation error");
713 return -1;
714 }
715 }
cc4fd506
SH
716 if (!default_mounts[i].destination) {
717 ERROR("BUG: auto mounts destination %d was NULL", i);
718 return -1;
719 }
720 /* will act like strdup if %r is not present */
721 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
722 if (!destination) {
723 saved_errno = errno;
724 SYSERROR("memory allocation error");
725 free(source);
726 errno = saved_errno;
727 return -1;
b06b8511 728 }
e2a7e8dc
SH
729 mflags = add_required_remount_flags(source, destination,
730 default_mounts[i].flags);
592fd47a 731 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 732 saved_errno = errno;
b88ff9a0
SG
733 if (r < 0 && errno == ENOENT) {
734 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
735 r = 0;
736 }
737 else if (r < 0)
e2a7e8dc 738 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 739
b06b8511
CS
740 free(source);
741 free(destination);
742 if (r < 0) {
b06b8511
CS
743 errno = saved_errno;
744 return -1;
745 }
368bbc02 746 }
368bbc02
CS
747 }
748
b06b8511 749 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
750 int cg_flags;
751
752 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
753 /* If the type of cgroup mount was not specified, it depends on the
754 * container's capabilities as to what makes sense: if we have
755 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
756 * anyway, so we may as well default to read-write; then the admin
757 * will not be given a false sense of security. (And if they really
758 * want mixed r/o r/w, then they can explicitly specify :mixed.)
759 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
760 * :mixed, because then the container can't remount it read-write. */
761 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
762 int has_sys_admin = 0;
763 if (!lxc_list_empty(&conf->keepcaps)) {
764 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
765 } else {
766 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
767 }
768 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
769 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
770 } else {
771 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
772 }
773 }
774
8ede5f4c 775 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 776 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 777 return -1;
368bbc02
CS
778 }
779 }
780
368bbc02 781 return 0;
368bbc02
CS
782}
783
a17b1e65 784static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 785{
b09ef133 786 char absrootfs[MAXPATHLEN];
78ae2fcc 787 struct stat s;
a6afdde9 788 int i;
78ae2fcc 789
a17b1e65 790 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 791
792 struct rootfs_type {
793 int type;
794 rootfs_cb cb;
795 } rtfs_type[] = {
2656d231
DL
796 { S_IFDIR, mount_rootfs_dir },
797 { S_IFBLK, mount_rootfs_block },
798 { S_IFREG, mount_rootfs_file },
78ae2fcc 799 };
0ad19a3f 800
4c8ab83b 801 if (!realpath(rootfs, absrootfs)) {
36eb9bde 802 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 803 return -1;
804 }
b09ef133 805
b09ef133 806 if (access(absrootfs, F_OK)) {
36eb9bde 807 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 808 return -1;
809 }
810
78ae2fcc 811 if (stat(absrootfs, &s)) {
36eb9bde 812 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 813 return -1;
814 }
815
78ae2fcc 816 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 817
78ae2fcc 818 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
819 continue;
9b0f0477 820
a17b1e65 821 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 822 }
9b0f0477 823
36eb9bde 824 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 825 return -1;
0ad19a3f 826}
827
/*
 * Set the hostname to @utsname->nodename.
 *
 * A NULL @utsname means nothing was configured and counts as success.
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;
	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);
	return 0;
}
842
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name, relative to /dev */
};

/* Standard file-descriptor links expected in /dev. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd" },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin" },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
854
855static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
856{
857 char path[MAXPATHLEN];
858 int ret,i;
09227be2 859 struct stat s;
69aa6655
DE
860
861
862 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
863 const struct dev_symlinks *d = &dev_symlinks[i];
cd2b3cfe 864 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
69aa6655
DE
865 if (ret < 0 || ret >= MAXPATHLEN)
866 return -1;
09227be2
MW
867
868 /*
869 * Stat the path first. If we don't get an error
870 * accept it as is and don't try to create it
871 */
872 if (!stat(path, &s)) {
873 continue;
874 }
875
69aa6655 876 ret = symlink(d->oldpath, path);
09227be2 877
69aa6655 878 if (ret && errno != EEXIST) {
09227be2
MW
879 if ( errno == EROFS ) {
880 WARN("Warning: Read Only file system while creating %s", path);
881 } else {
882 SYSERROR("Error creating %s", path);
883 return -1;
884 }
69aa6655
DE
885 }
886 }
887 return 0;
888}
889
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * @pp:   address of the heap-allocated list; *pp == NULL means the list
 *        does not exist yet and is created as "container_ttys=<name>"
 * @name: pty name to add
 *
 * Returns true on success. On allocation failure false is returned and *pp
 * is left as it was.
 */
static bool append_ptyname(char **pp, char *name)
{
	char *list = *pp;
	char *grown;
	size_t oldlen, addlen;

	if (list == NULL) {
		list = malloc(strlen(name) + strlen("container_ttys=") + 1);
		if (list == NULL)
			return false;
		sprintf(list, "container_ttys=%s", name);
		*pp = list;
		return true;
	}

	oldlen = strlen(list);
	addlen = strlen(name);

	/* room for the separator and the terminating NUL */
	grown = realloc(list, oldlen + addlen + 2);
	if (grown == NULL)
		return false;

	grown[oldlen] = ' ';
	memcpy(grown + oldlen + 1, name, addlen + 1);
	*pp = grown;
	return true;
}
912
913static int setup_tty(struct lxc_conf *conf)
914{
393903d1
SH
915 const struct lxc_tty_info *tty_info = &conf->tty_info;
916 char *ttydir = conf->ttydir;
7c6ef2a2
SH
917 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
918 int i, ret;
b0a33c1e 919
e8bd4e43 920 if (!conf->rootfs.path)
bc9bd0e3
DL
921 return 0;
922
b0a33c1e 923 for (i = 0; i < tty_info->nbtty; i++) {
924
925 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
926
e8bd4e43 927 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
928 if (ret >= sizeof(path)) {
929 ERROR("pathname too long for ttys");
930 return -1;
931 }
932 if (ttydir) {
933 /* create dev/lxc/tty%d" */
e8bd4e43 934 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
935 if (ret >= sizeof(lxcpath)) {
936 ERROR("pathname too long for ttys");
937 return -1;
938 }
939 ret = creat(lxcpath, 0660);
940 if (ret==-1 && errno != EEXIST) {
959aee9c 941 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
942 return -1;
943 }
4d44e274
SH
944 if (ret >= 0)
945 close(ret);
7c6ef2a2
SH
946 ret = unlink(path);
947 if (ret && errno != ENOENT) {
959aee9c 948 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
949 return -1;
950 }
b0a33c1e 951
7c6ef2a2
SH
952 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
953 WARN("failed to mount '%s'->'%s'",
954 pty_info->name, path);
955 continue;
956 }
13954cce 957
9ba8130c
SH
958 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
959 if (ret >= sizeof(lxcpath)) {
960 ERROR("tty pathname too long");
961 return -1;
962 }
7c6ef2a2
SH
963 ret = symlink(lxcpath, path);
964 if (ret) {
959aee9c 965 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
966 return -1;
967 }
968 } else {
c6883f38
SH
969 /* If we populated /dev, then we need to create /dev/ttyN */
970 if (access(path, F_OK)) {
971 ret = creat(path, 0660);
972 if (ret==-1) {
959aee9c 973 SYSERROR("error creating %s", path);
c6883f38 974 /* this isn't fatal, continue */
025ed0f3 975 } else {
c6883f38 976 close(ret);
025ed0f3 977 }
c6883f38 978 }
7c6ef2a2 979 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 980 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
981 continue;
982 }
393903d1 983 }
e8bd4e43 984 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
985 ERROR("Error setting up container_ttys string");
986 return -1;
b0a33c1e 987 }
988 }
989
cd54d859
DL
990 INFO("%d tty(s) has been setup", tty_info->nbtty);
991
b0a33c1e 992 return 0;
993}
994
bf601689 995
2d489f9e 996static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 997{
2d489f9e 998 int oldroot = -1, newroot = -1;
bf601689 999
2d489f9e
SH
1000 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1001 if (oldroot < 0) {
1002 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1003 return -1;
1004 }
2d489f9e
SH
1005 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1006 if (newroot < 0) {
1007 SYSERROR("Error opening new-/ for fchdir");
1008 goto fail;
c08556c6 1009 }
bf601689 1010
cc6f6dd7 1011 /* change into new root fs */
2d489f9e 1012 if (fchdir(newroot)) {
cc6f6dd7 1013 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1014 goto fail;
cc6f6dd7
DL
1015 }
1016
cc6f6dd7 1017 /* pivot_root into our new root fs */
2d489f9e 1018 if (pivot_root(".", ".")) {
cc6f6dd7 1019 SYSERROR("pivot_root syscall failed");
2d489f9e 1020 goto fail;
bf601689 1021 }
cc6f6dd7 1022
2d489f9e
SH
1023 /*
1024 * at this point the old-root is mounted on top of our new-root
1025 * To unmounted it we must not be chdir'd into it, so escape back
1026 * to old-root
1027 */
1028 if (fchdir(oldroot) < 0) {
1029 SYSERROR("Error entering oldroot");
1030 goto fail;
1031 }
7981ea46 1032 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1033 SYSERROR("Error detaching old root");
1034 goto fail;
cc6f6dd7
DL
1035 }
1036
2d489f9e
SH
1037 if (fchdir(newroot) < 0) {
1038 SYSERROR("Error re-entering newroot");
1039 goto fail;
1040 }
cc6f6dd7 1041
2d489f9e
SH
1042 close(oldroot);
1043 close(newroot);
bf601689 1044
2d489f9e 1045 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1046
bf601689 1047 return 0;
2d489f9e
SH
1048
1049fail:
1050 if (oldroot != -1)
1051 close(oldroot);
1052 if (newroot != -1)
1053 close(newroot);
1054 return -1;
bf601689
MH
1055}
1056
bc6928ff 1057/*
87da4ec3
SH
1058 * Just create a path for /dev under $lxcpath/$name and in rootfs
1059 * If we hit an error, log it but don't fail yet.
91c3830e 1060 */
14221cbb 1061static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1062{
1063 int ret;
87da4ec3
SH
1064 size_t clen;
1065 char *path;
91c3830e 1066
14221cbb 1067 INFO("Mounting container /dev");
bc6928ff 1068
14221cbb
DW
1069 /* $(rootfs->mount) + "/dev/pts" + '\0' */
1070 clen = (rootfs->path ? strlen(rootfs->mount) : 0) + 9;
87da4ec3 1071 path = alloca(clen);
bc6928ff 1072
14221cbb 1073 ret = snprintf(path, clen, "%s/dev", rootfs->path ? rootfs->mount : "");
87da4ec3 1074 if (ret < 0 || ret >= clen)
91c3830e 1075 return -1;
bc6928ff 1076
87da4ec3 1077 if (!dir_exists(path)) {
14221cbb 1078 WARN("No /dev in container.");
87da4ec3
SH
1079 WARN("Proceeding without autodev setup");
1080 return 0;
bc6928ff 1081 }
87da4ec3 1082
592fd47a
SH
1083 if (safe_mount("none", path, "tmpfs", 0, "size=100000,mode=755",
1084 rootfs->path ? rootfs->mount : NULL)) {
87da4ec3
SH
1085 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1086 return false;
91c3830e 1087 }
87da4ec3
SH
1088
1089 INFO("Mounted tmpfs onto %s", path);
1090
14221cbb 1091 ret = snprintf(path, clen, "%s/dev/pts", rootfs->path ? rootfs->mount : "");
87da4ec3 1092 if (ret < 0 || ret >= clen)
91c3830e 1093 return -1;
87da4ec3 1094
bc6928ff
MW
1095 /*
1096 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1097 * If not, then create it and exit if that fails...
1098 */
87da4ec3 1099 if (!dir_exists(path)) {
bc6928ff
MW
1100 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1101 if (ret) {
1102 SYSERROR("Failed to create /dev/pts in container");
1103 return -1;
1104 }
91c3830e
SH
1105 }
1106
14221cbb 1107 INFO("Mounted container /dev");
91c3830e
SH
1108 return 0;
1109}
1110
/* Description of one device node created in an auto-populated /dev. */
struct lxc_devs {
	const char *name;	/* node name below /dev */
	mode_t mode;		/* file type and permission bits passed to mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes that fill_autodev() creates, in creation order. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1127
14221cbb 1128static int fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38
SH
1129{
1130 int ret;
c6883f38
SH
1131 char path[MAXPATHLEN];
1132 int i;
3a32201c 1133 mode_t cmask;
c6883f38 1134
14221cbb 1135 INFO("Creating initial consoles under container /dev");
91c3830e 1136
14221cbb 1137 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs->path ? rootfs->mount : "");
91c3830e
SH
1138 if (ret < 0 || ret >= MAXPATHLEN) {
1139 ERROR("Error calculating container /dev location");
c6883f38 1140 return -1;
f7bee6c6 1141 }
91c3830e 1142
9cb4d183
SH
1143 if (!dir_exists(path)) // ignore, just don't try to fill in
1144 return 0;
1145
14221cbb 1146 INFO("Populating container /dev");
3a32201c 1147 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1148 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1149 const struct lxc_devs *d = &lxc_devs[i];
14221cbb 1150 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs->path ? rootfs->mount : "", d->name);
c6883f38
SH
1151 if (ret < 0 || ret >= MAXPATHLEN)
1152 return -1;
1153 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1154 if (ret && errno != EEXIST) {
9cb4d183
SH
1155 char hostpath[MAXPATHLEN];
1156 FILE *pathfile;
1157
1158 // Unprivileged containers cannot create devices, so
1159 // bind mount the device from the host
1160 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1161 if (ret < 0 || ret >= MAXPATHLEN)
1162 return -1;
1163 pathfile = fopen(path, "wb");
1164 if (!pathfile) {
1165 SYSERROR("Failed to create device mount target '%s'", path);
1166 return -1;
1167 }
1168 fclose(pathfile);
592fd47a
SH
1169 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
1170 rootfs->path ? rootfs->mount : NULL) != 0) {
9cb4d183
SH
1171 SYSERROR("Failed bind mounting device %s from host into container",
1172 d->name);
1173 return -1;
1174 }
c6883f38
SH
1175 }
1176 }
3a32201c 1177 umask(cmask);
c6883f38 1178
14221cbb 1179 INFO("Populated container /dev");
c6883f38
SH
1180 return 0;
1181}
1182
cc28d0b0 1183static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1184{
cc28d0b0
SH
1185 const struct lxc_rootfs *rootfs = &conf->rootfs;
1186
a0f379bf
DW
1187 if (!rootfs->path) {
1188 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1189 SYSERROR("Failed to make / rslave");
1190 return -1;
1191 }
c69bd12f 1192 return 0;
a0f379bf 1193 }
0ad19a3f 1194
12297168 1195 if (access(rootfs->mount, F_OK)) {
b1789442 1196 SYSERROR("failed to access to '%s', check it is present",
12297168 1197 rootfs->mount);
b1789442
DL
1198 return -1;
1199 }
1200
9be53773 1201 // First try mounting rootfs using a bdev
76a26f55 1202 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1203 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1204 bdev_put(bdev);
9be53773
SH
1205 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1206 return 0;
1207 }
59d66af2
SH
1208 if (bdev)
1209 bdev_put(bdev);
a17b1e65 1210 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1211 ERROR("failed to mount rootfs");
c3f0a28c 1212 return -1;
1213 }
0ad19a3f 1214
12297168 1215 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1216
ac778708
DL
1217 return 0;
1218}
1219
91e93c71
AV
1220int prepare_ramfs_root(char *root)
1221{
1222 char buf[LINELEN], *p;
1223 char nroot[PATH_MAX];
1224 FILE *f;
1225 int i;
1226 char *p2;
1227
1228 if (realpath(root, nroot) == NULL)
1229 return -1;
1230
1231 if (chdir("/") == -1)
1232 return -1;
1233
1234 /*
1235 * We could use here MS_MOVE, but in userns this mount is
1236 * locked and can't be moved.
1237 */
1238 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1239 SYSERROR("Failed to move %s into /", root);
1240 return -1;
1241 }
1242
88322f77 1243 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1244 SYSERROR("Failed to make . rprivate");
1245 return -1;
1246 }
1247
1248 /*
1249 * The following code cleans up inhereted mounts which are not
1250 * required for CT.
1251 *
1252 * The mountinfo file shows not all mounts, if a few points have been
1253 * unmounted between read operations from the mountinfo. So we need to
1254 * read mountinfo a few times.
1255 *
1256 * This loop can be skipped if a container uses unserns, because all
1257 * inherited mounts are locked and we should live with all this trash.
1258 */
1259 while (1) {
1260 int progress = 0;
1261
1262 f = fopen("./proc/self/mountinfo", "r");
1263 if (!f) {
1264 SYSERROR("Unable to open /proc/self/mountinfo");
1265 return -1;
1266 }
1267 while (fgets(buf, LINELEN, f)) {
1268 for (p = buf, i=0; p && i < 4; i++)
1269 p = strchr(p+1, ' ');
1270 if (!p)
1271 continue;
1272 p2 = strchr(p+1, ' ');
1273 if (!p2)
1274 continue;
1275
1276 *p2 = '\0';
1277 *p = '.';
1278
1279 if (strcmp(p + 1, "/") == 0)
1280 continue;
1281 if (strcmp(p + 1, "/proc") == 0)
1282 continue;
1283
1284 if (umount2(p, MNT_DETACH) == 0)
1285 progress++;
1286 }
1287 fclose(f);
1288 if (!progress)
1289 break;
1290 }
1291
8bea9fae
PR
1292 /* This also can be skipped if a container uses unserns */
1293 umount2("./proc", MNT_DETACH);
91e93c71
AV
1294
1295 /* It is weird, but chdir("..") moves us in a new root */
1296 if (chdir("..") == -1) {
1297 SYSERROR("Unable to change working directory");
1298 return -1;
1299 }
1300
1301 if (chroot(".") == -1) {
1302 SYSERROR("Unable to chroot");
1303 return -1;
1304 }
1305
1306 return 0;
1307}
1308
74a3920a 1309static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1310{
ac778708
DL
1311 if (!rootfs->path)
1312 return 0;
1313
91e93c71
AV
1314 if (detect_ramfs_rootfs()) {
1315 if (prepare_ramfs_root(rootfs->mount))
1316 return -1;
1317 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1318 ERROR("failed to setup pivot root");
25368b52 1319 return -1;
c69bd12f
DL
1320 }
1321
25368b52 1322 return 0;
0ad19a3f 1323}
1324
d852c78c 1325static int setup_pts(int pts)
3c26f34e 1326{
77890c6d
SW
1327 char target[PATH_MAX];
1328
d852c78c
DL
1329 if (!pts)
1330 return 0;
3c26f34e 1331
1332 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1333 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1334 return -1;
1335 }
1336
7e40254a
JTLB
1337 if (mkdir("/dev/pts", 0755)) {
1338 if ( errno != EEXIST ) {
1339 SYSERROR("failed to create '/dev/pts'");
1340 return -1;
1341 }
1342 }
1343
a6afdde9 1344 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1345 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1346 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1347 return -1;
1348 }
1349
3c26f34e 1350 if (access("/dev/ptmx", F_OK)) {
1351 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1352 goto out;
36eb9bde 1353 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1354 return -1;
1355 }
1356
77890c6d
SW
1357 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1358 goto out;
1359
3c26f34e 1360 /* fallback here, /dev/pts/ptmx exists just mount bind */
1361 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1362 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1363 return -1;
1364 }
cd54d859
DL
1365
1366 INFO("created new pts instance");
d852c78c 1367
3c26f34e 1368out:
1369 return 0;
1370}
1371
cccc74b5
DL
/*
 * Apply the execution domain @persona to the calling process.
 *
 * A value of -1 means "leave untouched". The whole body is compiled out
 * when HAVE_SYS_PERSONALITY_H is not set, in which case the function
 * always succeeds.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1388
7c6ef2a2 1389static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1390 const struct lxc_console *console)
6e590161 1391{
63376d7d
DL
1392 char path[MAXPATHLEN];
1393 struct stat s;
7c6ef2a2 1394 int ret;
52e35957 1395
7c6ef2a2
SH
1396 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1397 if (ret >= sizeof(path)) {
959aee9c 1398 ERROR("console path too long");
7c6ef2a2
SH
1399 return -1;
1400 }
52e35957 1401
63376d7d 1402 if (access(path, F_OK)) {
466978b0 1403 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1404 return 0;
52e35957
DL
1405 }
1406
b5159817
DE
1407 if (console->master < 0) {
1408 INFO("no console");
f78a1f32
DL
1409 return 0;
1410 }
ed502555 1411
63376d7d
DL
1412 if (stat(path, &s)) {
1413 SYSERROR("failed to stat '%s'", path);
1414 return -1;
1415 }
1416
1417 if (chmod(console->name, s.st_mode)) {
1418 SYSERROR("failed to set mode '0%o' to '%s'",
1419 s.st_mode, console->name);
1420 return -1;
1421 }
13954cce 1422
592fd47a 1423 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1424 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1425 return -1;
1426 }
1427
63376d7d 1428 INFO("console has been setup");
7c6ef2a2
SH
1429 return 0;
1430}
1431
1432static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1433 const struct lxc_console *console,
1434 char *ttydir)
1435{
1436 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1437 int ret;
1438
1439 /* create rootfs/dev/<ttydir> directory */
1440 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1441 ttydir);
1442 if (ret >= sizeof(path))
1443 return -1;
1444 ret = mkdir(path, 0755);
1445 if (ret && errno != EEXIST) {
959aee9c 1446 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1447 return -1;
1448 }
959aee9c 1449 INFO("created %s", path);
7c6ef2a2
SH
1450
1451 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1452 rootfs->mount, ttydir);
1453 if (ret >= sizeof(lxcpath)) {
959aee9c 1454 ERROR("console path too long");
7c6ef2a2
SH
1455 return -1;
1456 }
1457
1458 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1459 ret = unlink(path);
1460 if (ret && errno != ENOENT) {
959aee9c 1461 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1462 return -1;
1463 }
1464
1465 ret = creat(lxcpath, 0660);
1466 if (ret==-1 && errno != EEXIST) {
959aee9c 1467 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1468 return -1;
1469 }
4d44e274
SH
1470 if (ret >= 0)
1471 close(ret);
7c6ef2a2 1472
b5159817
DE
1473 if (console->master < 0) {
1474 INFO("no console");
7c6ef2a2
SH
1475 return 0;
1476 }
1477
592fd47a 1478 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1479 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1480 return -1;
1481 }
1482
1483 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1484 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1485 if (ret >= sizeof(lxcpath)) {
1486 ERROR("lxc/console path too long");
1487 return -1;
1488 }
7c6ef2a2
SH
1489 ret = symlink(lxcpath, path);
1490 if (ret) {
1491 SYSERROR("failed to create symlink for console");
1492 return -1;
1493 }
1494
1495 INFO("console has been setup on %s", lxcpath);
cd54d859 1496
6e590161 1497 return 0;
1498}
1499
7c6ef2a2
SH
1500static int setup_console(const struct lxc_rootfs *rootfs,
1501 const struct lxc_console *console,
1502 char *ttydir)
1503{
1504 /* We don't have a rootfs, /dev/console will be shared */
1505 if (!rootfs->path)
1506 return 0;
1507 if (!ttydir)
1508 return setup_dev_console(rootfs, console);
1509
1510 return setup_ttydir_console(rootfs, console, ttydir);
1511}
1512
1bd051a6
SH
1513static int setup_kmsg(const struct lxc_rootfs *rootfs,
1514 const struct lxc_console *console)
1515{
1516 char kpath[MAXPATHLEN];
1517 int ret;
1518
222fea5a
DE
1519 if (!rootfs->path)
1520 return 0;
1bd051a6
SH
1521 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1522 if (ret < 0 || ret >= sizeof(kpath))
1523 return -1;
1524
1525 ret = unlink(kpath);
1526 if (ret && errno != ENOENT) {
959aee9c 1527 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1528 return -1;
1529 }
1530
1531 ret = symlink("console", kpath);
1532 if (ret) {
1533 SYSERROR("failed to create symlink for kmsg");
1534 return -1;
1535 }
1536
1537 return 0;
1538}
1539
998ac676
RT
1540static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1541{
1542 struct mount_opt *mo;
1543
1544 /* If opt is found in mount_opt, set or clear flags.
1545 * Otherwise append it to data. */
1546
1547 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1548 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1549 if (mo->clear)
1550 *flags &= ~mo->flag;
1551 else
1552 *flags |= mo->flag;
1553 return;
1554 }
1555 }
1556
1557 if (strlen(*data))
1558 strcat(*data, ",");
1559 strcat(*data, opt);
1560}
1561
/*
 * Split the comma separated option string @mntopts into mount flags and
 * filesystem specific data.
 *
 * *@mntflags receives the flags. *@mntdata receives a malloc'd string with
 * the remaining options, or NULL when there are none; the caller frees it.
 * A NULL @mntopts yields no flags and no data.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never get longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1600
6fd5e769
SH
/*
 * Terminate @word at its first blank or tab; a string without either is
 * left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1607
/*
 * Skip @nfields blank/tab terminated fields in @src and return a pointer to
 * what follows. If the string ends first, the pointer to its terminating
 * NUL is returned. Consecutive separators are not collapsed.
 */
static char *get_field(char *src, int nfields)
{
	char *cursor = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		cursor += strcspn(cursor, " \t");
		if (*cursor == '\0')
			break;
		cursor++;
	}

	return cursor;
}
1625
911324ef
DL
/*
 * Perform a single mount described by an fstab-like entry.
 *
 * The first mount is done through safe_mount() with MS_REMOUNT masked out.
 * If MS_REMOUNT or MS_BIND was requested, a second mount() with MS_REMOUNT
 * follows so that the requested flags are applied. With HAVE_STATVFS the
 * nosuid/nodev/ro/noexec flags reported for @fsname are carried over to that
 * remount, and a plain bind mount that needs none of them skips it.
 *
 * A failing mount is only logged and reported as success when @optional is
 * non-zero.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, const char *rootfs)
{
	unsigned long rdonly_req = 0;
	int remount;
#ifdef HAVE_STATVFS
	struct statvfs vfs;
#endif

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	remount = (mountflags & MS_REMOUNT) || (mountflags & MS_BIND);
	if (remount) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

		if (mountflags & MS_RDONLY)
			rdonly_req |= MS_RDONLY;

#ifdef HAVE_STATVFS
		if (statvfs(fsname, &vfs) == 0) {
			unsigned long needed = rdonly_req;

			if (vfs.f_flag & MS_NOSUID)
				needed |= MS_NOSUID;
			if (vfs.f_flag & MS_NODEV)
				needed |= MS_NODEV;
			if (vfs.f_flag & MS_RDONLY)
				needed |= MS_RDONLY;
			if (vfs.f_flag & MS_NOEXEC)
				needed |= MS_NOEXEC;
			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, vfs.f_flag, needed);

			/*
			 * A pure bind mount whose flags already cover
			 * everything needed does not have to be remounted.
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(needed & ~mountflags) && rdonly_req == 0) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				remount = 0;
			} else {
				mountflags |= needed;
			}
		}
#endif
	}

	if (remount &&
	    mount(fsname, target, fstype, mountflags | MS_REMOUNT, data) < 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s",
		     fsname, target, strerror(errno));
		return 0;
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1702
4e4ca161
SH
/*
 * Strip the lxc-only options 'create=dir', 'create=file' and 'optional'
 * from mntent->mnt_opts in place. Only the first occurrence of each is
 * removed. When the option is the last one in the list, the string is cut
 * right where the option started.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t k;

	for (k = 0; k < sizeof(culled) / sizeof(culled[0]); k++) {
		char *hit = strstr(mntent->mnt_opts, culled[k]);
		char *comma;

		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma)
			/* close the gap, including the trailing NUL */
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		else
			/* no more mntopts, so just chop it here */
			*hit = '\0';
	}
}
1727
6e46cc0d 1728static int mount_entry_create_aufs_dirs(const struct mntent *mntent,
0a2dddd4
CB
1729 const struct lxc_rootfs *rootfs,
1730 const char *lxc_name,
1731 const char *lxc_path)
6e46cc0d 1732{
0a2dddd4 1733 char lxcpath[MAXPATHLEN];
1e3ce0da 1734 char *rootfsdir = NULL;
6e46cc0d
CB
1735 char *scratch = NULL;
1736 char *tmp = NULL;
1737 char *upperdir = NULL;
1738 char **opts = NULL;
9e5a2a01 1739 int fret = -1;
0a2dddd4 1740 int ret = 0;
6e46cc0d
CB
1741 size_t arrlen = 0;
1742 size_t i;
1743 size_t len = 0;
1e3ce0da 1744 size_t rootfslen = 0;
6e46cc0d 1745
0a2dddd4 1746 if (!rootfs->path || !lxc_name || !lxc_path)
1e3ce0da 1747 goto err;
6e46cc0d
CB
1748
1749 opts = lxc_string_split(mntent->mnt_opts, ',');
1750 if (opts)
1751 arrlen = lxc_array_len((void **)opts);
1752 else
1e3ce0da 1753 goto err;
6e46cc0d
CB
1754
1755 for (i = 0; i < arrlen; i++) {
1756 if (strstr(opts[i], "br=") && (strlen(opts[i]) > (len = strlen("br="))))
1757 tmp = opts[i] + len;
1758 }
1e3ce0da
CB
1759 if (!tmp)
1760 goto err;
6e46cc0d
CB
1761
1762 upperdir = strtok_r(tmp, ":=", &scratch);
1e3ce0da
CB
1763 if (!upperdir)
1764 goto err;
6e46cc0d 1765
0a2dddd4 1766 ret = snprintf(lxcpath, MAXPATHLEN, "%s/%s", lxc_path, lxc_name);
1e3ce0da
CB
1767 if (ret < 0 || ret >= MAXPATHLEN)
1768 goto err;
1769
5c484f79 1770 rootfsdir = ovl_get_rootfs(rootfs->path, &rootfslen);
1e3ce0da
CB
1771 if (!rootfsdir)
1772 goto err;
6e46cc0d
CB
1773
1774 /* We neither allow users to create upperdirs outside the containerdir
1775 * nor inside the rootfs. The latter might be debatable. */
1e3ce0da 1776 if ((strncmp(upperdir, lxcpath, strlen(lxcpath)) == 0) && (strncmp(upperdir, rootfsdir, rootfslen) != 0))
6e46cc0d
CB
1777 if (mkdir_p(upperdir, 0755) < 0) {
1778 WARN("Failed to create upperdir");
1779 }
1780
9e5a2a01 1781 fret = 0;
1e3ce0da
CB
1782
1783err:
1784 free(rootfsdir);
1785 lxc_free_array((void **)opts, free);
9e5a2a01 1786 return fret;
6e46cc0d
CB
1787}
1788
0a2dddd4 1789
/*
 * Create whatever a mount entry needs before it can be mounted:
 *   - for "overlay*" / "aufs*" filesystem types, the directories handled by
 *     ovl_mkdir() / mount_entry_create_aufs_dirs();
 *   - with the "create=dir" option, the directory @path;
 *   - with the "create=file" option, the parent directory of @path and an
 *     empty file at @path, if @path does not exist yet.
 *
 * Returns 0 on success, -1 if a required object could not be created.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	char *pathcopy = NULL;
	int ret = 0;
	FILE *pathfile = NULL;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (ovl_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (mount_entry_create_aufs_dirs(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		/* dirname() may modify its argument, so hand it a copy */
		pathcopy = strdup(path);
		if (!pathcopy) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		/*
		 * dirname() may return a pointer that is not the start of
		 * our allocation (e.g. a static "."), so its result must
		 * never be passed to free(); free the copy instead.
		 */
		if (mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	return ret;
}
1830
db4aba38 1831static inline int mount_entry_on_generic(struct mntent *mntent,
0a2dddd4
CB
1832 const char* path, const struct lxc_rootfs *rootfs,
1833 const char *lxc_name, const char *lxc_path)
4d5b72a1
NC
1834{
1835 unsigned long mntflags;
1836 char *mntdata;
1837 int ret;
1838 bool optional = hasmntopt(mntent, "optional") != NULL;
1839
0a2dddd4 1840 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, lxc_path);
34cfffb3 1841
608e3567
SH
1842 if (ret < 0)
1843 return optional ? 0 : -1;
1844
4e4ca161
SH
1845 cull_mntent_opt(mntent);
1846
a17b1e65
SG
1847 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1848 free(mntdata);
1849 return -1;
1850 }
1851
6e46cc0d
CB
1852 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
1853 mntdata, optional,
1854 rootfs->path ? rootfs->mount : NULL);
68c152ef 1855
911324ef 1856 free(mntdata);
911324ef
DL
1857 return ret;
1858}
1859
db4aba38
NC
1860static inline int mount_entry_on_systemfs(struct mntent *mntent)
1861{
0a2dddd4 1862 return mount_entry_on_generic(mntent, mntent->mnt_dir, NULL, NULL, NULL);
db4aba38
NC
1863}
1864
4e4ca161 1865static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 1866 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
1867 const char *lxc_name,
1868 const char *lxc_path)
911324ef 1869{
013bd428 1870 char *aux;
59760f5d 1871 char path[MAXPATHLEN];
80a881b2 1872 int r, ret = 0, offset;
67e571de 1873 const char *lxcpath;
0ad19a3f 1874
593e8478 1875 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1876 if (!lxcpath) {
1877 ERROR("Out of memory");
1878 return -1;
1879 }
1880
80a881b2 1881 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1882 * use $lxcpath/CN/rootfs as the target prefix */
1883 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1884 if (r < 0 || r >= MAXPATHLEN)
1885 goto skipvarlib;
1886
1887 aux = strstr(mntent->mnt_dir, path);
1888 if (aux) {
1889 offset = strlen(path);
1890 goto skipabs;
1891 }
1892
1893skipvarlib:
013bd428
DL
1894 aux = strstr(mntent->mnt_dir, rootfs->path);
1895 if (!aux) {
1896 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 1897 return ret;
013bd428 1898 }
80a881b2
SH
1899 offset = strlen(rootfs->path);
1900
1901skipabs:
013bd428 1902
9ba8130c 1903 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1904 aux + offset);
1905 if (r < 0 || r >= MAXPATHLEN) {
1906 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
1907 return -1;
1908 }
1909
0a2dddd4 1910 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 1911}
d330fe7b 1912
4e4ca161 1913static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
1914 const struct lxc_rootfs *rootfs,
1915 const char *lxc_name,
1916 const char *lxc_path)
911324ef
DL
1917{
1918 char path[MAXPATHLEN];
911324ef 1919 int ret;
d330fe7b 1920
34cfffb3 1921 /* relative to root mount point */
6e46cc0d 1922 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
9ba8130c
SH
1923 if (ret >= sizeof(path)) {
1924 ERROR("path name too long");
1925 return -1;
1926 }
911324ef 1927
0a2dddd4 1928 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
1929}
1930
80a881b2 1931static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
0a2dddd4 1932 const char *lxc_name, const char *lxc_path)
911324ef 1933{
aaf901be
AM
1934 struct mntent mntent;
1935 char buf[4096];
911324ef 1936 int ret = -1;
e76b8764 1937
aaf901be 1938 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 1939
911324ef 1940 if (!rootfs->path) {
aaf901be 1941 if (mount_entry_on_systemfs(&mntent))
e76b8764 1942 goto out;
911324ef 1943 continue;
e76b8764
CDC
1944 }
1945
911324ef 1946 /* We have a separate root, mounts are relative to it */
aaf901be 1947 if (mntent.mnt_dir[0] != '/') {
0a2dddd4 1948 if (mount_entry_on_relative_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef
DL
1949 goto out;
1950 continue;
1951 }
cd54d859 1952
0a2dddd4 1953 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef 1954 goto out;
0ad19a3f 1955 }
cd54d859 1956
0ad19a3f 1957 ret = 0;
cd54d859
DL
1958
1959 INFO("mount points have been setup");
0ad19a3f 1960out:
e7938e9e
MN
1961 return ret;
1962}
1963
/*
 * Mount all entries of the fstab file @fstab; a NULL @fstab means there is
 * nothing to do.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *fstab_file;
	int rc;

	if (!fstab)
		return 0;

	fstab_file = setmntent(fstab, "r");
	if (!fstab_file) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fstab_file, lxc_name, lxc_path);
	endmntent(fstab_file);

	return rc;
}
1984
9fc7f8c0 1985FILE *write_mount_file(struct lxc_list *mount)
e7938e9e
MN
1986{
1987 FILE *file;
1988 struct lxc_list *iterator;
1989 char *mount_entry;
e7938e9e
MN
1990
1991 file = tmpfile();
1992 if (!file) {
1993 ERROR("tmpfile error: %m");
9fc7f8c0 1994 return NULL;
e7938e9e
MN
1995 }
1996
1997 lxc_list_for_each(iterator, mount) {
1998 mount_entry = iterator->elem;
1d6b1976 1999 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2000 }
2001
2002 rewind(file);
9fc7f8c0
TA
2003 return file;
2004}
2005
/*
 * Mount the entries held in the list @mount by dumping them into a
 * temporary fstab-style file and processing that file.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
			       const char *lxc_name, const char *lxc_path)
{
	int rc;
	FILE *entries = write_mount_file(mount);

	if (!entries)
		return -1;

	rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
	fclose(entries);

	return rc;
}
2021
bab88e68
CS
2022static int parse_cap(const char *cap)
2023{
2024 char *ptr = NULL;
2025 int i, capid = -1;
2026
7035407c
DE
2027 if (!strcmp(cap, "none"))
2028 return -2;
2029
bab88e68
CS
2030 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2031
2032 if (strcmp(cap, caps_opt[i].name))
2033 continue;
2034
2035 capid = caps_opt[i].value;
2036 break;
2037 }
2038
2039 if (capid < 0) {
2040 /* try to see if it's numeric, so the user may specify
2041 * capabilities that the running kernel knows about but
2042 * we don't */
2043 errno = 0;
2044 capid = strtol(cap, &ptr, 10);
2045 if (!ptr || *ptr != '\0' || errno != 0)
2046 /* not a valid number */
2047 capid = -1;
2048 else if (capid > lxc_caps_last_cap())
2049 /* we have a number but it's not a valid
2050 * capability */
2051 capid = -1;
2052 }
2053
2054 return capid;
2055}
2056
0769b82a
CS
2057int in_caplist(int cap, struct lxc_list *caps)
2058{
2059 struct lxc_list *iterator;
2060 int capid;
2061
2062 lxc_list_for_each(iterator, caps) {
2063 capid = parse_cap(iterator->elem);
2064 if (capid == cap)
2065 return 1;
2066 }
2067
2068 return 0;
2069}
2070
81810dd1
DL
2071static int setup_caps(struct lxc_list *caps)
2072{
2073 struct lxc_list *iterator;
2074 char *drop_entry;
bab88e68 2075 int capid;
81810dd1
DL
2076
2077 lxc_list_for_each(iterator, caps) {
2078
2079 drop_entry = iterator->elem;
2080
bab88e68 2081 capid = parse_cap(drop_entry);
d55bc1ad 2082
81810dd1 2083 if (capid < 0) {
1e11be34
DL
2084 ERROR("unknown capability %s", drop_entry);
2085 return -1;
81810dd1
DL
2086 }
2087
2088 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2089
2090 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2091 SYSERROR("failed to remove %s capability", drop_entry);
2092 return -1;
2093 }
81810dd1
DL
2094
2095 }
2096
1fb86a7c
SH
2097 DEBUG("capabilities have been setup");
2098
2099 return 0;
2100}
2101
2102static int dropcaps_except(struct lxc_list *caps)
2103{
2104 struct lxc_list *iterator;
2105 char *keep_entry;
1fb86a7c
SH
2106 int i, capid;
2107 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2108 INFO("found %d capabilities", numcaps);
1fb86a7c 2109
2caf9a97
SH
2110 if (numcaps <= 0 || numcaps > 200)
2111 return -1;
2112
1fb86a7c
SH
2113 // caplist[i] is 1 if we keep capability i
2114 int *caplist = alloca(numcaps * sizeof(int));
2115 memset(caplist, 0, numcaps * sizeof(int));
2116
2117 lxc_list_for_each(iterator, caps) {
2118
2119 keep_entry = iterator->elem;
2120
bab88e68 2121 capid = parse_cap(keep_entry);
1fb86a7c 2122
7035407c
DE
2123 if (capid == -2)
2124 continue;
2125
1fb86a7c
SH
2126 if (capid < 0) {
2127 ERROR("unknown capability %s", keep_entry);
2128 return -1;
2129 }
2130
8255688a 2131 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2132
2133 caplist[capid] = 1;
2134 }
2135 for (i=0; i<numcaps; i++) {
2136 if (caplist[i])
2137 continue;
2138 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2139 SYSERROR("failed to remove capability %d", i);
2140 return -1;
2141 }
1fb86a7c
SH
2142 }
2143
2144 DEBUG("capabilities have been setup");
81810dd1
DL
2145
2146 return 0;
2147}
2148
0ad19a3f 2149static int setup_hw_addr(char *hwaddr, const char *ifname)
2150{
2151 struct sockaddr sockaddr;
2152 struct ifreq ifr;
2153 int ret, fd;
2154
3cfc0f3a
MN
2155 ret = lxc_convert_mac(hwaddr, &sockaddr);
2156 if (ret) {
2157 ERROR("mac address '%s' conversion failed : %s",
2158 hwaddr, strerror(-ret));
0ad19a3f 2159 return -1;
2160 }
2161
2162 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2163 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2164 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2165
2166 fd = socket(AF_INET, SOCK_DGRAM, 0);
2167 if (fd < 0) {
3ab87b66 2168 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2169 return -1;
2170 }
2171
2172 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2173 close(fd);
2174 if (ret)
3ab87b66 2175 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2176
5da6aa8c 2177 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2178
0ad19a3f 2179 return ret;
2180}
2181
82d5ae15 2182static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2183{
82d5ae15
DL
2184 struct lxc_list *iterator;
2185 struct lxc_inetdev *inetdev;
3cfc0f3a 2186 int err;
0ad19a3f 2187
82d5ae15
DL
2188 lxc_list_for_each(iterator, ip) {
2189
2190 inetdev = iterator->elem;
2191
0093bb8c
DL
2192 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2193 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2194 if (err) {
2195 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2196 ifindex, strerror(-err));
82d5ae15
DL
2197 return -1;
2198 }
2199 }
2200
2201 return 0;
0ad19a3f 2202}
2203
82d5ae15 2204static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2205{
82d5ae15 2206 struct lxc_list *iterator;
7fa9074f 2207 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2208 int err;
0ad19a3f 2209
82d5ae15
DL
2210 lxc_list_for_each(iterator, ip) {
2211
2212 inet6dev = iterator->elem;
2213
b3df193c 2214 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2215 &inet6dev->mcast, &inet6dev->acast,
2216 inet6dev->prefix);
3cfc0f3a
MN
2217 if (err) {
2218 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2219 ifindex, strerror(-err));
82d5ae15 2220 return -1;
3cfc0f3a 2221 }
82d5ae15
DL
2222 }
2223
2224 return 0;
0ad19a3f 2225}
2226
82d5ae15 2227static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2228{
0ad19a3f 2229 char ifname[IFNAMSIZ];
0ad19a3f 2230 char *current_ifname = ifname;
3cfc0f3a 2231 int err;
0ad19a3f 2232
82d5ae15
DL
2233 /* empty network namespace */
2234 if (!netdev->ifindex) {
b0efbac4 2235 if (netdev->flags & IFF_UP) {
d472214b 2236 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2237 if (err) {
2238 ERROR("failed to set the loopback up : %s",
2239 strerror(-err));
82d5ae15
DL
2240 return -1;
2241 }
82d5ae15 2242 }
40790553
SH
2243 if (netdev->type != LXC_NET_VETH)
2244 return 0;
2245 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2246 }
13954cce 2247
b466dc33 2248 /* get the new ifindex in case of physical netdev */
40790553 2249 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2250 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2251 ERROR("failed to get ifindex for %s",
2252 netdev->link);
2253 return -1;
2254 }
40790553 2255 }
b466dc33 2256
82d5ae15
DL
2257 /* retrieve the name of the interface */
2258 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2259 ERROR("no interface corresponding to index '%d'",
82d5ae15 2260 netdev->ifindex);
0ad19a3f 2261 return -1;
2262 }
13954cce 2263
018ef520 2264 /* default: let the system to choose one interface name */
9d083402 2265 if (!netdev->name)
fb6d9b2f
DL
2266 netdev->name = netdev->type == LXC_NET_PHYS ?
2267 netdev->link : "eth%d";
018ef520 2268
82d5ae15 2269 /* rename the interface name */
40790553
SH
2270 if (strcmp(ifname, netdev->name) != 0) {
2271 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2272 if (err) {
2273 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2274 strerror(-err));
2275 return -1;
2276 }
018ef520
DL
2277 }
2278
2279 /* Re-read the name of the interface because its name has changed
2280 * and would be automatically allocated by the system
2281 */
82d5ae15 2282 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2283 ERROR("no interface corresponding to index '%d'",
82d5ae15 2284 netdev->ifindex);
018ef520 2285 return -1;
0ad19a3f 2286 }
2287
82d5ae15
DL
2288 /* set a mac address */
2289 if (netdev->hwaddr) {
2290 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2291 ERROR("failed to setup hw address for '%s'",
82d5ae15 2292 current_ifname);
0ad19a3f 2293 return -1;
2294 }
2295 }
2296
82d5ae15
DL
2297 /* setup ipv4 addresses on the interface */
2298 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2299 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2300 ifname);
2301 return -1;
2302 }
2303
82d5ae15
DL
2304 /* setup ipv6 addresses on the interface */
2305 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2306 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2307 ifname);
2308 return -1;
2309 }
2310
82d5ae15 2311 /* set the network device up */
b0efbac4 2312 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2313 int err;
2314
d472214b 2315 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2316 if (err) {
2317 ERROR("failed to set '%s' up : %s", current_ifname,
2318 strerror(-err));
0ad19a3f 2319 return -1;
2320 }
2321
2322 /* the network is up, make the loopback up too */
d472214b 2323 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2324 if (err) {
2325 ERROR("failed to set the loopback up : %s",
2326 strerror(-err));
0ad19a3f 2327 return -1;
2328 }
2329 }
2330
f8fee0e2
MK
2331 /* We can only set up the default routes after bringing
2332 * up the interface, sine bringing up the interface adds
2333 * the link-local routes and we can't add a default
2334 * route if the gateway is not reachable. */
2335
2336 /* setup ipv4 gateway on the interface */
2337 if (netdev->ipv4_gateway) {
2338 if (!(netdev->flags & IFF_UP)) {
2339 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2340 return -1;
2341 }
2342
2343 if (lxc_list_empty(&netdev->ipv4)) {
2344 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2345 return -1;
2346 }
2347
2348 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2349 if (err) {
fc739df5
SG
2350 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2351 if (err) {
2352 ERROR("failed to add ipv4 dest for '%s': %s",
2353 ifname, strerror(-err));
2354 }
2355
2356 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2357 if (err) {
2358 ERROR("failed to setup ipv4 gateway for '%s': %s",
2359 ifname, strerror(-err));
2360 if (netdev->ipv4_gateway_auto) {
2361 char buf[INET_ADDRSTRLEN];
2362 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2363 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2364 }
2365 return -1;
19a26f82 2366 }
f8fee0e2
MK
2367 }
2368 }
2369
2370 /* setup ipv6 gateway on the interface */
2371 if (netdev->ipv6_gateway) {
2372 if (!(netdev->flags & IFF_UP)) {
2373 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2374 return -1;
2375 }
2376
2377 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2378 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2379 return -1;
2380 }
2381
2382 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2383 if (err) {
fc739df5
SG
2384 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2385 if (err) {
2386 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2387 ifname, strerror(-err));
19a26f82 2388 }
fc739df5
SG
2389
2390 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2391 if (err) {
2392 ERROR("failed to setup ipv6 gateway for '%s': %s",
2393 ifname, strerror(-err));
2394 if (netdev->ipv6_gateway_auto) {
2395 char buf[INET6_ADDRSTRLEN];
2396 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2397 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2398 }
2399 return -1;
2400 }
f8fee0e2
MK
2401 }
2402 }
2403
cd54d859
DL
2404 DEBUG("'%s' has been setup", current_ifname);
2405
0ad19a3f 2406 return 0;
2407}
2408
5f4535a3 2409static int setup_network(struct lxc_list *network)
0ad19a3f 2410{
82d5ae15 2411 struct lxc_list *iterator;
82d5ae15 2412 struct lxc_netdev *netdev;
0ad19a3f 2413
5f4535a3 2414 lxc_list_for_each(iterator, network) {
cd54d859 2415
5f4535a3 2416 netdev = iterator->elem;
82d5ae15
DL
2417
2418 if (setup_netdev(netdev)) {
2419 ERROR("failed to setup netdev");
2420 return -1;
2421 }
2422 }
cd54d859 2423
5f4535a3
DL
2424 if (!lxc_list_empty(network))
2425 INFO("network has been setup");
cd54d859
DL
2426
2427 return 0;
0ad19a3f 2428}
2429
2af6bd1b
SH
2430/* try to move physical nics to the init netns */
2431void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2432{
2433 int i, ret, oldfd;
2434 char path[MAXPATHLEN];
4ec31c52 2435 char ifname[IFNAMSIZ];
2af6bd1b
SH
2436
2437 if (netnsfd < 0)
2438 return;
2439
2440 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2441 if (ret < 0 || ret >= MAXPATHLEN) {
2442 WARN("Failed to open monitor netns fd");
2443 return;
2444 }
2445 if ((oldfd = open(path, O_RDONLY)) < 0) {
2446 SYSERROR("Failed to open monitor netns fd");
2447 return;
2448 }
2449 if (setns(netnsfd, 0) != 0) {
2450 SYSERROR("Failed to enter container netns to reset nics");
2451 close(oldfd);
2452 return;
2453 }
2454 for (i=0; i<conf->num_savednics; i++) {
2455 struct saved_nic *s = &conf->saved_nics[i];
f2e206ff 2456 /* retrieve the name of the interface */
2457 if (!if_indextoname(s->ifindex, ifname)) {
2458 WARN("no interface corresponding to index '%d'", s->ifindex);
2459 continue;
2460 }
2461 if (lxc_netdev_move_by_name(ifname, 1, NULL))
2462 WARN("Error moving nic name:%s back to host netns", ifname);
2af6bd1b
SH
2463 }
2464 if (setns(oldfd, 0) != 0)
2465 SYSERROR("Failed to re-enter monitor's netns");
2466 close(oldfd);
2467}
2468
2469void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2470{
2471 int i;
2472
2af6bd1b
SH
2473 if (conf->num_savednics == 0)
2474 return;
2475
7b35f3d6 2476 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2477 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2478 for (i=0; i<conf->num_savednics; i++) {
2479 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2480 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2481 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2482 free(s->orig_name);
2483 }
2484 conf->num_savednics = 0;
7b35f3d6
SH
2485}
2486
ae9242c8
SH
2487static char *default_rootfs_mount = LXCROOTFSMOUNT;
2488
7b379ab3 2489struct lxc_conf *lxc_conf_init(void)
089cd8b8 2490{
7b379ab3 2491 struct lxc_conf *new;
26ddeedd 2492 int i;
7b379ab3
MN
2493
2494 new = malloc(sizeof(*new));
2495 if (!new) {
2496 ERROR("lxc_conf_init : %m");
2497 return NULL;
2498 }
2499 memset(new, 0, sizeof(*new));
2500
b40a606e 2501 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2502 new->personality = -1;
124fa0a8 2503 new->autodev = 1;
596a818d
DE
2504 new->console.log_path = NULL;
2505 new->console.log_fd = -1;
28a4b0e5 2506 new->console.path = NULL;
63376d7d 2507 new->console.peer = -1;
b5159817
DE
2508 new->console.peerpty.busy = -1;
2509 new->console.peerpty.master = -1;
2510 new->console.peerpty.slave = -1;
63376d7d
DL
2511 new->console.master = -1;
2512 new->console.slave = -1;
2513 new->console.name[0] = '\0';
d2e30e99 2514 new->maincmd_fd = -1;
76a26f55 2515 new->nbd_idx = -1;
54c30e29 2516 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2517 if (!new->rootfs.mount) {
2518 ERROR("lxc_conf_init : %m");
2519 free(new);
2520 return NULL;
2521 }
d89de239 2522 new->kmsg = 0;
858377e4 2523 new->logfd = -1;
7b379ab3
MN
2524 lxc_list_init(&new->cgroup);
2525 lxc_list_init(&new->network);
2526 lxc_list_init(&new->mount_list);
81810dd1 2527 lxc_list_init(&new->caps);
1fb86a7c 2528 lxc_list_init(&new->keepcaps);
f6d3e3e4 2529 lxc_list_init(&new->id_map);
f979ac15 2530 lxc_list_init(&new->includes);
4184c3e1 2531 lxc_list_init(&new->aliens);
7c661726 2532 lxc_list_init(&new->environment);
26ddeedd
SH
2533 for (i=0; i<NUM_LXC_HOOKS; i++)
2534 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2535 lxc_list_init(&new->groups);
fe4de9a6
DE
2536 new->lsm_aa_profile = NULL;
2537 new->lsm_se_context = NULL;
5112cd70 2538 new->tmp_umount_proc = 0;
7b379ab3 2539
9f30a190
MM
2540 for (i = 0; i < LXC_NS_MAX; i++)
2541 new->inherit_ns_fd[i] = -1;
2542
72bb04e4
PT
2543 /* if running in a new user namespace, init and COMMAND
2544 * default to running as UID/GID 0 when using lxc-execute */
2545 new->init_uid = 0;
2546 new->init_gid = 0;
2547
7b379ab3 2548 return new;
089cd8b8
DL
2549}
2550
a589434e 2551static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2552{
8634bc19 2553 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2554 char veth2buf[IFNAMSIZ], *veth2;
e54864d3 2555 int err, mtu = 0;
13954cce 2556
8bee8851 2557 if (netdev->priv.veth_attr.pair) {
e892973e 2558 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2559 if (handler->conf->reboot)
2560 lxc_netdev_delete_by_name(veth1);
2561 } else {
9ba8130c
SH
2562 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2563 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2564 ERROR("veth1 name too long");
2565 return -1;
2566 }
a0265685 2567 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2568 if (!veth1) {
2569 ERROR("failed to allocate a temporary name");
2570 return -1;
2571 }
74a2b586
JK
2572 /* store away for deconf */
2573 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2574 }
82d5ae15 2575
0e391e57 2576 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2577 veth2 = lxc_mkifname(veth2buf);
ad40563e 2578 if (!veth2) {
82d5ae15 2579 ERROR("failed to allocate a temporary name");
ad40563e 2580 goto out_delete;
0ad19a3f 2581 }
2582
3cfc0f3a
MN
2583 err = lxc_veth_create(veth1, veth2);
2584 if (err) {
2e2d6a7b 2585 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2586 strerror(-err));
ad40563e 2587 goto out_delete;
0ad19a3f 2588 }
13954cce 2589
49684c0b
CS
2590 /* changing the high byte of the mac address to 0xfe, the bridge interface
2591 * will always keep the host's mac address and not take the mac address
2592 * of a container */
2593 err = setup_private_host_hw_addr(veth1);
2594 if (err) {
2e2d6a7b 2595 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2596 veth1, strerror(-err));
2597 goto out_delete;
2598 }
2599
af651aa9
SN
2600 netdev->ifindex = if_nametoindex(veth2);
2601 if (!netdev->ifindex) {
2602 ERROR("failed to retrieve the index for %s", veth2);
2603 goto out_delete;
2604 }
2605
82d5ae15 2606 if (netdev->mtu) {
e54864d3
NC
2607 mtu = atoi(netdev->mtu);
2608 } else if (netdev->link) {
af651aa9 2609 mtu = netdev_get_mtu(netdev->ifindex);
e54864d3
NC
2610 }
2611
2612 if (mtu) {
2613 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2614 if (!err)
e54864d3 2615 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2616 if (err) {
e54864d3
NC
2617 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2618 mtu, veth1, veth2, strerror(-err));
eb14c10a 2619 goto out_delete;
75d09f83
DL
2620 }
2621 }
2622
3cfc0f3a
MN
2623 if (netdev->link) {
2624 err = lxc_bridge_attach(netdev->link, veth1);
2625 if (err) {
2e2d6a7b 2626 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2627 veth1, netdev->link, strerror(-err));
2628 goto out_delete;
2629 }
eb14c10a
DL
2630 }
2631
d472214b 2632 err = lxc_netdev_up(veth1);
6e35af2e
DL
2633 if (err) {
2634 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2635 goto out_delete;
0ad19a3f 2636 }
2637
e3b4c4c4 2638 if (netdev->upscript) {
751d9dcd
DL
2639 err = run_script(handler->name, "net", netdev->upscript, "up",
2640 "veth", veth1, (char*) NULL);
2641 if (err)
e3b4c4c4 2642 goto out_delete;
e3b4c4c4
ST
2643 }
2644
a589434e 2645 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2646 veth1, veth2, netdev->ifindex);
2647
6ab9ab6d 2648 return 0;
eb14c10a
DL
2649
2650out_delete:
b84f58b9 2651 lxc_netdev_delete_by_name(veth1);
f10fad2f 2652 if (!netdev->priv.veth_attr.pair)
ad40563e 2653 free(veth1);
f10fad2f 2654 free(veth2);
6ab9ab6d 2655 return -1;
13954cce 2656}
d957ae2d 2657
74a2b586
JK
2658static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2659{
2660 char *veth1;
2661 int err;
2662
2663 if (netdev->priv.veth_attr.pair)
2664 veth1 = netdev->priv.veth_attr.pair;
2665 else
2666 veth1 = netdev->priv.veth_attr.veth1;
2667
2668 if (netdev->downscript) {
2669 err = run_script(handler->name, "net", netdev->downscript,
2670 "down", "veth", veth1, (char*) NULL);
2671 if (err)
2672 return -1;
2673 }
2674 return 0;
2675}
2676
a589434e 2677static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2678{
0e391e57 2679 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2680 int err;
d957ae2d
MT
2681
2682 if (!netdev->link) {
2683 ERROR("no link specified for macvlan netdev");
2684 return -1;
2685 }
13954cce 2686
9ba8130c
SH
2687 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2688 if (err >= sizeof(peerbuf))
2689 return -1;
82d5ae15 2690
a0265685 2691 peer = lxc_mkifname(peerbuf);
ad40563e 2692 if (!peer) {
82d5ae15
DL
2693 ERROR("failed to make a temporary name");
2694 return -1;
0ad19a3f 2695 }
2696
3cfc0f3a
MN
2697 err = lxc_macvlan_create(netdev->link, peer,
2698 netdev->priv.macvlan_attr.mode);
2699 if (err) {
2700 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2701 peer, netdev->link, strerror(-err));
ad40563e 2702 goto out;
0ad19a3f 2703 }
2704
82d5ae15
DL
2705 netdev->ifindex = if_nametoindex(peer);
2706 if (!netdev->ifindex) {
36eb9bde 2707 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2708 goto out;
22ebac19 2709 }
2710
e3b4c4c4 2711 if (netdev->upscript) {
751d9dcd
DL
2712 err = run_script(handler->name, "net", netdev->upscript, "up",
2713 "macvlan", netdev->link, (char*) NULL);
2714 if (err)
ad40563e 2715 goto out;
e3b4c4c4
ST
2716 }
2717
a589434e 2718 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2719 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2720
d957ae2d 2721 return 0;
ad40563e
ÇO
2722out:
2723 lxc_netdev_delete_by_name(peer);
2724 free(peer);
2725 return -1;
0ad19a3f 2726}
2727
74a2b586
JK
2728static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2729{
2730 int err;
2731
2732 if (netdev->downscript) {
2733 err = run_script(handler->name, "net", netdev->downscript,
2734 "down", "macvlan", netdev->link,
2735 (char*) NULL);
2736 if (err)
2737 return -1;
2738 }
2739 return 0;
2740}
2741
a589434e
JN
2742/* XXX: merge with instantiate_macvlan */
2743static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2744{
2745 char peer[IFNAMSIZ];
3cfc0f3a 2746 int err;
82f58d03 2747 static uint16_t vlan_cntr = 0;
26c39028
JHS
2748
2749 if (!netdev->link) {
2750 ERROR("no link specified for vlan netdev");
2751 return -1;
2752 }
2753
82f58d03 2754 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2755 if (err >= sizeof(peer)) {
2756 ERROR("peer name too long");
2757 return -1;
2758 }
26c39028 2759
3cfc0f3a
MN
2760 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2761 if (err) {
2762 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2763 peer, netdev->link, strerror(-err));
26c39028
JHS
2764 return -1;
2765 }
2766
2767 netdev->ifindex = if_nametoindex(peer);
2768 if (!netdev->ifindex) {
2769 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2770 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2771 return -1;
2772 }
2773
a589434e 2774 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
2775 netdev->ifindex);
2776
26c39028
JHS
2777 return 0;
2778}
2779
74a2b586
JK
/* A vlan device needs no shutdown work; always reports success. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
2784
a589434e 2785static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2786{
6168e99f
DL
2787 if (!netdev->link) {
2788 ERROR("no link specified for the physical interface");
2789 return -1;
2790 }
2791
9d083402 2792 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2793 if (!netdev->ifindex) {
9d083402 2794 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2795 return -1;
2796 }
2797
e3b4c4c4
ST
2798 if (netdev->upscript) {
2799 int err;
751d9dcd
DL
2800 err = run_script(handler->name, "net", netdev->upscript,
2801 "up", "phys", netdev->link, (char*) NULL);
2802 if (err)
e3b4c4c4 2803 return -1;
e3b4c4c4
ST
2804 }
2805
82d5ae15 2806 return 0;
0ad19a3f 2807}
2808
74a2b586
JK
2809static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2810{
2811 int err;
2812
2813 if (netdev->downscript) {
2814 err = run_script(handler->name, "net", netdev->downscript,
2815 "down", "phys", netdev->link, (char*) NULL);
2816 if (err)
2817 return -1;
2818 }
2819 return 0;
2820}
2821
a589434e 2822static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2823{
2824 netdev->ifindex = 0;
2825 return 0;
2826}
2827
a589434e 2828static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2829{
82d5ae15 2830 netdev->ifindex = 0;
e3b4c4c4
ST
2831 if (netdev->upscript) {
2832 int err;
751d9dcd
DL
2833 err = run_script(handler->name, "net", netdev->upscript,
2834 "up", "empty", (char*) NULL);
2835 if (err)
e3b4c4c4 2836 return -1;
e3b4c4c4 2837 }
82d5ae15 2838 return 0;
0ad19a3f 2839}
2840
74a2b586
JK
2841static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2842{
2843 int err;
2844
2845 if (netdev->downscript) {
2846 err = run_script(handler->name, "net", netdev->downscript,
2847 "down", "empty", (char*) NULL);
2848 if (err)
2849 return -1;
2850 }
2851 return 0;
2852}
2853
26b797f3
SH
/* A "none" device needs no shutdown work; always reports success. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
2858
2859int lxc_requests_empty_network(struct lxc_handler *handler)
2860{
2861 struct lxc_list *network = &handler->conf->network;
2862 struct lxc_list *iterator;
2863 struct lxc_netdev *netdev;
2864 bool found_none = false, found_nic = false;
2865
2866 if (lxc_list_empty(network))
2867 return 0;
2868
2869 lxc_list_for_each(iterator, network) {
2870
2871 netdev = iterator->elem;
2872
2873 if (netdev->type == LXC_NET_NONE)
2874 found_none = true;
2875 else
2876 found_nic = true;
2877 }
2878 if (found_none && !found_nic)
2879 return 1;
2880 return 0;
2881}
2882
e3b4c4c4 2883int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2884{
e3b4c4c4 2885 struct lxc_list *network = &handler->conf->network;
82d5ae15 2886 struct lxc_list *iterator;
82d5ae15 2887 struct lxc_netdev *netdev;
cbef6c52
SH
2888 int am_root = (getuid() == 0);
2889
2890 if (!am_root)
2891 return 0;
0ad19a3f 2892
5f4535a3 2893 lxc_list_for_each(iterator, network) {
0ad19a3f 2894
5f4535a3 2895 netdev = iterator->elem;
13954cce 2896
24654103 2897 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2898 ERROR("invalid network configuration type '%d'",
5f4535a3 2899 netdev->type);
82d5ae15
DL
2900 return -1;
2901 }
0ad19a3f 2902
e3b4c4c4 2903 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2904 ERROR("failed to create netdev");
2905 return -1;
2906 }
e3b4c4c4 2907
0ad19a3f 2908 }
2909
2910 return 0;
2911}
2912
74a2b586 2913void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2914{
74a2b586 2915 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2916 struct lxc_list *iterator;
2917 struct lxc_netdev *netdev;
2918
2919 lxc_list_for_each(iterator, network) {
2920 netdev = iterator->elem;
d472214b 2921
74a2b586 2922 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2923 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2924 WARN("failed to rename to the initial name the " \
2925 "netdev '%s'", netdev->link);
d472214b 2926 continue;
d8f8e352 2927 }
d472214b 2928
74a2b586
JK
2929 if (netdev_deconf[netdev->type](handler, netdev)) {
2930 WARN("failed to destroy netdev");
2931 }
2932
d8f8e352
DL
2933 /* Recent kernel remove the virtual interfaces when the network
2934 * namespace is destroyed but in case we did not moved the
2935 * interface to the network namespace, we have to destroy it
2936 */
74a2b586
JK
2937 if (netdev->ifindex != 0 &&
2938 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2939 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2940 }
2941}
2942
45e854dc
SG
2943#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2944
fe1f672f
ÇO
2945/* lxc-user-nic returns "interface_name:interface_name\n" */
2946#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 2947static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
2948{
2949 pid_t child;
a7242d9a
ÇO
2950 int bytes, pipefd[2];
2951 char *token, *saveptr = NULL;
fe1f672f 2952 char buffer[MAX_BUFFER_SIZE];
cff7b5eb 2953 char netdev_link[IFNAMSIZ+1];
cbef6c52
SH
2954
2955 if (netdev->type != LXC_NET_VETH) {
2956 ERROR("nic type %d not support for unprivileged use",
2957 netdev->type);
2958 return -1;
2959 }
2960
a7242d9a
ÇO
2961 if(pipe(pipefd) < 0) {
2962 SYSERROR("pipe failed");
2963 return -1;
2964 }
2965
cbef6c52
SH
2966 if ((child = fork()) < 0) {
2967 SYSERROR("fork");
a7242d9a
ÇO
2968 close(pipefd[0]);
2969 close(pipefd[1]);
2970 return -1;
2971 }
2972
2973 if (child == 0) { // child
2974 /* close the read-end of the pipe */
2975 close(pipefd[0]);
2976 /* redirect the stdout to write-end of the pipe */
2977 dup2(pipefd[1], STDOUT_FILENO);
2978 /* close the write-end of the pipe */
fe1f672f 2979 close(pipefd[1]);
a7242d9a
ÇO
2980
2981 // Call lxc-user-nic pid type bridge
2982 char pidstr[20];
cff7b5eb
FN
2983 if (netdev->link) {
2984 strncpy(netdev_link, netdev->link, IFNAMSIZ);
2985 } else {
2986 strncpy(netdev_link, "none", IFNAMSIZ);
2987 }
2988 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev_link, netdev->name, NULL };
a7242d9a
ÇO
2989 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
2990 pidstr[19] = '\0';
2991 execvp(args[0], args);
2992 SYSERROR("execvp lxc-user-nic");
2993 exit(1);
2994 }
2995
2996 /* close the write-end of the pipe */
2997 close(pipefd[1]);
2998
fe1f672f 2999 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3000 if (bytes < 0) {
3001 SYSERROR("read failed");
3002 }
3003 buffer[bytes - 1] = '\0';
3004
3005 if (wait_for_pid(child) != 0) {
3006 close(pipefd[0]);
cbef6c52
SH
3007 return -1;
3008 }
3009
a7242d9a
ÇO
3010 /* close the read-end of the pipe */
3011 close(pipefd[0]);
cbef6c52 3012
a7242d9a
ÇO
3013 /* fill netdev->name field */
3014 token = strtok_r(buffer, ":", &saveptr);
3015 if (!token)
3016 return -1;
658979c5
SH
3017 netdev->name = malloc(IFNAMSIZ+1);
3018 if (!netdev->name) {
3019 ERROR("Out of memory");
3020 return -1;
3021 }
3022 memset(netdev->name, 0, IFNAMSIZ+1);
3023 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3024
3025 /* fill netdev->veth_attr.pair field */
3026 token = strtok_r(NULL, ":", &saveptr);
3027 if (!token)
3028 return -1;
3029 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3030 if (!netdev->priv.veth_attr.pair) {
3031 ERROR("Out of memory");
3032 return -1;
3033 }
45e854dc 3034
a7242d9a 3035 return 0;
cbef6c52
SH
3036}
3037
5f4535a3 3038int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3039{
82d5ae15 3040 struct lxc_list *iterator;
82d5ae15 3041 struct lxc_netdev *netdev;
f2e206ff 3042 char ifname[IFNAMSIZ];
cbef6c52 3043 int am_root = (getuid() == 0);
3cfc0f3a 3044 int err;
0ad19a3f 3045
5f4535a3 3046 lxc_list_for_each(iterator, network) {
82d5ae15 3047
5f4535a3 3048 netdev = iterator->elem;
82d5ae15 3049
fbb16259 3050 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3051 if (unpriv_assign_nic(netdev, pid))
3052 return -1;
658979c5
SH
3053 // lxc-user-nic has moved the nic to the new ns.
3054 // unpriv_assign_nic() fills in netdev->name.
3055 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3056 continue;
3057 }
236087a6 3058
fbb16259
SH
3059 /* empty network namespace, nothing to move */
3060 if (!netdev->ifindex)
3061 continue;
3062
f2e206ff 3063 /* retrieve the name of the interface */
3064 if (!if_indextoname(netdev->ifindex, ifname)) {
3065 ERROR("no interface corresponding to index '%d'", netdev->ifindex);
3066 return -1;
3067 }
3068
3069 err = lxc_netdev_move_by_name(ifname, pid, NULL);
3cfc0f3a
MN
3070 if (err) {
3071 ERROR("failed to move '%s' to the container : %s",
3072 netdev->link, strerror(-err));
82d5ae15
DL
3073 return -1;
3074 }
3075
c1c75c04 3076 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3077 }
3078
3079 return 0;
3080}
3081
251d0d2a
DE
3082static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3083 size_t buf_size)
f6d3e3e4
SH
3084{
3085 char path[PATH_MAX];
e4ccd113 3086 int ret, closeret;
f6d3e3e4
SH
3087 FILE *f;
3088
3089 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3090 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3091 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3092 return -E2BIG;
3093 }
3094 f = fopen(path, "w");
3095 if (!f) {
3096 perror("open");
3097 return -EINVAL;
3098 }
251d0d2a 3099 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3100 if (ret < 0)
e4ccd113
SH
3101 SYSERROR("writing id mapping");
3102 closeret = fclose(f);
3103 if (closeret)
3104 SYSERROR("writing id mapping");
3105 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3106}
3107
3108int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3109{
3110 struct lxc_list *iterator;
3111 struct id_map *map;
8afb3e61 3112 int ret = 0, use_shadow = 0;
251d0d2a 3113 enum idtype type;
8afb3e61
SG
3114 char *buf = NULL, *pos, *cmdpath = NULL;
3115
22038de5
SH
3116 /*
3117 * If newuidmap exists, that is, if shadow is handing out subuid
3118 * ranges, then insist that root also reserve ranges in subuid. This
3119 * will protected it by preventing another user from being handed the
3120 * range by shadow.
3121 */
9d9c111c 3122 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3123 if (cmdpath) {
3124 use_shadow = 1;
3125 free(cmdpath);
3126 }
3127
0e6e3a41
SG
3128 if (!use_shadow && geteuid()) {
3129 ERROR("Missing newuidmap/newgidmap");
3130 return -1;
3131 }
251d0d2a
DE
3132
3133 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3134 int left, fill;
cf3ef16d
SH
3135 int had_entry = 0;
3136 if (!buf) {
3137 buf = pos = malloc(4096);
4f7521b4
SH
3138 if (!buf)
3139 return -ENOMEM;
cf3ef16d
SH
3140 }
3141 pos = buf;
0e6e3a41 3142 if (use_shadow)
d1838f34 3143 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3144 type == ID_TYPE_UID ? 'u' : 'g',
3145 pid);
4f7521b4 3146
cf3ef16d
SH
3147 lxc_list_for_each(iterator, idmap) {
3148 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3149 map = iterator->elem;
cf3ef16d
SH
3150 if (map->idtype != type)
3151 continue;
3152
3153 had_entry = 1;
3154 left = 4096 - (pos - buf);
d1838f34 3155 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3156 use_shadow ? " " : "",
d1838f34 3157 map->nsid, map->hostid, map->range,
0e6e3a41 3158 use_shadow ? "" : "\n");
cf3ef16d
SH
3159 if (fill <= 0 || fill >= left)
3160 SYSERROR("snprintf failed, too many mappings");
3161 pos += fill;
251d0d2a 3162 }
cf3ef16d 3163 if (!had_entry)
4f7521b4 3164 continue;
cf3ef16d 3165
0e6e3a41 3166 if (!use_shadow) {
cf3ef16d 3167 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3168 } else {
3169 left = 4096 - (pos - buf);
3170 fill = snprintf(pos, left, "\n");
3171 if (fill <= 0 || fill >= left)
3172 SYSERROR("snprintf failed, too many mappings");
3173 pos += fill;
cf3ef16d 3174 ret = system(buf);
d1838f34 3175 }
cf3ef16d 3176
f6d3e3e4
SH
3177 if (ret)
3178 break;
3179 }
251d0d2a 3180
f10fad2f 3181 free(buf);
f6d3e3e4
SH
3182 return ret;
3183}
3184
cf3ef16d 3185/*
7b50c609
TS
3186 * return the host uid/gid to which the container root is mapped in
3187 * *val.
0b3a6504 3188 * Return true if id was found, false otherwise.
cf3ef16d 3189 */
2a9a80cb 3190bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3191 unsigned long *val)
cf3ef16d
SH
3192{
3193 struct lxc_list *it;
3194 struct id_map *map;
3195
3196 lxc_list_for_each(it, &conf->id_map) {
3197 map = it->elem;
7b50c609 3198 if (map->idtype != idtype)
cf3ef16d
SH
3199 continue;
3200 if (map->nsid != 0)
3201 continue;
2a9a80cb
SH
3202 *val = map->hostid;
3203 return true;
cf3ef16d 3204 }
2a9a80cb 3205 return false;
cf3ef16d
SH
3206}
3207
2133f58c 3208int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3209{
3210 struct lxc_list *it;
3211 struct id_map *map;
3212 lxc_list_for_each(it, &conf->id_map) {
3213 map = it->elem;
2133f58c 3214 if (map->idtype != idtype)
cf3ef16d
SH
3215 continue;
3216 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3217 return (id - map->hostid) + map->nsid;
cf3ef16d 3218 }
57d116ab 3219 return -1;
cf3ef16d
SH
3220}
3221
2133f58c 3222int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3223{
3224 struct lxc_list *it;
3225 struct id_map *map;
2133f58c 3226 unsigned int freeid = 0;
cf3ef16d
SH
3227again:
3228 lxc_list_for_each(it, &conf->id_map) {
3229 map = it->elem;
2133f58c 3230 if (map->idtype != idtype)
cf3ef16d
SH
3231 continue;
3232 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3233 freeid = map->nsid + map->range;
3234 goto again;
3235 }
3236 }
3237 return freeid;
3238}
3239
19a26f82
MK
3240int lxc_find_gateway_addresses(struct lxc_handler *handler)
3241{
3242 struct lxc_list *network = &handler->conf->network;
3243 struct lxc_list *iterator;
3244 struct lxc_netdev *netdev;
3245 int link_index;
3246
3247 lxc_list_for_each(iterator, network) {
3248 netdev = iterator->elem;
3249
3250 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3251 continue;
3252
3253 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3254 ERROR("gateway = auto only supported for "
3255 "veth and macvlan");
3256 return -1;
3257 }
3258
3259 if (!netdev->link) {
3260 ERROR("gateway = auto needs a link interface");
3261 return -1;
3262 }
3263
3264 link_index = if_nametoindex(netdev->link);
3265 if (!link_index)
3266 return -EINVAL;
3267
3268 if (netdev->ipv4_gateway_auto) {
3269 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3270 ERROR("failed to automatically find ipv4 gateway "
3271 "address from link interface '%s'", netdev->link);
3272 return -1;
3273 }
3274 }
3275
3276 if (netdev->ipv6_gateway_auto) {
3277 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3278 ERROR("failed to automatically find ipv6 gateway "
3279 "address from link interface '%s'", netdev->link);
3280 return -1;
3281 }
3282 }
3283 }
3284
3285 return 0;
3286}
3287
5e4a62bf 3288int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3289{
5e4a62bf 3290 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3291 int i, ret;
b0a33c1e 3292
5e4a62bf
DL
3293 /* no tty in the configuration */
3294 if (!conf->tty)
b0a33c1e 3295 return 0;
3296
13954cce 3297 tty_info->pty_info =
e4e7d59d 3298 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3299 if (!tty_info->pty_info) {
36eb9bde 3300 SYSERROR("failed to allocate pty_info");
985d15b1 3301 return -1;
b0a33c1e 3302 }
3303
985d15b1 3304 for (i = 0; i < conf->tty; i++) {
13954cce 3305
b0a33c1e 3306 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3307
025ed0f3
SH
3308 process_lock();
3309 ret = openpty(&pty_info->master, &pty_info->slave,
3310 pty_info->name, NULL, NULL);
3311 process_unlock();
3312 if (ret) {
36eb9bde 3313 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3314 tty_info->nbtty = i;
3315 lxc_delete_tty(tty_info);
3316 return -1;
b0a33c1e 3317 }
3318
5332bb84
DL
3319 DEBUG("allocated pty '%s' (%d/%d)",
3320 pty_info->name, pty_info->master, pty_info->slave);
3321
3ec1648d 3322 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3323 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3324 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3325
b0a33c1e 3326 pty_info->busy = 0;
3327 }
3328
985d15b1 3329 tty_info->nbtty = conf->tty;
1ac470c0
DL
3330
3331 INFO("tty's configured");
3332
985d15b1 3333 return 0;
b0a33c1e 3334}
3335
3336void lxc_delete_tty(struct lxc_tty_info *tty_info)
3337{
3338 int i;
3339
3340 for (i = 0; i < tty_info->nbtty; i++) {
3341 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3342
3343 close(pty_info->master);
3344 close(pty_info->slave);
3345 }
3346
3347 free(tty_info->pty_info);
3348 tty_info->nbtty = 0;
3349}
3350
f6d3e3e4 3351/*
7b50c609
TS
3352 * chown_mapped_root: for an unprivileged user with uid/gid X to
3353 * chown a dir to subuid/subgid Y, he needs to run chown as root
3354 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3355 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3356 * root is privileged with respect to hostuid/hostgid X, allowing
3357 * him to do the chown.
f6d3e3e4 3358 */
c4d10a05 3359int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3360{
7b50c609
TS
3361 uid_t rootuid;
3362 gid_t rootgid;
c4d10a05 3363 pid_t pid;
2a9a80cb 3364 unsigned long val;
a7ef8753 3365 char *chownpath = path;
f6d3e3e4 3366
2a9a80cb 3367 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3368 ERROR("No mapping for container root");
3369 return -1;
f6d3e3e4 3370 }
7b50c609
TS
3371 rootuid = (uid_t) val;
3372 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3373 ERROR("No mapping for container root");
3374 return -1;
3375 }
3376 rootgid = (gid_t) val;
2a9a80cb 3377
a7ef8753
SH
3378 /*
3379 * In case of overlay, we want only the writeable layer
3380 * to be chowned
3381 */
1f92162d 3382 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3383 chownpath = strchr(path, ':');
3384 if (!chownpath) {
3385 ERROR("Bad overlay path: %s", path);
3386 return -1;
3387 }
3388 chownpath = strchr(chownpath+1, ':');
3389 if (!chownpath) {
3390 ERROR("Bad overlay path: %s", path);
3391 return -1;
3392 }
3393 chownpath++;
3394 }
3395 path = chownpath;
c4d10a05 3396 if (geteuid() == 0) {
7b50c609 3397 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3398 ERROR("Error chowning %s", path);
3399 return -1;
3400 }
3401 return 0;
3402 }
f3d7e4ca 3403
7b50c609 3404 if (rootuid == geteuid()) {
f3d7e4ca
SH
3405 // nothing to do
3406 INFO("%s: container root is our uid; no need to chown" ,__func__);
3407 return 0;
3408 }
3409
c4d10a05
SH
3410 pid = fork();
3411 if (pid < 0) {
3412 SYSERROR("Failed forking");
f6d3e3e4
SH
3413 return -1;
3414 }
c4d10a05 3415 if (!pid) {
7b50c609
TS
3416 int hostuid = geteuid(), hostgid = getegid(), ret;
3417 struct stat sb;
3418 char map1[100], map2[100], map3[100], map4[100], map5[100];
3419 char ugid[100];
3420 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3421 "-m", map3, "-m", map5,
3422 "--", "chown", ugid, path, NULL };
3423 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3424 "-m", map3, "-m", map4, "-m", map5,
3425 "--", "chown", ugid, path, NULL };
3426
3427 // save the current gid of "path"
3428 if (stat(path, &sb) < 0) {
3429 ERROR("Error stat %s", path);
3430 return -1;
3431 }
f6d3e3e4 3432
9a7c2aba
SH
3433 /*
3434 * A file has to be group-owned by a gid mapped into the
3435 * container, or the container won't be privileged over it.
3436 */
3437 if (sb.st_uid == geteuid() &&
3438 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3439 chown(path, -1, hostgid) < 0) {
3440 ERROR("Failed chgrping %s", path);
7b50c609
TS
3441 return -1;
3442 }
3443
3444 // "u:0:rootuid:1"
3445 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3446 if (ret < 0 || ret >= 100) {
3447 ERROR("Error uid printing map string");
f6d3e3e4
SH
3448 return -1;
3449 }
c4d10a05 3450
98e5ba51
SH
3451 // "u:hostuid:hostuid:1"
3452 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3453 if (ret < 0 || ret >= 100) {
3454 ERROR("Error uid printing map string");
3455 return -1;
3456 }
3457
7b50c609
TS
3458 // "g:0:rootgid:1"
3459 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3460 if (ret < 0 || ret >= 100) {
7b50c609 3461 ERROR("Error gid printing map string");
c4d10a05
SH
3462 return -1;
3463 }
3464
7b50c609 3465 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3466 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3467 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3468 if (ret < 0 || ret >= 100) {
3469 ERROR("Error gid printing map string");
3470 return -1;
3471 }
3472
3473 // "g:hostgid:hostgid:1"
3474 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3475 if (ret < 0 || ret >= 100) {
3476 ERROR("Error gid printing map string");
3477 return -1;
3478 }
3479
3480 // "0:pathgid" (chown)
b4c1e35d 3481 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3482 if (ret < 0 || ret >= 100) {
3483 ERROR("Error owner printing format string for chown");
3484 return -1;
3485 }
3486
3487 if (hostgid == sb.st_gid)
3488 ret = execvp("lxc-usernsexec", args1);
3489 else
3490 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3491 SYSERROR("Failed executing usernsexec");
3492 exit(1);
f6d3e3e4 3493 }
c4d10a05 3494 return wait_for_pid(pid);
f6d3e3e4
SH
3495}
3496
c4d10a05 3497int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3498{
c4d10a05 3499 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3500 return 0;
c4d10a05 3501
29b10e4f 3502 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3503 ERROR("Failed to chown %s", c->console.name);
3504 return -1;
3505 }
3506
f6d3e3e4
SH
3507 return 0;
3508}
3509
5112cd70
SH
3510int tmp_proc_mount(struct lxc_conf *lxc_conf)
3511{
3512 int mounted;
3513
01958b1f 3514 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3515 if (mounted == -1) {
3516 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3517 /* continue only if there is no rootfs */
3518 if (lxc_conf->rootfs.path)
3519 return -1;
5112cd70
SH
3520 } else if (mounted == 1) {
3521 lxc_conf->tmp_umount_proc = 1;
3522 }
3523 return 0;
3524}
3525
3526void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3527{
3528 if (lxc_conf->tmp_umount_proc == 1) {
3529 umount("/proc");
3530 lxc_conf->tmp_umount_proc = 0;
3531 }
3532}
3533
/*
 * Walk /proc/self/mountinfo and mark every mount whose options field
 * mentions "shared" as MS_SLAVE. Failures are logged but never abort
 * container startup.
 */
void remount_all_slave(void)
{
	char *line = NULL;
	size_t len = 0;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&line, &len, f) != -1) {
		char *target, *opts;

		target = get_field(line, 4);
		opts = target ? get_field(target, 2) : NULL;
		if (!opts)
			continue;

		/* terminate opts first; target is only cut once we need it */
		null_endofword(opts);
		if (strstr(opts, "shared")) {
			null_endofword(target);
			if (mount(NULL, target, NULL, MS_SLAVE, NULL)) {
				SYSERROR("Failed to make %s rslave", target);
				ERROR("Continuing...");
			}
		}
	}

	fclose(f);
	free(line);
}
3567
2322903b
SH
3568void lxc_execute_bind_init(struct lxc_conf *conf)
3569{
3570 int ret;
9d9c111c
SH
3571 char path[PATH_MAX], destpath[PATH_MAX], *p;
3572
3573 /* If init exists in the container, don't bind mount a static one */
3574 p = choose_init(conf->rootfs.mount);
3575 if (p) {
3576 free(p);
3577 return;
3578 }
2322903b
SH
3579
3580 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3581 if (ret < 0 || ret >= PATH_MAX) {
3582 WARN("Path name too long searching for lxc.init.static");
3583 return;
3584 }
3585
3586 if (!file_exists(path)) {
3587 INFO("%s does not exist on host", path);
3588 return;
3589 }
3590
3591 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3592 if (ret < 0 || ret >= PATH_MAX) {
3593 WARN("Path name too long for container's lxc.init.static");
3594 return;
3595 }
3596
3597 if (!file_exists(destpath)) {
3598 FILE * pathfile = fopen(destpath, "wb");
3599 if (!pathfile) {
3600 SYSERROR("Failed to create mount target '%s'", destpath);
3601 return;
3602 }
3603 fclose(pathfile);
3604 }
3605
592fd47a 3606 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3607 if (ret < 0)
3608 SYSERROR("Failed to bind lxc.init.static into container");
3609 INFO("lxc.init.static bound into container at %s", path);
3610}
3611
35120d9c
SH
3612/*
3613 * This does the work of remounting / if it is shared, calling the
3614 * container pre-mount hooks, and mounting the rootfs.
3615 */
3616int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3617{
35120d9c
SH
3618 if (conf->rootfs_setup) {
3619 /*
3620 * rootfs was set up in another namespace. bind-mount it
3621 * to give us a mount in our own ns so we can pivot_root to it
3622 */
3623 const char *path = conf->rootfs.mount;
3624 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3625 ERROR("Failed to bind-mount container / onto itself");
145832ba 3626 return -1;
35120d9c 3627 }
145832ba 3628 return 0;
35120d9c 3629 }
d4ef7c50 3630
e995d7a2
SH
3631 remount_all_slave();
3632
35120d9c
SH
3633 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3634 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3635 return -1;
3636 }
3637
3638 if (setup_rootfs(conf)) {
3639 ERROR("failed to setup rootfs for '%s'", name);
3640 return -1;
3641 }
3642
3643 conf->rootfs_setup = true;
3644 return 0;
3645}
3646
1c1c7051
SH
3647static bool verify_start_hooks(struct lxc_conf *conf)
3648{
3649 struct lxc_list *it;
3650 char path[MAXPATHLEN];
3651 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3652 char *hookname = it->elem;
3653 struct stat st;
3654 int ret;
3655
3656 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3657 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3658 if (ret < 0 || ret >= MAXPATHLEN)
3659 return false;
3660 ret = stat(path, &st);
3661 if (ret) {
7b6753e7 3662 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3663 hookname);
3664 return false;
3665 }
6a0c909a 3666 return true;
1c1c7051
SH
3667 }
3668
3669 return true;
3670}
3671
e8bd4e43
SH
3672static int send_fd(int sock, int fd)
3673{
3674 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3675
3676
3677 if (ret < 0) {
3678 SYSERROR("Error sending tty fd to parent");
3679 return -1;
3680 }
3681
3682 return 0;
3683}
3684
3685static int send_ttys_to_parent(struct lxc_handler *handler)
3686{
3687 struct lxc_conf *conf = handler->conf;
3688 const struct lxc_tty_info *tty_info = &conf->tty_info;
3689 int i;
3690 int sock = handler->ttysock[0];
3691
3692 for (i = 0; i < tty_info->nbtty; i++) {
3693 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3694 if (send_fd(sock, pty_info->slave) < 0)
3695 goto bad;
3696 close(pty_info->slave);
3697 pty_info->slave = -1;
3698 if (send_fd(sock, pty_info->master) < 0)
3699 goto bad;
3700 close(pty_info->master);
3701 pty_info->master = -1;
3702 }
3703
3704 close(handler->ttysock[0]);
3705 close(handler->ttysock[1]);
3706
3707 return 0;
3708
3709bad:
3710 ERROR("Error writing tty fd to parent");
3711 return -1;
3712}
3713
35120d9c
SH
3714int lxc_setup(struct lxc_handler *handler)
3715{
3716 const char *name = handler->name;
3717 struct lxc_conf *lxc_conf = handler->conf;
3718 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3719
3720 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3721 ERROR("Error setting up rootfs mount after spawn");
3722 return -1;
3723 }
3724
6c544cb3
MM
3725 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3726 if (setup_utsname(lxc_conf->utsname)) {
3727 ERROR("failed to setup the utsname for '%s'", name);
3728 return -1;
3729 }
0ad19a3f 3730 }
3731
5f4535a3 3732 if (setup_network(&lxc_conf->network)) {
36eb9bde 3733 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3734 return -1;
0ad19a3f 3735 }
3736
bc6928ff 3737 if (lxc_conf->autodev > 0) {
14221cbb 3738 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 3739 ERROR("failed to mount /dev in the container");
c6883f38
SH
3740 return -1;
3741 }
3742 }
3743
368bbc02
CS
3744 /* do automatic mounts (mainly /proc and /sys), but exclude
3745 * those that need to wait until other stuff has finished
3746 */
4fb3cba5 3747 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3748 ERROR("failed to setup the automatic mounts for '%s'", name);
3749 return -1;
3750 }
3751
0a2dddd4 3752 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath)) {
36eb9bde 3753 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3754 return -1;
576f946d 3755 }
3756
0a2dddd4 3757 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name, lxcpath)) {
e7938e9e
MN
3758 ERROR("failed to setup the mount entries for '%s'", name);
3759 return -1;
3760 }
3761
7b6753e7 3762 /* Make sure any start hooks are in the container */
1c1c7051
SH
3763 if (!verify_start_hooks(lxc_conf))
3764 return -1;
3765
2322903b
SH
3766 if (lxc_conf->is_execute)
3767 lxc_execute_bind_init(lxc_conf);
3768
368bbc02
CS
3769 /* now mount only cgroup, if wanted;
3770 * before, /sys could not have been mounted
3771 * (is either mounted automatically or via fstab entries)
3772 */
4fb3cba5 3773 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3774 ERROR("failed to setup the automatic mounts for '%s'", name);
3775 return -1;
3776 }
3777
283678ed 3778 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3779 ERROR("failed to run mount hooks for container '%s'.", name);
3780 return -1;
3781 }
3782
bc6928ff 3783 if (lxc_conf->autodev > 0) {
283678ed 3784 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3785 ERROR("failed to run autodev hooks for container '%s'.", name);
3786 return -1;
3787 }
14221cbb 3788 if (fill_autodev(&lxc_conf->rootfs)) {
91c3830e
SH
3789 ERROR("failed to populate /dev in the container");
3790 return -1;
3791 }
3792 }
368bbc02 3793
37903589 3794 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3795 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3796 return -1;
6e590161 3797 }
3798
7e0e1d94
AV
3799 if (lxc_conf->kmsg) {
3800 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3801 ERROR("failed to setup kmsg for '%s'", name);
3802 }
1bd051a6 3803
69aa6655
DE
3804 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3805 ERROR("failed to setup /dev symlinks for '%s'", name);
3806 return -1;
3807 }
3808
5112cd70
SH
3809 /* mount /proc if it's not already there */
3810 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3811 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3812 return -1;
e075f5d9 3813 }
e075f5d9 3814
ac778708 3815 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3816 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3817 return -1;
ed502555 3818 }
3819
571e6ec8 3820 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3821 ERROR("failed to setup the new pts instance");
95b5ffaf 3822 return -1;
3c26f34e 3823 }
3824
e8bd4e43
SH
3825 if (lxc_create_tty(name, lxc_conf)) {
3826 ERROR("failed to create the ttys");
3827 return -1;
3828 }
3829
3830 if (send_ttys_to_parent(handler) < 0) {
3831 ERROR("failure sending console info to parent");
3832 return -1;
3833 }
3834
3835
3836 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3837 ERROR("failed to setup the ttys for '%s'", name);
3838 return -1;
3839 }
3840
3841 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3842 SYSERROR("failed to set environment variable for container ptys");
3843
3844
cccc74b5
DL
3845 if (setup_personality(lxc_conf->personality)) {
3846 ERROR("failed to setup personality");
3847 return -1;
3848 }
3849
97a8f74f
SG
3850 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3851 if (!lxc_list_empty(&lxc_conf->caps)) {
3852 ERROR("Simultaneously requested dropping and keeping caps");
f6d3e3e4
SH
3853 return -1;
3854 }
97a8f74f
SG
3855 if (dropcaps_except(&lxc_conf->keepcaps)) {
3856 ERROR("failed to keep requested caps");
3857 return -1;
3858 }
3859 } else if (setup_caps(&lxc_conf->caps)) {
3860 ERROR("failed to drop capabilities");
3861 return -1;
81810dd1
DL
3862 }
3863
cd54d859
DL
3864 NOTICE("'%s' is setup.", name);
3865
0ad19a3f 3866 return 0;
3867}
26ddeedd 3868
283678ed
SH
3869int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3870 const char *lxcpath, char *argv[])
26ddeedd
SH
3871{
3872 int which = -1;
3873 struct lxc_list *it;
3874
3875 if (strcmp(hook, "pre-start") == 0)
3876 which = LXCHOOK_PRESTART;
5ea6163a
SH
3877 else if (strcmp(hook, "pre-mount") == 0)
3878 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3879 else if (strcmp(hook, "mount") == 0)
3880 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3881 else if (strcmp(hook, "autodev") == 0)
3882 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3883 else if (strcmp(hook, "start") == 0)
3884 which = LXCHOOK_START;
52492063
WB
3885 else if (strcmp(hook, "stop") == 0)
3886 which = LXCHOOK_STOP;
26ddeedd
SH
3887 else if (strcmp(hook, "post-stop") == 0)
3888 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3889 else if (strcmp(hook, "clone") == 0)
3890 which = LXCHOOK_CLONE;
37cf711b
SY
3891 else if (strcmp(hook, "destroy") == 0)
3892 which = LXCHOOK_DESTROY;
26ddeedd
SH
3893 else
3894 return -1;
3895 lxc_list_for_each(it, &conf->hooks[which]) {
3896 int ret;
3897 char *hookname = it->elem;
283678ed 3898 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3899 if (ret)
3900 return ret;
3901 }
3902 return 0;
3903}
72d0e1cb 3904
427b3a21 3905static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3906{
3907 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3908 struct lxc_list *it2,*next;
72d0e1cb
SG
3909
3910 lxc_list_del(it);
3911
f10fad2f
ME
3912 free(netdev->link);
3913 free(netdev->name);
3914 if (netdev->type == LXC_NET_VETH)
c9bb9a85 3915 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
3916 free(netdev->upscript);
3917 free(netdev->hwaddr);
3918 free(netdev->mtu);
3919 free(netdev->ipv4_gateway);
3920 free(netdev->ipv6_gateway);
9ebb03ad 3921 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3922 lxc_list_del(it2);
3923 free(it2->elem);
3924 free(it2);
3925 }
9ebb03ad 3926 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3927 lxc_list_del(it2);
3928 free(it2->elem);
3929 free(it2);
3930 }
d95db067 3931 free(netdev);
72d0e1cb
SG
3932 free(it);
3933}
3934
3935/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3936int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3937{
3938 char *p1;
3939 int ret, idx, i;
3940 struct lxc_list *it;
3941 struct lxc_netdev *netdev;
3942
46cd2845 3943 p1 = strchr(key, '.');
72d0e1cb
SG
3944 if (!p1 || *(p1+1) == '\0')
3945 p1 = NULL;
3946
3947 ret = sscanf(key, "%d", &idx);
3948 if (ret != 1) return -1;
3949 if (idx < 0)
3950 return -1;
3951
3952 i = 0;
3953 lxc_list_for_each(it, &c->network) {
3954 if (i == idx)
3955 break;
3956 i++;
3957 }
3958 if (i < idx) // we don't have that many nics defined
3959 return -1;
3960
3961 if (!it || !it->elem)
3962 return -1;
3963
3964 netdev = it->elem;
3965
3966 if (!p1) {
3967 lxc_remove_nic(it);
52d21d40 3968 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3969 struct lxc_list *it2,*next;
3970 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3971 lxc_list_del(it2);
3972 free(it2->elem);
3973 free(it2);
3974 }
52d21d40 3975 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
3976 struct lxc_list *it2,*next;
3977 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3978 lxc_list_del(it2);
3979 free(it2->elem);
3980 free(it2);
3981 }
72d0e1cb
SG
3982 }
3983 else return -1;
3984
3985 return 0;
3986}
3987
3988int lxc_clear_config_network(struct lxc_conf *c)
3989{
9ebb03ad
DE
3990 struct lxc_list *it,*next;
3991 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3992 lxc_remove_nic(it);
3993 }
3994 return 0;
3995}
3996
3997int lxc_clear_config_caps(struct lxc_conf *c)
3998{
9ebb03ad 3999 struct lxc_list *it,*next;
72d0e1cb 4000
9ebb03ad 4001 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4002 lxc_list_del(it);
4003 free(it->elem);
4004 free(it);
4005 }
4006 return 0;
4007}
4008
74a3920a 4009static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4010 struct lxc_list *it, *next;
4011
4355ab5f 4012 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4013 lxc_list_del(it);
4014 free(it->elem);
4015 free(it);
4016 }
4017 return 0;
4018}
4019
4355ab5f
SH
4020int lxc_clear_idmaps(struct lxc_conf *c)
4021{
4022 return lxc_free_idmap(&c->id_map);
4023}
4024
1fb86a7c
SH
4025int lxc_clear_config_keepcaps(struct lxc_conf *c)
4026{
4027 struct lxc_list *it,*next;
4028
4029 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4030 lxc_list_del(it);
4031 free(it->elem);
4032 free(it);
4033 }
4034 return 0;
4035}
4036
12a50cc6 4037int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4038{
9ebb03ad 4039 struct lxc_list *it,*next;
72d0e1cb 4040 bool all = false;
12a50cc6 4041 const char *k = key + 11;
72d0e1cb
SG
4042
4043 if (strcmp(key, "lxc.cgroup") == 0)
4044 all = true;
4045
9ebb03ad 4046 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4047 struct lxc_cgroup *cg = it->elem;
4048 if (!all && strcmp(cg->subsystem, k) != 0)
4049 continue;
4050 lxc_list_del(it);
4051 free(cg->subsystem);
4052 free(cg->value);
4053 free(cg);
4054 free(it);
4055 }
4056 return 0;
4057}
4058
ee1e7aa0
SG
4059int lxc_clear_groups(struct lxc_conf *c)
4060{
4061 struct lxc_list *it,*next;
4062
4063 lxc_list_for_each_safe(it, &c->groups, next) {
4064 lxc_list_del(it);
4065 free(it->elem);
4066 free(it);
4067 }
4068 return 0;
4069}
4070
ab799c0b
SG
4071int lxc_clear_environment(struct lxc_conf *c)
4072{
4073 struct lxc_list *it,*next;
4074
4075 lxc_list_for_each_safe(it, &c->environment, next) {
4076 lxc_list_del(it);
4077 free(it->elem);
4078 free(it);
4079 }
4080 return 0;
4081}
4082
4083
72d0e1cb
SG
4084int lxc_clear_mount_entries(struct lxc_conf *c)
4085{
9ebb03ad 4086 struct lxc_list *it,*next;
72d0e1cb 4087
9ebb03ad 4088 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4089 lxc_list_del(it);
4090 free(it->elem);
4091 free(it);
4092 }
4093 return 0;
4094}
4095
b099e9e9
SH
4096int lxc_clear_automounts(struct lxc_conf *c)
4097{
4098 c->auto_mounts = 0;
4099 return 0;
4100}
4101
12a50cc6 4102int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4103{
9ebb03ad 4104 struct lxc_list *it,*next;
17ed13a3 4105 bool all = false, done = false;
12a50cc6 4106 const char *k = key + 9;
72d0e1cb
SG
4107 int i;
4108
17ed13a3
SH
4109 if (strcmp(key, "lxc.hook") == 0)
4110 all = true;
4111
72d0e1cb 4112 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4113 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4114 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4115 lxc_list_del(it);
4116 free(it->elem);
4117 free(it);
4118 }
4119 done = true;
72d0e1cb
SG
4120 }
4121 }
17ed13a3
SH
4122
4123 if (!done) {
4124 ERROR("Invalid hook key: %s", key);
4125 return -1;
4126 }
72d0e1cb
SG
4127 return 0;
4128}
8eb5694b 4129
74a3920a 4130static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4131{
4132 int i;
4133
0cf45501 4134 if (!conf->saved_nics)
7b35f3d6
SH
4135 return;
4136 for (i=0; i < conf->num_savednics; i++)
4137 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4138 free(conf->saved_nics);
4139}
4140
4184c3e1
SH
4141static inline void lxc_clear_aliens(struct lxc_conf *conf)
4142{
4143 struct lxc_list *it,*next;
4144
4145 lxc_list_for_each_safe(it, &conf->aliens, next) {
4146 lxc_list_del(it);
4147 free(it->elem);
4148 free(it);
4149 }
4150}
4151
f979ac15
SH
4152static inline void lxc_clear_includes(struct lxc_conf *conf)
4153{
4154 struct lxc_list *it,*next;
4155
4156 lxc_list_for_each_safe(it, &conf->includes, next) {
4157 lxc_list_del(it);
4158 free(it->elem);
4159 free(it);
4160 }
4161}
4162
8eb5694b
SH
4163void lxc_conf_free(struct lxc_conf *conf)
4164{
4165 if (!conf)
4166 return;
858377e4
SH
4167 if (current_config == conf)
4168 current_config = NULL;
f10fad2f
ME
4169 free(conf->console.log_path);
4170 free(conf->console.path);
4171 free(conf->rootfs.mount);
4172 free(conf->rootfs.options);
4173 free(conf->rootfs.path);
4174 free(conf->rootfs.pivot);
4175 free(conf->logfile);
858377e4
SH
4176 if (conf->logfd != -1)
4177 close(conf->logfd);
f10fad2f
ME
4178 free(conf->utsname);
4179 free(conf->ttydir);
4180 free(conf->fstab);
4181 free(conf->rcfile);
4182 free(conf->init_cmd);
6b0d5538 4183 free(conf->unexpanded_config);
393903d1 4184 free(conf->pty_names);
8eb5694b 4185 lxc_clear_config_network(conf);
f10fad2f
ME
4186 free(conf->lsm_aa_profile);
4187 free(conf->lsm_se_context);
769872f9 4188 lxc_seccomp_free(conf);
8eb5694b 4189 lxc_clear_config_caps(conf);
1fb86a7c 4190 lxc_clear_config_keepcaps(conf);
8eb5694b 4191 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4192 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4193 lxc_clear_mount_entries(conf);
7b35f3d6 4194 lxc_clear_saved_nics(conf);
27c27d73 4195 lxc_clear_idmaps(conf);
ee1e7aa0 4196 lxc_clear_groups(conf);
f979ac15 4197 lxc_clear_includes(conf);
761d81ca 4198 lxc_clear_aliens(conf);
ab799c0b 4199 lxc_clear_environment(conf);
8eb5694b
SH
4200 free(conf);
4201}
4355ab5f
SH
4202
/* Argument bundle handed to run_userns_fn(). */
struct userns_fn_data {
	int (*fn)(void *);	/* function to run once released */
	void *arg;		/* argument passed to fn */
	int p[2];		/* pipe: p[0] is read from, p[1] is closed, by run_userns_fn() */
};

/*
 * Entry point taking a struct userns_fn_data: closes p[1], blocks until
 * one byte arrives on p[0], then closes p[0] and runs d->fn(d->arg).
 *
 * Returns the result of fn, or -1 if the byte could not be read (fn is
 * not called in that case).
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *d = data;
	char token;

	// we're not sharing with the parent any more, if it was a thread
	close(d->p[1]);

	if (read(d->p[0], &token, 1) != 1)
		return -1;

	close(d->p[0]);

	return d->fn(d->arg);
}
4221
4222/*
8b227008
TS
4223 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4224 * if they are not already there.
4355ab5f 4225 */
8b227008
TS
4226static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4227 uid_t uid, gid_t gid)
4355ab5f 4228{
8b227008
TS
4229 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4230 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4231 struct lxc_list *new = NULL, *tmp, *it, *next;
4232 struct id_map *entry;
4233
3ec1648d
SH
4234 new = malloc(sizeof(*new));
4235 if (!new) {
4236 ERROR("Out of memory building id map");
4237 return NULL;
4238 }
4239 lxc_list_init(new);
4240
8b227008
TS
4241 if (hostuid_mapped < 0) {
4242 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4243 if (hostuid_mapped < 0)
3ec1648d
SH
4244 goto err;
4245 tmp = malloc(sizeof(*tmp));
4246 if (!tmp)
4247 goto err;
4355ab5f
SH
4248 entry = malloc(sizeof(*entry));
4249 if (!entry) {
3ec1648d
SH
4250 free(tmp);
4251 goto err;
4355ab5f 4252 }
3ec1648d 4253 tmp->elem = entry;
4355ab5f 4254 entry->idtype = ID_TYPE_UID;
8b227008
TS
4255 entry->nsid = hostuid_mapped;
4256 entry->hostid = (unsigned long) uid;
4257 entry->range = 1;
4258 lxc_list_add_tail(new, tmp);
4259 }
4260 if (hostgid_mapped < 0) {
4261 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4262 if (hostgid_mapped < 0)
4263 goto err;
4264 tmp = malloc(sizeof(*tmp));
4265 if (!tmp)
4266 goto err;
4267 entry = malloc(sizeof(*entry));
4268 if (!entry) {
4269 free(tmp);
4270 goto err;
4271 }
4272 tmp->elem = entry;
4273 entry->idtype = ID_TYPE_GID;
4274 entry->nsid = hostgid_mapped;
4275 entry->hostid = (unsigned long) gid;
4355ab5f 4276 entry->range = 1;
3ec1648d 4277 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4278 }
4279 lxc_list_for_each_safe(it, &conf->id_map, next) {
4280 tmp = malloc(sizeof(*tmp));
4281 if (!tmp)
4282 goto err;
4283 entry = malloc(sizeof(*entry));
4284 if (!entry) {
4285 free(tmp);
4286 goto err;
4287 }
4288 memset(entry, 0, sizeof(*entry));
4289 memcpy(entry, it->elem, sizeof(*entry));
4290 tmp->elem = entry;
3ec1648d 4291 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4292 }
4293
4294 return new;
4295
4296err:
8b227008 4297 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4298 if (new)
4299 lxc_free_idmap(new);
c30ac545 4300 free(new);
4355ab5f
SH
4301 return NULL;
4302}
4303
4304/*
4305 * Run a function in a new user namespace.
8b227008 4306 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4307 */
4308int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4309{
4310 int ret, pid;
4311 struct userns_fn_data d;
4312 char c = '1';
4313 int p[2];
4314 struct lxc_list *idmap;
4315
4355ab5f 4316 ret = pipe(p);
4355ab5f
SH
4317 if (ret < 0) {
4318 SYSERROR("opening pipe");
4319 return -1;
4320 }
4321 d.fn = fn;
4322 d.arg = data;
4323 d.p[0] = p[0];
4324 d.p[1] = p[1];
4325 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4326 if (pid < 0)
4327 goto err;
4355ab5f 4328 close(p[0]);
4355ab5f
SH
4329 p[0] = -1;
4330
8b227008
TS
4331 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4332 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4333 goto err;
4334 }
4335
4336 ret = lxc_map_ids(idmap, pid);
4337 lxc_free_idmap(idmap);
88dd66fc 4338 free(idmap);
565e571c 4339 if (ret) {
4355ab5f
SH
4340 ERROR("Error setting up child mappings");
4341 goto err;
4342 }
4343
4344 // kick the child
4345 if (write(p[1], &c, 1) != 1) {
4346 SYSERROR("writing to pipe to child");
4347 goto err;
4348 }
4349
3139aead
SG
4350 ret = wait_for_pid(pid);
4351
4352 close(p[1]);
4353 return ret;
4354
4355ab5f 4355err:
4355ab5f
SH
4356 if (p[0] != -1)
4357 close(p[0]);
4358 close(p[1]);
4355ab5f
SH
4359 return -1;
4360}
97e9cfa0 4361
/*
 * Look up the login name belonging to the effective uid.
 *
 * Returns a heap-allocated copy that the caller must free(), or NULL when
 * the lookup (or the copy) fails.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4373
/*
 * Look up the group name belonging to the effective gid.
 *
 * Returns a heap-allocated copy that the caller must free(), or NULL when
 * the lookup (or the copy) fails.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4385
a96a8e8c 4386/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4387void suggest_default_idmap(void)
4388{
4389 FILE *f;
4390 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4391 char *line = NULL;
4392 char *uname, *gname;
4393 size_t len = 0;
4394
4395 if (!(uname = getuname()))
4396 return;
4397
4398 if (!(gname = getgname())) {
4399 free(uname);
4400 return;
4401 }
4402
4403 f = fopen(subuidfile, "r");
4404 if (!f) {
4405 ERROR("Your system is not configured with subuids");
4406 free(gname);
4407 free(uname);
4408 return;
4409 }
4410 while (getline(&line, &len, f) != -1) {
4411 char *p = strchr(line, ':'), *p2;
4412 if (*line == '#')
4413 continue;
4414 if (!p)
4415 continue;
4416 *p = '\0';
4417 p++;
4418 if (strcmp(line, uname))
4419 continue;
4420 p2 = strchr(p, ':');
4421 if (!p2)
4422 continue;
4423 *p2 = '\0';
4424 p2++;
4425 if (!*p2)
4426 continue;
4427 uid = atoi(p);
4428 urange = atoi(p2);
4429 }
4430 fclose(f);
4431
4432 f = fopen(subuidfile, "r");
4433 if (!f) {
4434 ERROR("Your system is not configured with subgids");
4435 free(gname);
4436 free(uname);
4437 return;
4438 }
4439 while (getline(&line, &len, f) != -1) {
4440 char *p = strchr(line, ':'), *p2;
4441 if (*line == '#')
4442 continue;
4443 if (!p)
4444 continue;
4445 *p = '\0';
4446 p++;
4447 if (strcmp(line, uname))
4448 continue;
4449 p2 = strchr(p, ':');
4450 if (!p2)
4451 continue;
4452 *p2 = '\0';
4453 p2++;
4454 if (!*p2)
4455 continue;
4456 gid = atoi(p);
4457 grange = atoi(p2);
4458 }
4459 fclose(f);
4460
f10fad2f 4461 free(line);
97e9cfa0
SH
4462
4463 if (!urange || !grange) {
4464 ERROR("You do not have subuids or subgids allocated");
4465 ERROR("Unprivileged containers require subuids and subgids");
4466 return;
4467 }
4468
4469 ERROR("You must either run as root, or define uid mappings");
4470 ERROR("To pass uid mappings to lxc-create, you could create");
4471 ERROR("~/.config/lxc/default.conf:");
4472 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4473 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4474 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4475
4476 free(gname);
4477 free(uname);
4478}
aaf26830 4479
a7307747
SH
4480static void free_cgroup_settings(struct lxc_list *result)
4481{
4482 struct lxc_list *iterator, *next;
4483
4484 lxc_list_for_each_safe(iterator, result, next) {
4485 lxc_list_del(iterator);
4486 free(iterator);
4487 }
4488 free(result);
4489}
4490
aaf26830
KT
4491/*
4492 * Return the list of cgroup_settings sorted according to the following rules
4493 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4494 */
4495struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4496{
4497 struct lxc_list *result;
4498 struct lxc_list *memsw_limit = NULL;
4499 struct lxc_list *it = NULL;
4500 struct lxc_cgroup *cg = NULL;
4501 struct lxc_list *item = NULL;
4502
4503 result = malloc(sizeof(*result));
fac7c663
KT
4504 if (!result) {
4505 ERROR("failed to allocate memory to sort cgroup settings");
4506 return NULL;
4507 }
aaf26830
KT
4508 lxc_list_init(result);
4509
4510 /*Iterate over the cgroup settings and copy them to the output list*/
4511 lxc_list_for_each(it, cgroup_settings) {
4512 item = malloc(sizeof(*item));
fac7c663
KT
4513 if (!item) {
4514 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4515 free_cgroup_settings(result);
fac7c663
KT
4516 return NULL;
4517 }
aaf26830
KT
4518 item->elem = it->elem;
4519 cg = it->elem;
4520 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4521 /* Store the memsw_limit location */
4522 memsw_limit = item;
4523 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4524 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4525 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4526 item->elem = memsw_limit->elem;
4527 memsw_limit->elem = it->elem;
4528 }
4529 lxc_list_add_tail(result, item);
4530 }
4531
4532 return result;
a7307747 4533}