]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
doc: change "-t" option of lxc-create(1) to being required
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
b2718c72 68#include "parse.h"
1b09f2c0
DL
69#include "utils.h"
70#include "conf.h"
71#include "log.h"
d55bc1ad 72#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 73#include "bdev.h"
368bbc02 74#include "cgroup.h"
025ed0f3 75#include "lxclock.h"
4355ab5f 76#include "namespace.h"
fe4de9a6 77#include "lsm/lsm.h"
d0a36f2c 78
495d2046
SG
79#if HAVE_SYS_CAPABILITY_H
80#include <sys/capability.h>
81#endif
82
6ff05e18
SG
83#if HAVE_SYS_PERSONALITY_H
84#include <sys/personality.h>
85#endif
86
edaf8b1b
SG
87#if IS_BIONIC
88#include <../include/lxcmntent.h>
89#else
90#include <mntent.h>
91#endif
92
769872f9
SH
93#include "lxcseccomp.h"
94
36eb9bde 95lxc_log_define(lxc_conf, lxc);
e5bda9ee 96
0ad19a3f 97#define MAXHWLEN 18
98#define MAXINDEXLEN 20
442cbbe6 99#define MAXMTULEN 16
0ad19a3f 100#define MAXLINELEN 128
101
495d2046 102#if HAVE_SYS_CAPABILITY_H
b09094da
MN
103#ifndef CAP_SETFCAP
104#define CAP_SETFCAP 31
105#endif
106
107#ifndef CAP_MAC_OVERRIDE
108#define CAP_MAC_OVERRIDE 32
109#endif
110
111#ifndef CAP_MAC_ADMIN
112#define CAP_MAC_ADMIN 33
113#endif
495d2046 114#endif
b09094da
MN
115
116#ifndef PR_CAPBSET_DROP
117#define PR_CAPBSET_DROP 24
118#endif
119
9818cae4
SG
120#ifndef LO_FLAGS_AUTOCLEAR
121#define LO_FLAGS_AUTOCLEAR 4
122#endif
123
0769b82a
CS
124/* needed for cgroup automount checks, regardless of whether we
125 * have included linux/capability.h or not */
126#ifndef CAP_SYS_ADMIN
127#define CAP_SYS_ADMIN 21
128#endif
129
2d76d1d7
SG
/*
 * Fallback pivot_root() for C libraries that do not provide one.
 * Issues the raw syscall when the kernel headers define its number,
 * otherwise fails with errno set to ENOSYS.
 */
#ifndef HAVE_PIVOT_ROOT
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#else
extern int pivot_root(const char * new_root, const char * put_old);
#endif
144
/*
 * Fallback sethostname() for C libraries that do not provide one.
 * Issues the raw syscall when the kernel headers define its number,
 * otherwise fails with errno set to ENOSYS.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
157
72f919c4
SG
158/* Define __S_ISTYPE if missing from the C library */
159#ifndef __S_ISTYPE
160#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
161#endif
162
ecec0126
SG
163#ifndef MS_PRIVATE
164#define MS_PRIVATE (1<<18)
165#endif
166
/*
 * Textual names of the container hook points, NUM_LXC_HOOKS entries.
 * NOTE(review): presumably indexed by the hook enum declared in conf.h;
 * confirm the order matches that enum.
 */
char *lxchook_names[NUM_LXC_HOOKS] = {
	"pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 169
/* Signature shared by the per-type network setup and shutdown callbacks. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 171
998ac676
RT
/*
 * One textual mount option and the MS_* flag it maps to.
 * NOTE(review): `clear` presumably means the option removes `flag` instead
 * of setting it (compare "ro"/"rw" in the table); confirm in the parser.
 */
struct mount_opt {
	char *name;
	int clear;
	int flag;
};
177
81810dd1
DL
/* Maps a capability name, as written in the config, to its CAP_* number. */
struct caps_opt {
	char *name;
	int value;
};
182
0769b82a
CS
183/* Declare this here, since we don't want to reshuffle the whole file. */
184static int in_caplist(int cap, struct lxc_list *caps);
185
a589434e
JN
/* Forward declarations of the per-type network device setup callbacks. */
static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);

/* Dispatch table: LXC_NET_* type -> callback that instantiates the device. */
static  instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
	[LXC_NET_VETH]    = instantiate_veth,
	[LXC_NET_MACVLAN] = instantiate_macvlan,
	[LXC_NET_VLAN]    = instantiate_vlan,
	[LXC_NET_PHYS]    = instantiate_phys,
	[LXC_NET_EMPTY]   = instantiate_empty,
	[LXC_NET_NONE]    = instantiate_none,
};
201
74a2b586
JK
/* Forward declarations of the per-type network device shutdown callbacks. */
static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);

/* Dispatch table: LXC_NET_* type -> callback that shuts the device down. */
static  instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
	[LXC_NET_VETH]    = shutdown_veth,
	[LXC_NET_MACVLAN] = shutdown_macvlan,
	[LXC_NET_VLAN]    = shutdown_vlan,
	[LXC_NET_PHYS]    = shutdown_phys,
	[LXC_NET_EMPTY]   = shutdown_empty,
	[LXC_NET_NONE]    = shutdown_none,
};
217
/*
 * Known mount option names; each row is { name, clear, flag }.
 * The table is terminated by a row whose name is NULL.
 * NOTE(review): rows with clear == 1 look like the negated form of the
 * option (e.g. "rw" vs "ro"); the consumer is outside this chunk - confirm.
 */
static struct mount_opt mount_opt[] = {
	{ "defaults",      0, 0              },
	{ "ro",            0, MS_RDONLY      },
	{ "rw",            1, MS_RDONLY      },
	{ "suid",          1, MS_NOSUID      },
	{ "nosuid",        0, MS_NOSUID      },
	{ "dev",           1, MS_NODEV       },
	{ "nodev",         0, MS_NODEV       },
	{ "exec",          1, MS_NOEXEC      },
	{ "noexec",        0, MS_NOEXEC      },
	{ "sync",          0, MS_SYNCHRONOUS },
	{ "async",         1, MS_SYNCHRONOUS },
	{ "dirsync",       0, MS_DIRSYNC     },
	{ "remount",       0, MS_REMOUNT     },
	{ "mand",          0, MS_MANDLOCK    },
	{ "nomand",        1, MS_MANDLOCK    },
	{ "atime",         1, MS_NOATIME     },
	{ "noatime",       0, MS_NOATIME     },
	{ "diratime",      1, MS_NODIRATIME  },
	{ "nodiratime",    0, MS_NODIRATIME  },
	{ "bind",          0, MS_BIND        },
	{ "rbind",         0, MS_BIND|MS_REC },
	{ "relatime",      0, MS_RELATIME    },
	{ "norelatime",    1, MS_RELATIME    },
	{ "strictatime",   0, MS_STRICTATIME },
	{ "nostrictatime", 1, MS_STRICTATIME },
	{ NULL,            0, 0              },
};
246
/*
 * Capability name -> CAP_* number table.  Capabilities that not every
 * header set defines are guarded individually; without
 * <sys/capability.h> the table is empty.
 */
#if HAVE_SYS_CAPABILITY_H
static struct caps_opt caps_opt[] = {
	{ "chown",             CAP_CHOWN             },
	{ "dac_override",      CAP_DAC_OVERRIDE      },
	{ "dac_read_search",   CAP_DAC_READ_SEARCH   },
	{ "fowner",            CAP_FOWNER            },
	{ "fsetid",            CAP_FSETID            },
	{ "kill",              CAP_KILL              },
	{ "setgid",            CAP_SETGID            },
	{ "setuid",            CAP_SETUID            },
	{ "setpcap",           CAP_SETPCAP           },
	{ "linux_immutable",   CAP_LINUX_IMMUTABLE   },
	{ "net_bind_service",  CAP_NET_BIND_SERVICE  },
	{ "net_broadcast",     CAP_NET_BROADCAST     },
	{ "net_admin",         CAP_NET_ADMIN         },
	{ "net_raw",           CAP_NET_RAW           },
	{ "ipc_lock",          CAP_IPC_LOCK          },
	{ "ipc_owner",         CAP_IPC_OWNER         },
	{ "sys_module",        CAP_SYS_MODULE        },
	{ "sys_rawio",         CAP_SYS_RAWIO         },
	{ "sys_chroot",        CAP_SYS_CHROOT        },
	{ "sys_ptrace",        CAP_SYS_PTRACE        },
	{ "sys_pacct",         CAP_SYS_PACCT         },
	{ "sys_admin",         CAP_SYS_ADMIN         },
	{ "sys_boot",          CAP_SYS_BOOT          },
	{ "sys_nice",          CAP_SYS_NICE          },
	{ "sys_resource",      CAP_SYS_RESOURCE      },
	{ "sys_time",          CAP_SYS_TIME          },
	{ "sys_tty_config",    CAP_SYS_TTY_CONFIG    },
	{ "mknod",             CAP_MKNOD             },
	{ "lease",             CAP_LEASE             },
#ifdef CAP_AUDIT_WRITE
	{ "audit_write",       CAP_AUDIT_WRITE       },
#endif
#ifdef CAP_AUDIT_CONTROL
	{ "audit_control",     CAP_AUDIT_CONTROL     },
#endif
	{ "setfcap",           CAP_SETFCAP           },
	{ "mac_override",      CAP_MAC_OVERRIDE      },
	{ "mac_admin",         CAP_MAC_ADMIN         },
#ifdef CAP_SYSLOG
	{ "syslog",            CAP_SYSLOG            },
#endif
#ifdef CAP_WAKE_ALARM
	{ "wake_alarm",        CAP_WAKE_ALARM        },
#endif
};
#else
static struct caps_opt caps_opt[] = {};
#endif
81810dd1 297
f0d02950
JTLB
/*
 * Fixed directories below /dev.
 * NOTE(review): their users are outside this chunk; presumably the
 * per-container autodev trees live here - confirm.
 */
const char *dev_base_path = "/dev/.lxc";
const char *dev_user_path = "/dev/.lxc/user";
300
91c3830e
SH
301static int run_buffer(char *buffer)
302{
ebec9176 303 struct lxc_popen_FILE *f;
91c3830e 304 char *output;
8e7da691 305 int ret;
91c3830e 306
ebec9176 307 f = lxc_popen(buffer);
91c3830e
SH
308 if (!f) {
309 SYSERROR("popen failed");
310 return -1;
311 }
312
313 output = malloc(LXC_LOG_BUFFER_SIZE);
314 if (!output) {
315 ERROR("failed to allocate memory for script output");
ebec9176 316 lxc_pclose(f);
91c3830e
SH
317 return -1;
318 }
319
ebec9176 320 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
321 DEBUG("script output: %s", output);
322
323 free(output);
324
ebec9176 325 ret = lxc_pclose(f);
8e7da691 326 if (ret == -1) {
91c3830e
SH
327 SYSERROR("Script exited on error");
328 return -1;
8e7da691
DE
329 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
330 ERROR("Script exited with status %d", WEXITSTATUS(ret));
331 return -1;
332 } else if (WIFSIGNALED(ret)) {
333 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
334 strsignal(WTERMSIG(ret)));
335 return -1;
91c3830e
SH
336 }
337
338 return 0;
339}
340
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * @name:    container name, passed as first script argument
 * @section: config section, passed as second script argument
 * @script:  path of the script to run
 * @hook:    hook name, passed as third script argument
 * @lxcpath: not used by this function
 * @argsin:  optional NULL-terminated vector of extra arguments (may be NULL)
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * built.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument is preceded by one blank */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	/*
	 * Heap allocation: the size depends on configuration data and a
	 * failed alloca() cannot be detected.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto err;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t len = size - (size_t)ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			goto err;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);
	return ret;

err:
	free(buffer);
	return -1;
}
390
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it with run_buffer().
 *
 * @name:    container name, passed as first script argument
 * @section: config section, passed as second script argument
 * @script:  path of the script to run
 * @...:     extra arguments as a list of char *, terminated by a NULL
 *           pointer
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass over the variadic list: compute the required size */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	/*
	 * Heap allocation: the size depends on configuration data and a
	 * failed alloca() cannot be detected.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto err;
	}

	/* second pass: append every argument, separated by a blank */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - (size_t)ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			/* every va_start() needs a va_end() before returning */
			va_end(ap);
			goto err;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);
	return ret;

err:
	free(buffer);
	return -1;
}
442
/*
 * Per-line callback used by mount_unknown_fs(): treat the line in @buffer
 * as a filesystem type and try to mount cbarg->rootfs on cbarg->target
 * with it.
 *
 * @buffer: one line of the filesystems file; trimmed in place
 * @data:   pointer to the cbarg structure built by mount_unknown_fs()
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped or the
 * mount failed with this type, -1 when the mount options cannot be parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Initialized so the free() on the error path below is well defined
	 * even if parse_mntopts() fails before storing a result.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip leading and trailing garbage characters around the type name */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
487
a17b1e65
SG
488static int mount_unknown_fs(const char *rootfs, const char *target,
489 const char *options)
78ae2fcc 490{
a6afdde9 491 int i;
78ae2fcc 492
493 struct cbarg {
494 const char *rootfs;
a6afdde9 495 const char *target;
a17b1e65 496 const char *options;
78ae2fcc 497 } cbarg = {
498 .rootfs = rootfs,
a6afdde9 499 .target = target,
a17b1e65 500 .options = options,
78ae2fcc 501 };
502
a6afdde9
DL
503 /*
504 * find the filesystem type with brute force:
505 * first we check with /etc/filesystems, in case the modules
78ae2fcc 506 * are auto-loaded and fall back to the supported kernel fs
507 */
508 char *fsfile[] = {
509 "/etc/filesystems",
510 "/proc/filesystems",
511 };
512
a6afdde9
DL
513 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
514
515 int ret;
516
517 if (access(fsfile[i], F_OK))
518 continue;
519
520 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
521 if (ret < 0) {
522 ERROR("failed to parse '%s'", fsfile[i]);
523 return -1;
524 }
525
526 if (ret)
527 return 0;
78ae2fcc 528 }
529
a6afdde9
DL
530 ERROR("failed to determine fs type for '%s'", rootfs);
531 return -1;
532}
533
a17b1e65
SG
/*
 * Mount a directory rootfs by recursively bind-mounting @rootfs on
 * @target, adding the flags and data parsed from @options.
 *
 * Returns the result of mount(2), or -1 if @options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialized so the free() on the error path below is well defined
	 * even if parse_mntopts() fails before storing a result.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
551
/*
 * Attach the file @rootfs to the loop device open on @fd and mark the
 * device LO_FLAGS_AUTOCLEAR.
 *
 * @rootfs: path of the backing file, opened read-write
 * @fd:     open descriptor of the loop device
 * @loinfo: caller-provided buffer; zeroed and filled in here
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/*
		 * The backing file is already bound but AUTOCLEAR was not
		 * applied, so detach it explicitly instead of leaving the
		 * loop device occupied.
		 */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	/* the loop device holds its own reference to the backing file */
	close(rfd);

	return ret;
}
583
a17b1e65
SG
/*
 * Mount a regular-file rootfs: find the first free /dev/loop* device,
 * attach @rootfs to it and mount the loop device on @target with
 * mount_unknown_fs().
 *
 * Returns 0 on success, -1 if /dev cannot be read, no free loop device is
 * found, or attaching or mounting fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/*
	 * readdir() replaces the deprecated readdir_r(); the stream is
	 * private to this function.
	 */
	while ((direntp = readdir(dir))) {

		if (!strcmp(direntp->d_name, "."))
			continue;

		if (!strcmp(direntp->d_name, ".."))
			continue;

		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		/* only ENXIO is accepted as "device is free" */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
648
a17b1e65
SG
/*
 * Mount a block-device rootfs on @target; the filesystem type is found by
 * mount_unknown_fs(), whose result is returned.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc;

	rc = mount_unknown_fs(rootfs, target, options);
	return rc;
}
654
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing and keep it
 * open for the duration of the container run, to prevent the container from
 * marking the underlying fs read-only on shutdown.  The file is unlinked
 * immediately, so no name is left behind in the rootfs.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned.
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || rootfs[0] == '\0')
		return -2;

	/* a path that cannot be resolved is treated as "nothing to pin" */
	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	/* only directories are pinned */
	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR);
	if (fd < 0)
		return fd;

	/* the open descriptor keeps the fs busy; the name is not needed */
	(void)unlink(absrootfspin);
	return fd;
}
697
e2a7e8dc
SH
698/*
699 * If we are asking to remount something, make sure that any
700 * NOEXEC etc are honored.
701 */
702static unsigned long add_required_remount_flags(const char *s, const char *d,
703 unsigned long flags)
704{
614305f3 705#ifdef HAVE_STATVFS
e2a7e8dc
SH
706 struct statvfs sb;
707 unsigned long required_flags = 0;
708
709 if (!(flags & MS_REMOUNT))
710 return flags;
711
712 if (!s)
713 s = d;
714
715 if (!s)
716 return flags;
717 if (statvfs(s, &sb) < 0)
718 return flags;
719
720 if (sb.f_flag & MS_NOSUID)
721 required_flags |= MS_NOSUID;
722 if (sb.f_flag & MS_NODEV)
723 required_flags |= MS_NODEV;
724 if (sb.f_flag & MS_RDONLY)
725 required_flags |= MS_RDONLY;
726 if (sb.f_flag & MS_NOEXEC)
727 required_flags |= MS_NOEXEC;
728
729 return flags | required_flags;
614305f3
SH
730#else
731 return flags;
732#endif
e2a7e8dc
SH
733}
734
4fb3cba5 735static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 736{
368bbc02 737 int r;
b06b8511
CS
738 size_t i;
739 static struct {
740 int match_mask;
741 int match_flag;
742 const char *source;
743 const char *destination;
744 const char *fstype;
745 unsigned long flags;
746 const char *options;
747 } default_mounts[] = {
748 /* Read-only bind-mounting... In older kernels, doing that required
749 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
750 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
751 * kernel 2.6.26 onwards. However, this apparently does not work on
752 * kernel 3.8. Unfortunately, on that very same kernel, doing the
753 * same trick as above doesn't seem to work either, there one needs
754 * to ALSO specify MS_BIND for the remount, otherwise the entire
755 * fs is remounted read-only or the mount fails because it's busy...
756 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
757 * 2.6.32...
368bbc02 758 */
b06b8511
CS
759 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
760 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
761 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
762 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
763 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
764 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
765 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
766 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
767 { 0, 0, NULL, NULL, NULL, 0, NULL }
768 };
368bbc02 769
b06b8511
CS
770 for (i = 0; default_mounts[i].match_mask; i++) {
771 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
772 char *source = NULL;
773 char *destination = NULL;
774 int saved_errno;
e2a7e8dc 775 unsigned long mflags;
b06b8511
CS
776
777 if (default_mounts[i].source) {
778 /* will act like strdup if %r is not present */
779 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
780 if (!source) {
781 SYSERROR("memory allocation error");
782 return -1;
783 }
784 }
785 if (default_mounts[i].destination) {
786 /* will act like strdup if %r is not present */
787 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
788 if (!destination) {
789 saved_errno = errno;
790 SYSERROR("memory allocation error");
791 free(source);
792 errno = saved_errno;
793 return -1;
794 }
795 }
e2a7e8dc
SH
796 mflags = add_required_remount_flags(source, destination,
797 default_mounts[i].flags);
798 r = mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options);
b06b8511 799 saved_errno = errno;
c414be25 800 if (r < 0)
e2a7e8dc 801 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
b06b8511
CS
802 free(source);
803 free(destination);
804 if (r < 0) {
b06b8511
CS
805 errno = saved_errno;
806 return -1;
807 }
368bbc02 808 }
368bbc02
CS
809 }
810
b06b8511 811 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
812 int cg_flags;
813
814 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
815 /* If the type of cgroup mount was not specified, it depends on the
816 * container's capabilities as to what makes sense: if we have
817 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
818 * anyway, so we may as well default to read-write; then the admin
819 * will not be given a false sense of security. (And if they really
820 * want mixed r/o r/w, then they can explicitly specify :mixed.)
821 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
822 * :mixed, because then the container can't remount it read-write. */
823 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
824 int has_sys_admin = 0;
825 if (!lxc_list_empty(&conf->keepcaps)) {
826 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
827 } else {
828 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
829 }
830 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
831 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
832 } else {
833 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
834 }
835 }
836
837 if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
368bbc02 838 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 839 return -1;
368bbc02
CS
840 }
841 }
842
368bbc02 843 return 0;
368bbc02
CS
844}
845
a17b1e65 846static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 847{
b09ef133 848 char absrootfs[MAXPATHLEN];
78ae2fcc 849 struct stat s;
a6afdde9 850 int i;
78ae2fcc 851
a17b1e65 852 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 853
854 struct rootfs_type {
855 int type;
856 rootfs_cb cb;
857 } rtfs_type[] = {
2656d231
DL
858 { S_IFDIR, mount_rootfs_dir },
859 { S_IFBLK, mount_rootfs_block },
860 { S_IFREG, mount_rootfs_file },
78ae2fcc 861 };
0ad19a3f 862
4c8ab83b 863 if (!realpath(rootfs, absrootfs)) {
36eb9bde 864 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 865 return -1;
866 }
b09ef133 867
b09ef133 868 if (access(absrootfs, F_OK)) {
36eb9bde 869 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 870 return -1;
871 }
872
78ae2fcc 873 if (stat(absrootfs, &s)) {
36eb9bde 874 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 875 return -1;
876 }
877
78ae2fcc 878 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 879
78ae2fcc 880 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
881 continue;
9b0f0477 882
a17b1e65 883 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 884 }
9b0f0477 885
36eb9bde 886 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 887 return -1;
0ad19a3f 888}
889
/*
 * Set the hostname to utsname->nodename.
 *
 * Returns 0 on success or when @utsname is NULL (nothing to do), -1 if
 * sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
904
69aa6655
DE
/* One symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name, relative to <rootfs>/dev */
};

/* Symlinks created by setup_dev_symlinks(). */
static const struct dev_symlinks dev_symlinks[] = {
	{"/proc/self/fd",	"fd"},
	{"/proc/self/fd/0",	"stdin"},
	{"/proc/self/fd/1",	"stdout"},
	{"/proc/self/fd/2",	"stderr"},
};
916
917static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
918{
919 char path[MAXPATHLEN];
920 int ret,i;
09227be2 921 struct stat s;
69aa6655
DE
922
923
924 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
925 const struct dev_symlinks *d = &dev_symlinks[i];
926 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
927 if (ret < 0 || ret >= MAXPATHLEN)
928 return -1;
09227be2
MW
929
930 /*
931 * Stat the path first. If we don't get an error
932 * accept it as is and don't try to create it
933 */
934 if (!stat(path, &s)) {
935 continue;
936 }
937
69aa6655 938 ret = symlink(d->oldpath, path);
09227be2 939
69aa6655 940 if (ret && errno != EEXIST) {
09227be2
MW
941 if ( errno == EROFS ) {
942 WARN("Warning: Read Only file system while creating %s", path);
943 } else {
944 SYSERROR("Error creating %s", path);
945 return -1;
946 }
69aa6655
DE
947 }
948 }
949 return 0;
950}
951
33fcb7a0 952static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 953 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 954{
7c6ef2a2
SH
955 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
956 int i, ret;
b0a33c1e 957
bc9bd0e3
DL
958 if (!rootfs->path)
959 return 0;
960
b0a33c1e 961 for (i = 0; i < tty_info->nbtty; i++) {
962
963 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
964
7c6ef2a2 965 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 966 rootfs->mount, i + 1);
7c6ef2a2
SH
967 if (ret >= sizeof(path)) {
968 ERROR("pathname too long for ttys");
969 return -1;
970 }
971 if (ttydir) {
972 /* create dev/lxc/tty%d" */
9ba8130c 973 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
974 rootfs->mount, ttydir, i + 1);
975 if (ret >= sizeof(lxcpath)) {
976 ERROR("pathname too long for ttys");
977 return -1;
978 }
979 ret = creat(lxcpath, 0660);
980 if (ret==-1 && errno != EEXIST) {
959aee9c 981 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
982 return -1;
983 }
4d44e274
SH
984 if (ret >= 0)
985 close(ret);
7c6ef2a2
SH
986 ret = unlink(path);
987 if (ret && errno != ENOENT) {
959aee9c 988 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
989 return -1;
990 }
b0a33c1e 991
7c6ef2a2
SH
992 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
993 WARN("failed to mount '%s'->'%s'",
994 pty_info->name, path);
995 continue;
996 }
13954cce 997
9ba8130c
SH
998 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
999 if (ret >= sizeof(lxcpath)) {
1000 ERROR("tty pathname too long");
1001 return -1;
1002 }
7c6ef2a2
SH
1003 ret = symlink(lxcpath, path);
1004 if (ret) {
959aee9c 1005 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1006 return -1;
1007 }
1008 } else {
c6883f38
SH
1009 /* If we populated /dev, then we need to create /dev/ttyN */
1010 if (access(path, F_OK)) {
1011 ret = creat(path, 0660);
1012 if (ret==-1) {
959aee9c 1013 SYSERROR("error creating %s", path);
c6883f38 1014 /* this isn't fatal, continue */
025ed0f3 1015 } else {
c6883f38 1016 close(ret);
025ed0f3 1017 }
c6883f38 1018 }
7c6ef2a2
SH
1019 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
1020 WARN("failed to mount '%s'->'%s'",
1021 pty_info->name, path);
1022 continue;
1023 }
b0a33c1e 1024 }
1025 }
1026
cd54d859
DL
1027 INFO("%d tty(s) has been setup", tty_info->nbtty);
1028
b0a33c1e 1029 return 0;
1030}
1031
bf601689 1032
2d489f9e 1033static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 1034{
2d489f9e 1035 int oldroot = -1, newroot = -1;
bf601689 1036
2d489f9e
SH
1037 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1038 if (oldroot < 0) {
1039 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1040 return -1;
1041 }
2d489f9e
SH
1042 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1043 if (newroot < 0) {
1044 SYSERROR("Error opening new-/ for fchdir");
1045 goto fail;
c08556c6 1046 }
bf601689 1047
cc6f6dd7 1048 /* change into new root fs */
2d489f9e 1049 if (fchdir(newroot)) {
cc6f6dd7 1050 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1051 goto fail;
cc6f6dd7
DL
1052 }
1053
cc6f6dd7 1054 /* pivot_root into our new root fs */
2d489f9e 1055 if (pivot_root(".", ".")) {
cc6f6dd7 1056 SYSERROR("pivot_root syscall failed");
2d489f9e 1057 goto fail;
bf601689 1058 }
cc6f6dd7 1059
2d489f9e
SH
1060 /*
1061 * at this point the old-root is mounted on top of our new-root
1062 * To unmounted it we must not be chdir'd into it, so escape back
1063 * to old-root
1064 */
1065 if (fchdir(oldroot) < 0) {
1066 SYSERROR("Error entering oldroot");
1067 goto fail;
1068 }
7981ea46 1069 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1070 SYSERROR("Error detaching old root");
1071 goto fail;
cc6f6dd7
DL
1072 }
1073
2d489f9e
SH
1074 if (fchdir(newroot) < 0) {
1075 SYSERROR("Error re-entering newroot");
1076 goto fail;
1077 }
cc6f6dd7 1078
2d489f9e
SH
1079 close(oldroot);
1080 close(newroot);
bf601689 1081
2d489f9e 1082 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1083
bf601689 1084 return 0;
2d489f9e
SH
1085
1086fail:
1087 if (oldroot != -1)
1088 close(oldroot);
1089 if (newroot != -1)
1090 close(newroot);
1091 return -1;
bf601689
MH
1092}
1093
bc6928ff
MW
/*
 * Check to see if a directory has something mounted on it and,
 * if it does, return the fstype.
 *
 * @dir    : directory to look up; must exist and be a directory.
 * @fstype : optional output buffer of at least MAX_FSTYPE_LEN bytes,
 *           may be NULL when the caller only wants the count.
 *
 * Returns: # of matching entries in /proc/self/mounts
 *	if != 0 fstype is filled with the last filesystem value.
 *	if == 0 no matches found (or @dir is not an accessible directory,
 *	or /proc/self/mounts could not be opened), fstype unchanged.
 *
 * ToDo: Maybe return the mount options in another parameter...
 */

#define LINELEN 4096
#define MAX_FSTYPE_LEN 128
static int mount_check_fs( const char *dir, char *fstype )
{
	char buf[LINELEN];
	struct stat s;
	FILE *f;
	int found_fs = 0;

	DEBUG("entering mount_check_fs for %s", dir);

	if (access(dir, F_OK) != 0 || stat(dir, &s) != 0 || !S_ISDIR(s.st_mode))
		return 0;

	f = fopen("/proc/self/mounts", "r");
	if (!f)
		return 0;

	while (fgets(buf, LINELEN, f)) {
		char *mntpoint, *type, *sep;

		/* first field (device) is skipped */
		sep = strchr(buf, ' ');
		if (!sep)
			continue;
		mntpoint = sep + 1;

		/* second field: mount point */
		sep = strchr(mntpoint, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		/* Compare the directory in the entry to desired */
		if (strcmp(mntpoint, dir) != 0)
			continue;

		/* third field: filesystem type */
		type = sep + 1;
		sep = strchr(type, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		++found_fs;

		/* snprintf always NUL-terminates and truncates if needed */
		if (fstype)
			snprintf(fstype, MAX_FSTYPE_LEN, "%s", type);
	}

	fclose(f);

	/*
	 * fstype may be NULL, and is left untouched when nothing matched,
	 * so only hand it to the %s conversion when it was actually filled.
	 */
	DEBUG("mount_check_fs returning %d last %s", found_fs,
	      (fstype && found_fs) ? fstype : "(none)");

	return found_fs;
}
1163
1164/*
1165 * Locate a devtmpfs mount (should be on /dev) and create a container
1166 * subdirectory on it which we can then bind mount to the container
1167 * /dev instead of mounting a tmpfs there.
1168 * If we fail, return NULL.
1169 * Else return the pointer to the name buffer with the string to
1170 * the devtmpfs subdirectory.
1171 */
1172
74a3920a 1173static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1174{
1175 int ret;
1176 struct stat s;
1177 char tmp_path[MAXPATHLEN];
1178 char fstype[MAX_FSTYPE_LEN];
bc6928ff
MW
1179 uint64_t hash;
1180
f0d02950 1181 if ( 0 != access(dev_base_path, F_OK) || 0 != stat(dev_base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
bc6928ff 1182 /* This is just making /dev/.lxc it better work or we're done */
f0d02950 1183 ret = mkdir(dev_base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
bc6928ff
MW
1184 if ( ret ) {
1185 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1186 return NULL;
1187 }
1188 }
1189
1190 /*
1191 * Programmers notes:
1192 * We can not do mounts in this area of code that we want
1193 * to be visible in the host. Consequently, /dev/.lxc must
1194 * be set up earlier if we need a tmpfs mounted there.
1195 * That only affects the rare cases where autodev is enabled
1196 * for a container and devtmpfs is not mounted on /dev in the
1197 * host. In that case, we'll fall back to the old method
1198 * of mounting a tmpfs in the container and have no visibility
1199 * into the container /dev.
1200 */
1201 if( ! mount_check_fs( "/dev", fstype )
1202 || strcmp( "devtmpfs", fstype ) ) {
1203 /* Either /dev was not mounted or was not devtmpfs */
1204
1205 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1206 /*
1207 * /dev/.lxc is not already mounted
1208 * Doing a mount here does no good, since
1209 * it's not visible in the host.
1210 */
1211
1212 ERROR("/dev/.lxc is not setup - taking fallback" );
1213 return NULL;
1214 }
1215 }
1216
f0d02950 1217 if ( 0 != access(dev_user_path, F_OK) || 0 != stat(dev_user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
bc6928ff
MW
1218 /*
1219 * This is making /dev/.lxc/user path for non-priv users.
1220 * If this doesn't work, we'll have to fall back in the
1221 * case of non-priv users. It's mode 1777 like /tmp.
1222 */
f0d02950 1223 ret = mkdir(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
bc6928ff
MW
1224 if ( ret ) {
1225 /* Issue an error but don't fail yet! */
1226 ERROR("Unable to create /dev/.lxc/user");
1227 }
1228 /* Umask tends to screw us up here */
f0d02950 1229 chmod(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
bc6928ff
MW
1230 }
1231
1232 /*
1233 * Since the container name must be unique within a given
1234 * lxcpath, we're going to use a hash of the path
1235 * /lxcpath/name as our hash name in /dev/.lxc/
1236 */
1237
1238 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1239 if (ret < 0 || ret >= MAXPATHLEN)
1240 return NULL;
1241
1242 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1243
f0d02950 1244 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
bc6928ff
MW
1245 if (ret < 0 || ret >= MAXPATHLEN)
1246 return NULL;
1247
1248 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1249 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1250 if ( ret ) {
f0d02950
JTLB
1251 /* Something must have failed with the dev_base_path...
1252 * Maybe unpriv user. Try dev_user_path now... */
bc6928ff
MW
1253 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1254
f0d02950 1255 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
bc6928ff
MW
1256 if (ret < 0 || ret >= MAXPATHLEN)
1257 return NULL;
1258
1259 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1260 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1261 if ( ret ) {
1262 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1263 return NULL;
1264 }
1265 }
1266 }
1267 }
1268
1269 strcpy( path, tmp_path );
1270 return path;
1271}
1272
91c3830e
SH
/*
 * Mount a /dev for the container below @root and make sure a pts
 * directory exists in it.
 *
 * If mk_devtmpfs() provides a host-side directory, it is bind mounted on
 * <root>/dev and <lxcpath>/<name>/rootfs.dev is (re)created as a symlink
 * to it.  Otherwise a tmpfs is mounted, unless something is already
 * mounted on rootfs.dev, in which case that is bind mounted instead.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s", root);

	/* a return value equal to the buffer size already means truncation */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs( name, devtmpfs_path, lxcpath ) ) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink( host_path );
		rmdir( host_path );
		ret = symlink(devtmpfs_path, host_path);

		if ( ret < 0 ) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0 );
	} else {
		/* Only mount a tmpfs here if there is not already a mount */
		if ( ! mount_check_fs( host_path, NULL ) ) {
			DEBUG("Mounting tmpfs to %s", host_path );
			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path );
			ret = mount(host_path , path, NULL, MS_BIND, 0 );
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) || 0 == S_ISDIR(s.st_mode) ) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1346
/* One device node to create in a populated container /dev. */
struct lxc_devs {
	const char *name;	/* node name below /dev */
	mode_t mode;		/* file type and permission bits passed to mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Table of the character devices created by setup_autodev(). */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1363
74a3920a 1364static int setup_autodev(const char *root)
c6883f38
SH
1365{
1366 int ret;
c6883f38
SH
1367 char path[MAXPATHLEN];
1368 int i;
3a32201c 1369 mode_t cmask;
c6883f38 1370
959aee9c 1371 INFO("Creating initial consoles under %s/dev", root);
91c3830e 1372
c6883f38 1373 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1374 if (ret < 0 || ret >= MAXPATHLEN) {
1375 ERROR("Error calculating container /dev location");
c6883f38 1376 return -1;
f7bee6c6 1377 }
91c3830e 1378
959aee9c 1379 INFO("Populating /dev under %s", root);
3a32201c 1380 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1381 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1382 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1383 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1384 if (ret < 0 || ret >= MAXPATHLEN)
1385 return -1;
1386 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1387 if (ret && errno != EEXIST) {
959aee9c 1388 SYSERROR("Error creating %s", d->name);
c6883f38
SH
1389 return -1;
1390 }
1391 }
3a32201c 1392 umask(cmask);
c6883f38 1393
959aee9c 1394 INFO("Populated /dev under %s", root);
c6883f38
SH
1395 return 0;
1396}
1397
f0d02950
JTLB
1398/*
1399 * Locate allocated devtmpfs mount and purge it.
1400 * path lookup mostly taken from mk_devtmpfs
1401 */
1402int lxc_delete_autodev(struct lxc_handler *handler)
1403{
1404 int ret;
1405 struct stat s;
1406 struct lxc_conf *lxc_conf = handler->conf;
1407 const char *name = handler->name;
1408 const char *lxcpath = handler->lxcpath;
1409 char tmp_path[MAXPATHLEN];
1410 uint64_t hash;
1411
1412 if ( lxc_conf->autodev <= 0 )
1413 return 0;
1414
1c90734d
JTLB
1415 /* don't clean on reboot */
1416 if ( lxc_conf->reboot == 1 )
1417 return 0;
f0d02950
JTLB
1418
1419 /*
1420 * Use the same logic as mk_devtmpfs to compute candidate
1421 * path for cleanup.
1422 */
1423
1424 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1425 if (ret < 0 || ret >= MAXPATHLEN)
1426 return -1;
1427
1428 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1429
1430 /* Probe /dev/.lxc/<container name>.<hash> */
1431 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
1432 if (ret < 0 || ret >= MAXPATHLEN)
1433 return -1;
1434
1435 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1436 /* Probe /dev/.lxc/user/<container name>.<hash> */
1437 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
1438 if (ret < 0 || ret >= MAXPATHLEN)
1439 return -1;
1440
1441 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1442 WARN("Failed to locate autodev /dev/.lxc and /dev/.lxc/user." );
1443 return -1;
1444 }
1445 }
1446
1447 /* Do the cleanup */
1448 INFO("Cleaning %s", tmp_path );
1449 if ( 0 != lxc_rmdir_onedev(tmp_path, NULL) ) {
1450 ERROR("Failed to cleanup autodev" );
1451 }
1452
1453 return 0;
1454}
1455
cc28d0b0 1456static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1457{
cc28d0b0
SH
1458 const struct lxc_rootfs *rootfs = &conf->rootfs;
1459
a0f379bf
DW
1460 if (!rootfs->path) {
1461 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1462 SYSERROR("Failed to make / rslave");
1463 return -1;
1464 }
c69bd12f 1465 return 0;
a0f379bf 1466 }
0ad19a3f 1467
12297168 1468 if (access(rootfs->mount, F_OK)) {
b1789442 1469 SYSERROR("failed to access to '%s', check it is present",
12297168 1470 rootfs->mount);
b1789442
DL
1471 return -1;
1472 }
1473
9be53773 1474 // First try mounting rootfs using a bdev
76a26f55 1475 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1476 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1477 bdev_put(bdev);
9be53773
SH
1478 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1479 return 0;
1480 }
59d66af2
SH
1481 if (bdev)
1482 bdev_put(bdev);
a17b1e65 1483 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1484 ERROR("failed to mount rootfs");
c3f0a28c 1485 return -1;
1486 }
0ad19a3f 1487
12297168 1488 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1489
ac778708
DL
1490 return 0;
1491}
1492
91e93c71
AV
1493int prepare_ramfs_root(char *root)
1494{
1495 char buf[LINELEN], *p;
1496 char nroot[PATH_MAX];
1497 FILE *f;
1498 int i;
1499 char *p2;
1500
1501 if (realpath(root, nroot) == NULL)
1502 return -1;
1503
1504 if (chdir("/") == -1)
1505 return -1;
1506
1507 /*
1508 * We could use here MS_MOVE, but in userns this mount is
1509 * locked and can't be moved.
1510 */
1511 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1512 SYSERROR("Failed to move %s into /", root);
1513 return -1;
1514 }
1515
1516 if (mount(".", NULL, NULL, MS_REC | MS_PRIVATE, NULL)) {
1517 SYSERROR("Failed to make . rprivate");
1518 return -1;
1519 }
1520
1521 /*
1522 * The following code cleans up inhereted mounts which are not
1523 * required for CT.
1524 *
1525 * The mountinfo file shows not all mounts, if a few points have been
1526 * unmounted between read operations from the mountinfo. So we need to
1527 * read mountinfo a few times.
1528 *
1529 * This loop can be skipped if a container uses unserns, because all
1530 * inherited mounts are locked and we should live with all this trash.
1531 */
1532 while (1) {
1533 int progress = 0;
1534
1535 f = fopen("./proc/self/mountinfo", "r");
1536 if (!f) {
1537 SYSERROR("Unable to open /proc/self/mountinfo");
1538 return -1;
1539 }
1540 while (fgets(buf, LINELEN, f)) {
1541 for (p = buf, i=0; p && i < 4; i++)
1542 p = strchr(p+1, ' ');
1543 if (!p)
1544 continue;
1545 p2 = strchr(p+1, ' ');
1546 if (!p2)
1547 continue;
1548
1549 *p2 = '\0';
1550 *p = '.';
1551
1552 if (strcmp(p + 1, "/") == 0)
1553 continue;
1554 if (strcmp(p + 1, "/proc") == 0)
1555 continue;
1556
1557 if (umount2(p, MNT_DETACH) == 0)
1558 progress++;
1559 }
1560 fclose(f);
1561 if (!progress)
1562 break;
1563 }
1564
1565 if (umount2("./proc", MNT_DETACH)) {
1566 SYSERROR("Unable to umount /proc");
1567 return -1;
1568 }
1569
1570 /* It is weird, but chdir("..") moves us in a new root */
1571 if (chdir("..") == -1) {
1572 SYSERROR("Unable to change working directory");
1573 return -1;
1574 }
1575
1576 if (chroot(".") == -1) {
1577 SYSERROR("Unable to chroot");
1578 return -1;
1579 }
1580
1581 return 0;
1582}
1583
74a3920a 1584static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1585{
ac778708
DL
1586 if (!rootfs->path)
1587 return 0;
1588
91e93c71
AV
1589 if (detect_ramfs_rootfs()) {
1590 if (prepare_ramfs_root(rootfs->mount))
1591 return -1;
1592 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1593 ERROR("failed to setup pivot root");
25368b52 1594 return -1;
c69bd12f
DL
1595 }
1596
25368b52 1597 return 0;
0ad19a3f 1598}
1599
d852c78c 1600static int setup_pts(int pts)
3c26f34e 1601{
77890c6d
SW
1602 char target[PATH_MAX];
1603
d852c78c
DL
1604 if (!pts)
1605 return 0;
3c26f34e 1606
1607 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1608 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1609 return -1;
1610 }
1611
7e40254a
JTLB
1612 if (mkdir("/dev/pts", 0755)) {
1613 if ( errno != EEXIST ) {
1614 SYSERROR("failed to create '/dev/pts'");
1615 return -1;
1616 }
1617 }
1618
a6afdde9 1619 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1620 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1621 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1622 return -1;
1623 }
1624
3c26f34e 1625 if (access("/dev/ptmx", F_OK)) {
1626 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1627 goto out;
36eb9bde 1628 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1629 return -1;
1630 }
1631
77890c6d
SW
1632 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1633 goto out;
1634
3c26f34e 1635 /* fallback here, /dev/pts/ptmx exists just mount bind */
1636 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1637 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1638 return -1;
1639 }
cd54d859
DL
1640
1641 INFO("created new pts instance");
d852c78c 1642
3c26f34e 1643out:
1644 return 0;
1645}
1646
cccc74b5
DL
/*
 * Apply the execution domain @persona through personality().
 * A value of -1 means "leave unchanged".  When the build has no
 * <sys/personality.h> this function does nothing.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1663
7c6ef2a2 1664static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1665 const struct lxc_console *console)
6e590161 1666{
63376d7d
DL
1667 char path[MAXPATHLEN];
1668 struct stat s;
7c6ef2a2 1669 int ret;
52e35957 1670
7c6ef2a2
SH
1671 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1672 if (ret >= sizeof(path)) {
959aee9c 1673 ERROR("console path too long");
7c6ef2a2
SH
1674 return -1;
1675 }
52e35957 1676
63376d7d 1677 if (access(path, F_OK)) {
466978b0 1678 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1679 return 0;
52e35957
DL
1680 }
1681
b5159817
DE
1682 if (console->master < 0) {
1683 INFO("no console");
f78a1f32
DL
1684 return 0;
1685 }
ed502555 1686
63376d7d
DL
1687 if (stat(path, &s)) {
1688 SYSERROR("failed to stat '%s'", path);
1689 return -1;
1690 }
1691
1692 if (chmod(console->name, s.st_mode)) {
1693 SYSERROR("failed to set mode '0%o' to '%s'",
1694 s.st_mode, console->name);
1695 return -1;
1696 }
13954cce 1697
63376d7d
DL
1698 if (mount(console->name, path, "none", MS_BIND, 0)) {
1699 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1700 return -1;
1701 }
1702
63376d7d 1703 INFO("console has been setup");
7c6ef2a2
SH
1704 return 0;
1705}
1706
1707static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1708 const struct lxc_console *console,
1709 char *ttydir)
1710{
1711 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1712 int ret;
1713
1714 /* create rootfs/dev/<ttydir> directory */
1715 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1716 ttydir);
1717 if (ret >= sizeof(path))
1718 return -1;
1719 ret = mkdir(path, 0755);
1720 if (ret && errno != EEXIST) {
959aee9c 1721 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1722 return -1;
1723 }
959aee9c 1724 INFO("created %s", path);
7c6ef2a2
SH
1725
1726 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1727 rootfs->mount, ttydir);
1728 if (ret >= sizeof(lxcpath)) {
959aee9c 1729 ERROR("console path too long");
7c6ef2a2
SH
1730 return -1;
1731 }
1732
1733 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1734 ret = unlink(path);
1735 if (ret && errno != ENOENT) {
959aee9c 1736 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1737 return -1;
1738 }
1739
1740 ret = creat(lxcpath, 0660);
1741 if (ret==-1 && errno != EEXIST) {
959aee9c 1742 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1743 return -1;
1744 }
4d44e274
SH
1745 if (ret >= 0)
1746 close(ret);
7c6ef2a2 1747
b5159817
DE
1748 if (console->master < 0) {
1749 INFO("no console");
7c6ef2a2
SH
1750 return 0;
1751 }
1752
1753 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1754 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1755 return -1;
1756 }
1757
1758 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1759 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1760 if (ret >= sizeof(lxcpath)) {
1761 ERROR("lxc/console path too long");
1762 return -1;
1763 }
7c6ef2a2
SH
1764 ret = symlink(lxcpath, path);
1765 if (ret) {
1766 SYSERROR("failed to create symlink for console");
1767 return -1;
1768 }
1769
1770 INFO("console has been setup on %s", lxcpath);
cd54d859 1771
6e590161 1772 return 0;
1773}
1774
7c6ef2a2
SH
1775static int setup_console(const struct lxc_rootfs *rootfs,
1776 const struct lxc_console *console,
1777 char *ttydir)
1778{
1779 /* We don't have a rootfs, /dev/console will be shared */
1780 if (!rootfs->path)
1781 return 0;
1782 if (!ttydir)
1783 return setup_dev_console(rootfs, console);
1784
1785 return setup_ttydir_console(rootfs, console, ttydir);
1786}
1787
1bd051a6
SH
1788static int setup_kmsg(const struct lxc_rootfs *rootfs,
1789 const struct lxc_console *console)
1790{
1791 char kpath[MAXPATHLEN];
1792 int ret;
1793
222fea5a
DE
1794 if (!rootfs->path)
1795 return 0;
1bd051a6
SH
1796 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1797 if (ret < 0 || ret >= sizeof(kpath))
1798 return -1;
1799
1800 ret = unlink(kpath);
1801 if (ret && errno != ENOENT) {
959aee9c 1802 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1803 return -1;
1804 }
1805
1806 ret = symlink("console", kpath);
1807 if (ret) {
1808 SYSERROR("failed to create symlink for kmsg");
1809 return -1;
1810 }
1811
1812 return 0;
1813}
1814
998ac676
RT
1815static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1816{
1817 struct mount_opt *mo;
1818
1819 /* If opt is found in mount_opt, set or clear flags.
1820 * Otherwise append it to data. */
1821
1822 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1823 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1824 if (mo->clear)
1825 *flags &= ~mo->flag;
1826 else
1827 *flags |= mo->flag;
1828 return;
1829 }
1830 }
1831
1832 if (strlen(*data))
1833 strcat(*data, ",");
1834 strcat(*data, opt);
1835}
1836
/*
 * Split the comma separated option string @mntopts into mount flags
 * and a data string.
 *
 * @mntflags receives the flags (0 when there are none).
 * @mntdata  receives NULL, or a malloc()ed string holding the options
 *           that are not flags; the caller has to free it.
 *
 * Returns 0 on success, -1 when memory could not be allocated.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *data;
	char *tok, *saveptr = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the input */
	data = malloc(strlen(copy) + 1);
	if (!data) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	data[0] = '\0';

	tok = strtok_r(copy, ",", &saveptr);
	while (tok) {
		parse_mntopt(tok, mntflags, &data);
		tok = strtok_r(NULL, ",", &saveptr);
	}

	if (data[0] != '\0')
		*mntdata = data;
	else
		free(data);
	free(copy);

	return 0;
}
1875
6fd5e769
SH
/*
 * Terminate @word in place at its first space or tab.  A string without
 * either is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1882
/*
 * Skip @nfields blank separated fields in @src.
 *
 * Each step moves past the non-blank characters and then past exactly
 * one separator (space or tab).  The scan stops early at the end of the
 * string.  Returns a pointer into @src.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int left = nfields;

	while (left-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1900
911324ef
DL
/*
 * Mount @fsname on @target and, for bind or remount requests, follow up
 * with a remount so that the requested flags are applied.
 *
 * With statvfs available, the nosuid/nodev/ro/noexec flags reported for
 * @fsname are added to the remount flags; a plain bind mount whose flags
 * already include all of them skips the remount.
 *
 * When @optional is set, a failing mount is only logged and 0 is
 * returned.  Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif

	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		int do_remount = 1;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

#ifdef HAVE_STATVFS
		if (statvfs(fsname, &sb) == 0) {
			unsigned long required_flags =
				sb.f_flag & (MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC);

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);
			/*
			 * If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(required_flags & ~mountflags)) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				do_remount = 0;
			} else {
				mountflags |= required_flags;
			}
		}
#endif

		if (do_remount && mount(fsname, target, fstype,
					mountflags | MS_REMOUNT, data)) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}
			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1975
4e4ca161
SH
/*
 * Remove 'optional', 'create=dir', and 'create=file' from mntopt.
 *
 * The first occurrence of each keyword is cut out of mnt_opts in place,
 * together with the comma following it.  When no comma follows, the
 * string is simply ended where the keyword starts.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t n;

	for (n = 0; n < sizeof(culled) / sizeof(culled[0]); n++) {
		char *hit = strstr(mntent->mnt_opts, culled[n]);
		char *comma;

		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma)
			/* close the gap, including the terminating NUL */
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		else
			/* no more mntopts, so just chop it here */
			*hit = '\0';
	}
}
2000
/*
 * Mount one fstab entry using mnt_dir exactly as given.
 *
 * The lxc specific options are honoured first: "create=dir" and
 * "create=file" create the mount target (failures are only logged, the
 * mount is still attempted) and "optional" makes a failing mount
 * non-fatal.  These options are then removed from mnt_opts before the
 * remaining options are parsed and the mount is performed.
 *
 * Returns the result of mount_entry(), or -1 when the options could not
 * be parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata = NULL;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		char *dircopy = strdup(mntent->mnt_dir);
		FILE *pathfile;

		/*
		 * dirname() may modify its argument or return a pointer to
		 * static storage, so its result must never be passed to
		 * free(); only the strdup()ed copy is released.
		 */
		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
2047
4e4ca161 2048static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
2049 const struct lxc_rootfs *rootfs,
2050 const char *lxc_name)
911324ef 2051{
013bd428 2052 char *aux;
59760f5d 2053 char path[MAXPATHLEN];
911324ef
DL
2054 unsigned long mntflags;
2055 char *mntdata;
80a881b2 2056 int r, ret = 0, offset;
67e571de 2057 const char *lxcpath;
34cfffb3
SG
2058 FILE *pathfile = NULL;
2059 char *pathdirname = NULL;
4f1d50d1 2060 bool optional = hasmntopt(mntent, "optional") != NULL;
0ad19a3f 2061
593e8478 2062 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
2063 if (!lxcpath) {
2064 ERROR("Out of memory");
2065 return -1;
2066 }
2067
80a881b2 2068 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
2069 * use $lxcpath/CN/rootfs as the target prefix */
2070 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
2071 if (r < 0 || r >= MAXPATHLEN)
2072 goto skipvarlib;
2073
2074 aux = strstr(mntent->mnt_dir, path);
2075 if (aux) {
2076 offset = strlen(path);
2077 goto skipabs;
2078 }
2079
2080skipvarlib:
013bd428
DL
2081 aux = strstr(mntent->mnt_dir, rootfs->path);
2082 if (!aux) {
2083 WARN("ignoring mount point '%s'", mntent->mnt_dir);
2084 goto out;
2085 }
80a881b2
SH
2086 offset = strlen(rootfs->path);
2087
2088skipabs:
013bd428 2089
9ba8130c 2090 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
2091 aux + offset);
2092 if (r < 0 || r >= MAXPATHLEN) {
2093 WARN("pathnme too long for '%s'", mntent->mnt_dir);
2094 ret = -1;
2095 goto out;
2096 }
2097
34cfffb3 2098 if (hasmntopt(mntent, "create=dir")) {
119126b6 2099 if (mkdir_p(path, 0755) < 0) {
34cfffb3
SG
2100 WARN("Failed to create mount target '%s'", path);
2101 ret = -1;
2102 }
2103 }
2104
2105 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
2106 pathdirname = strdup(path);
2107 pathdirname = dirname(pathdirname);
119126b6
SG
2108 if (mkdir_p(pathdirname, 0755) < 0) {
2109 WARN("Failed to create target directory");
2110 }
34cfffb3
SG
2111 pathfile = fopen(path, "wb");
2112 if (!pathfile) {
2113 WARN("Failed to create mount target '%s'", path);
2114 ret = -1;
2115 }
2116 else
2117 fclose(pathfile);
2118 }
4e4ca161 2119 cull_mntent_opt(mntent);
d330fe7b 2120
a17b1e65
SG
2121 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
2122 free(mntdata);
2123 return -1;
2124 }
2125
013bd428 2126 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1fc64d22 2127 mntflags, mntdata, optional);
0ad19a3f 2128
a17b1e65
SG
2129 free(mntdata);
2130
013bd428 2131out:
34cfffb3 2132 free(pathdirname);
911324ef
DL
2133 return ret;
2134}
d330fe7b 2135
/*
 * Mount one fstab entry whose mnt_dir is relative to the container
 * root: the target is "<rootfs>/<mnt_dir>".
 *
 * "create=dir", "create=file" and "optional" are handled as lxc specific
 * options and removed from mnt_opts before the mount.  Failing to create
 * the target is only logged, the mount is still attempted.
 *
 * Returns the result of mount_entry(), or -1 when the path is too long
 * or the options could not be parsed.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata = NULL;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0)
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		char *dircopy = strdup(path);
		FILE *pathfile;

		/*
		 * dirname() may modify its argument or return a pointer to
		 * static storage, so its result must never be passed to
		 * free(); only the strdup()ed copy is released.
		 */
		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}
	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
2190
80a881b2
SH
2191static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2192 const char *lxc_name)
911324ef 2193{
aaf901be
AM
2194 struct mntent mntent;
2195 char buf[4096];
911324ef 2196 int ret = -1;
e76b8764 2197
aaf901be 2198 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2199
911324ef 2200 if (!rootfs->path) {
aaf901be 2201 if (mount_entry_on_systemfs(&mntent))
e76b8764 2202 goto out;
911324ef 2203 continue;
e76b8764
CDC
2204 }
2205
911324ef 2206 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2207 if (mntent.mnt_dir[0] != '/') {
2208 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2209 rootfs->mount))
2210 goto out;
2211 continue;
2212 }
cd54d859 2213
aaf901be 2214 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2215 goto out;
0ad19a3f 2216 }
cd54d859 2217
0ad19a3f 2218 ret = 0;
cd54d859
DL
2219
2220 INFO("mount points have been setup");
0ad19a3f 2221out:
e7938e9e
MN
2222 return ret;
2223}
2224
80a881b2
SH
/*
 * Mount everything listed in the fstab file @fstab.
 *
 * A NULL @fstab means there is nothing to do and counts as success.
 * Returns -1 if the file cannot be opened, otherwise whatever
 * mount_file_entries() returns.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *fstab_stream;
	int rc;

	if (fstab == NULL)
		return 0;

	fstab_stream = setmntent(fstab, "r");
	if (fstab_stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fstab_stream, lxc_name);
	endmntent(fstab_stream);

	return rc;
}
2245
80a881b2
SH
2246static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2247 const char *lxc_name)
e7938e9e
MN
2248{
2249 FILE *file;
2250 struct lxc_list *iterator;
2251 char *mount_entry;
2252 int ret;
2253
2254 file = tmpfile();
2255 if (!file) {
2256 ERROR("tmpfile error: %m");
2257 return -1;
2258 }
2259
2260 lxc_list_for_each(iterator, mount) {
2261 mount_entry = iterator->elem;
1d6b1976 2262 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2263 }
2264
2265 rewind(file);
2266
80a881b2 2267 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2268
2269 fclose(file);
2270 return ret;
2271}
2272
bab88e68
CS
2273static int parse_cap(const char *cap)
2274{
2275 char *ptr = NULL;
2276 int i, capid = -1;
2277
7035407c
DE
2278 if (!strcmp(cap, "none"))
2279 return -2;
2280
bab88e68
CS
2281 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2282
2283 if (strcmp(cap, caps_opt[i].name))
2284 continue;
2285
2286 capid = caps_opt[i].value;
2287 break;
2288 }
2289
2290 if (capid < 0) {
2291 /* try to see if it's numeric, so the user may specify
2292 * capabilities that the running kernel knows about but
2293 * we don't */
2294 errno = 0;
2295 capid = strtol(cap, &ptr, 10);
2296 if (!ptr || *ptr != '\0' || errno != 0)
2297 /* not a valid number */
2298 capid = -1;
2299 else if (capid > lxc_caps_last_cap())
2300 /* we have a number but it's not a valid
2301 * capability */
2302 capid = -1;
2303 }
2304
2305 return capid;
2306}
2307
0769b82a
CS
2308int in_caplist(int cap, struct lxc_list *caps)
2309{
2310 struct lxc_list *iterator;
2311 int capid;
2312
2313 lxc_list_for_each(iterator, caps) {
2314 capid = parse_cap(iterator->elem);
2315 if (capid == cap)
2316 return 1;
2317 }
2318
2319 return 0;
2320}
2321
81810dd1
DL
2322static int setup_caps(struct lxc_list *caps)
2323{
2324 struct lxc_list *iterator;
2325 char *drop_entry;
bab88e68 2326 int capid;
81810dd1
DL
2327
2328 lxc_list_for_each(iterator, caps) {
2329
2330 drop_entry = iterator->elem;
2331
bab88e68 2332 capid = parse_cap(drop_entry);
d55bc1ad 2333
81810dd1 2334 if (capid < 0) {
1e11be34
DL
2335 ERROR("unknown capability %s", drop_entry);
2336 return -1;
81810dd1
DL
2337 }
2338
2339 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2340
2341 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2342 SYSERROR("failed to remove %s capability", drop_entry);
2343 return -1;
2344 }
81810dd1
DL
2345
2346 }
2347
1fb86a7c
SH
2348 DEBUG("capabilities have been setup");
2349
2350 return 0;
2351}
2352
2353static int dropcaps_except(struct lxc_list *caps)
2354{
2355 struct lxc_list *iterator;
2356 char *keep_entry;
1fb86a7c
SH
2357 int i, capid;
2358 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2359 INFO("found %d capabilities", numcaps);
1fb86a7c 2360
2caf9a97
SH
2361 if (numcaps <= 0 || numcaps > 200)
2362 return -1;
2363
1fb86a7c
SH
2364 // caplist[i] is 1 if we keep capability i
2365 int *caplist = alloca(numcaps * sizeof(int));
2366 memset(caplist, 0, numcaps * sizeof(int));
2367
2368 lxc_list_for_each(iterator, caps) {
2369
2370 keep_entry = iterator->elem;
2371
bab88e68 2372 capid = parse_cap(keep_entry);
1fb86a7c 2373
7035407c
DE
2374 if (capid == -2)
2375 continue;
2376
1fb86a7c
SH
2377 if (capid < 0) {
2378 ERROR("unknown capability %s", keep_entry);
2379 return -1;
2380 }
2381
8255688a 2382 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2383
2384 caplist[capid] = 1;
2385 }
2386 for (i=0; i<numcaps; i++) {
2387 if (caplist[i])
2388 continue;
2389 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2390 SYSERROR("failed to remove capability %d", i);
2391 return -1;
2392 }
1fb86a7c
SH
2393 }
2394
2395 DEBUG("capabilities have been setup");
81810dd1
DL
2396
2397 return 0;
2398}
2399
0ad19a3f 2400static int setup_hw_addr(char *hwaddr, const char *ifname)
2401{
2402 struct sockaddr sockaddr;
2403 struct ifreq ifr;
2404 int ret, fd;
2405
3cfc0f3a
MN
2406 ret = lxc_convert_mac(hwaddr, &sockaddr);
2407 if (ret) {
2408 ERROR("mac address '%s' conversion failed : %s",
2409 hwaddr, strerror(-ret));
0ad19a3f 2410 return -1;
2411 }
2412
2413 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2414 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2415 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2416
2417 fd = socket(AF_INET, SOCK_DGRAM, 0);
2418 if (fd < 0) {
3ab87b66 2419 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2420 return -1;
2421 }
2422
2423 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2424 close(fd);
2425 if (ret)
3ab87b66 2426 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2427
5da6aa8c 2428 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2429
0ad19a3f 2430 return ret;
2431}
2432
82d5ae15 2433static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2434{
82d5ae15
DL
2435 struct lxc_list *iterator;
2436 struct lxc_inetdev *inetdev;
3cfc0f3a 2437 int err;
0ad19a3f 2438
82d5ae15
DL
2439 lxc_list_for_each(iterator, ip) {
2440
2441 inetdev = iterator->elem;
2442
0093bb8c
DL
2443 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2444 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2445 if (err) {
2446 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2447 ifindex, strerror(-err));
82d5ae15
DL
2448 return -1;
2449 }
2450 }
2451
2452 return 0;
0ad19a3f 2453}
2454
82d5ae15 2455static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2456{
82d5ae15 2457 struct lxc_list *iterator;
7fa9074f 2458 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2459 int err;
0ad19a3f 2460
82d5ae15
DL
2461 lxc_list_for_each(iterator, ip) {
2462
2463 inet6dev = iterator->elem;
2464
b3df193c 2465 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2466 &inet6dev->mcast, &inet6dev->acast,
2467 inet6dev->prefix);
3cfc0f3a
MN
2468 if (err) {
2469 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2470 ifindex, strerror(-err));
82d5ae15 2471 return -1;
3cfc0f3a 2472 }
82d5ae15
DL
2473 }
2474
2475 return 0;
0ad19a3f 2476}
2477
82d5ae15 2478static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2479{
0ad19a3f 2480 char ifname[IFNAMSIZ];
0ad19a3f 2481 char *current_ifname = ifname;
3cfc0f3a 2482 int err;
0ad19a3f 2483
82d5ae15
DL
2484 /* empty network namespace */
2485 if (!netdev->ifindex) {
b0efbac4 2486 if (netdev->flags & IFF_UP) {
d472214b 2487 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2488 if (err) {
2489 ERROR("failed to set the loopback up : %s",
2490 strerror(-err));
82d5ae15
DL
2491 return -1;
2492 }
82d5ae15 2493 }
40790553
SH
2494 if (netdev->type != LXC_NET_VETH)
2495 return 0;
2496 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2497 }
13954cce 2498
b466dc33 2499 /* get the new ifindex in case of physical netdev */
40790553 2500 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2501 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2502 ERROR("failed to get ifindex for %s",
2503 netdev->link);
2504 return -1;
2505 }
40790553 2506 }
b466dc33 2507
82d5ae15
DL
2508 /* retrieve the name of the interface */
2509 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2510 ERROR("no interface corresponding to index '%d'",
82d5ae15 2511 netdev->ifindex);
0ad19a3f 2512 return -1;
2513 }
13954cce 2514
018ef520 2515 /* default: let the system to choose one interface name */
9d083402 2516 if (!netdev->name)
fb6d9b2f
DL
2517 netdev->name = netdev->type == LXC_NET_PHYS ?
2518 netdev->link : "eth%d";
018ef520 2519
82d5ae15 2520 /* rename the interface name */
40790553
SH
2521 if (strcmp(ifname, netdev->name) != 0) {
2522 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2523 if (err) {
2524 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2525 strerror(-err));
2526 return -1;
2527 }
018ef520
DL
2528 }
2529
2530 /* Re-read the name of the interface because its name has changed
2531 * and would be automatically allocated by the system
2532 */
82d5ae15 2533 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2534 ERROR("no interface corresponding to index '%d'",
82d5ae15 2535 netdev->ifindex);
018ef520 2536 return -1;
0ad19a3f 2537 }
2538
82d5ae15
DL
2539 /* set a mac address */
2540 if (netdev->hwaddr) {
2541 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2542 ERROR("failed to setup hw address for '%s'",
82d5ae15 2543 current_ifname);
0ad19a3f 2544 return -1;
2545 }
2546 }
2547
82d5ae15
DL
2548 /* setup ipv4 addresses on the interface */
2549 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2550 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2551 ifname);
2552 return -1;
2553 }
2554
82d5ae15
DL
2555 /* setup ipv6 addresses on the interface */
2556 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2557 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2558 ifname);
2559 return -1;
2560 }
2561
82d5ae15 2562 /* set the network device up */
b0efbac4 2563 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2564 int err;
2565
d472214b 2566 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2567 if (err) {
2568 ERROR("failed to set '%s' up : %s", current_ifname,
2569 strerror(-err));
0ad19a3f 2570 return -1;
2571 }
2572
2573 /* the network is up, make the loopback up too */
d472214b 2574 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2575 if (err) {
2576 ERROR("failed to set the loopback up : %s",
2577 strerror(-err));
0ad19a3f 2578 return -1;
2579 }
2580 }
2581
f8fee0e2
MK
2582 /* We can only set up the default routes after bringing
2583 * up the interface, sine bringing up the interface adds
2584 * the link-local routes and we can't add a default
2585 * route if the gateway is not reachable. */
2586
2587 /* setup ipv4 gateway on the interface */
2588 if (netdev->ipv4_gateway) {
2589 if (!(netdev->flags & IFF_UP)) {
2590 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2591 return -1;
2592 }
2593
2594 if (lxc_list_empty(&netdev->ipv4)) {
2595 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2596 return -1;
2597 }
2598
2599 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2600 if (err) {
fc739df5
SG
2601 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2602 if (err) {
2603 ERROR("failed to add ipv4 dest for '%s': %s",
2604 ifname, strerror(-err));
2605 }
2606
2607 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2608 if (err) {
2609 ERROR("failed to setup ipv4 gateway for '%s': %s",
2610 ifname, strerror(-err));
2611 if (netdev->ipv4_gateway_auto) {
2612 char buf[INET_ADDRSTRLEN];
2613 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2614 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2615 }
2616 return -1;
19a26f82 2617 }
f8fee0e2
MK
2618 }
2619 }
2620
2621 /* setup ipv6 gateway on the interface */
2622 if (netdev->ipv6_gateway) {
2623 if (!(netdev->flags & IFF_UP)) {
2624 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2625 return -1;
2626 }
2627
2628 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2629 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2630 return -1;
2631 }
2632
2633 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2634 if (err) {
fc739df5
SG
2635 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2636 if (err) {
2637 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2638 ifname, strerror(-err));
19a26f82 2639 }
fc739df5
SG
2640
2641 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2642 if (err) {
2643 ERROR("failed to setup ipv6 gateway for '%s': %s",
2644 ifname, strerror(-err));
2645 if (netdev->ipv6_gateway_auto) {
2646 char buf[INET6_ADDRSTRLEN];
2647 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2648 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2649 }
2650 return -1;
2651 }
f8fee0e2
MK
2652 }
2653 }
2654
cd54d859
DL
2655 DEBUG("'%s' has been setup", current_ifname);
2656
0ad19a3f 2657 return 0;
2658}
2659
5f4535a3 2660static int setup_network(struct lxc_list *network)
0ad19a3f 2661{
82d5ae15 2662 struct lxc_list *iterator;
82d5ae15 2663 struct lxc_netdev *netdev;
0ad19a3f 2664
5f4535a3 2665 lxc_list_for_each(iterator, network) {
cd54d859 2666
5f4535a3 2667 netdev = iterator->elem;
82d5ae15
DL
2668
2669 if (setup_netdev(netdev)) {
2670 ERROR("failed to setup netdev");
2671 return -1;
2672 }
2673 }
cd54d859 2674
5f4535a3
DL
2675 if (!lxc_list_empty(network))
2676 INFO("network has been setup");
cd54d859
DL
2677
2678 return 0;
0ad19a3f 2679}
2680
2af6bd1b
SH
2681/* try to move physical nics to the init netns */
2682void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2683{
2684 int i, ret, oldfd;
2685 char path[MAXPATHLEN];
2686
2687 if (netnsfd < 0)
2688 return;
2689
2690 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2691 if (ret < 0 || ret >= MAXPATHLEN) {
2692 WARN("Failed to open monitor netns fd");
2693 return;
2694 }
2695 if ((oldfd = open(path, O_RDONLY)) < 0) {
2696 SYSERROR("Failed to open monitor netns fd");
2697 return;
2698 }
2699 if (setns(netnsfd, 0) != 0) {
2700 SYSERROR("Failed to enter container netns to reset nics");
2701 close(oldfd);
2702 return;
2703 }
2704 for (i=0; i<conf->num_savednics; i++) {
2705 struct saved_nic *s = &conf->saved_nics[i];
8d357196 2706 if (lxc_netdev_move_by_index(s->ifindex, 1, NULL))
2af6bd1b
SH
2707 WARN("Error moving nic index:%d back to host netns",
2708 s->ifindex);
2709 }
2710 if (setns(oldfd, 0) != 0)
2711 SYSERROR("Failed to re-enter monitor's netns");
2712 close(oldfd);
2713}
2714
2715void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2716{
2717 int i;
2718
2af6bd1b
SH
2719 if (conf->num_savednics == 0)
2720 return;
2721
7b35f3d6 2722 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2723 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2724 for (i=0; i<conf->num_savednics; i++) {
2725 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2726 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2727 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2728 free(s->orig_name);
2729 }
2730 conf->num_savednics = 0;
7b35f3d6
SH
2731}
2732
ae9242c8
SH
2733static char *default_rootfs_mount = LXCROOTFSMOUNT;
2734
7b379ab3 2735struct lxc_conf *lxc_conf_init(void)
089cd8b8 2736{
7b379ab3 2737 struct lxc_conf *new;
26ddeedd 2738 int i;
7b379ab3
MN
2739
2740 new = malloc(sizeof(*new));
2741 if (!new) {
2742 ERROR("lxc_conf_init : %m");
2743 return NULL;
2744 }
2745 memset(new, 0, sizeof(*new));
2746
b40a606e 2747 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2748 new->personality = -1;
bc6928ff 2749 new->autodev = -1;
596a818d
DE
2750 new->console.log_path = NULL;
2751 new->console.log_fd = -1;
28a4b0e5 2752 new->console.path = NULL;
63376d7d 2753 new->console.peer = -1;
b5159817
DE
2754 new->console.peerpty.busy = -1;
2755 new->console.peerpty.master = -1;
2756 new->console.peerpty.slave = -1;
63376d7d
DL
2757 new->console.master = -1;
2758 new->console.slave = -1;
2759 new->console.name[0] = '\0';
d2e30e99 2760 new->maincmd_fd = -1;
76a26f55 2761 new->nbd_idx = -1;
54c30e29 2762 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2763 if (!new->rootfs.mount) {
2764 ERROR("lxc_conf_init : %m");
2765 free(new);
2766 return NULL;
2767 }
2f3f41d0 2768 new->kmsg = 1;
7b379ab3
MN
2769 lxc_list_init(&new->cgroup);
2770 lxc_list_init(&new->network);
2771 lxc_list_init(&new->mount_list);
81810dd1 2772 lxc_list_init(&new->caps);
1fb86a7c 2773 lxc_list_init(&new->keepcaps);
f6d3e3e4 2774 lxc_list_init(&new->id_map);
f979ac15 2775 lxc_list_init(&new->includes);
4184c3e1 2776 lxc_list_init(&new->aliens);
7c661726 2777 lxc_list_init(&new->environment);
26ddeedd
SH
2778 for (i=0; i<NUM_LXC_HOOKS; i++)
2779 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2780 lxc_list_init(&new->groups);
fe4de9a6
DE
2781 new->lsm_aa_profile = NULL;
2782 new->lsm_se_context = NULL;
5112cd70 2783 new->tmp_umount_proc = 0;
7b379ab3 2784
9f30a190
MM
2785 for (i = 0; i < LXC_NS_MAX; i++)
2786 new->inherit_ns_fd[i] = -1;
2787
7b379ab3 2788 return new;
089cd8b8
DL
2789}
2790
a589434e 2791static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2792{
8634bc19 2793 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2794 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2795 int err;
13954cce 2796
e892973e
DL
2797 if (netdev->priv.veth_attr.pair)
2798 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2799 else {
9ba8130c
SH
2800 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2801 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2802 ERROR("veth1 name too long");
2803 return -1;
2804 }
a0265685 2805 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2806 if (!veth1) {
2807 ERROR("failed to allocate a temporary name");
2808 return -1;
2809 }
74a2b586
JK
2810 /* store away for deconf */
2811 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2812 }
82d5ae15 2813
0e391e57 2814 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2815 veth2 = lxc_mkifname(veth2buf);
ad40563e 2816 if (!veth2) {
82d5ae15 2817 ERROR("failed to allocate a temporary name");
ad40563e 2818 goto out_delete;
0ad19a3f 2819 }
2820
3cfc0f3a
MN
2821 err = lxc_veth_create(veth1, veth2);
2822 if (err) {
2823 ERROR("failed to create %s-%s : %s", veth1, veth2,
2824 strerror(-err));
ad40563e 2825 goto out_delete;
0ad19a3f 2826 }
13954cce 2827
49684c0b
CS
2828 /* changing the high byte of the mac address to 0xfe, the bridge interface
2829 * will always keep the host's mac address and not take the mac address
2830 * of a container */
2831 err = setup_private_host_hw_addr(veth1);
2832 if (err) {
2833 ERROR("failed to change mac address of host interface '%s' : %s",
2834 veth1, strerror(-err));
2835 goto out_delete;
2836 }
2837
82d5ae15 2838 if (netdev->mtu) {
d472214b 2839 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2840 if (!err)
d472214b 2841 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2842 if (err) {
2843 ERROR("failed to set mtu '%s' for %s-%s : %s",
2844 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2845 goto out_delete;
75d09f83
DL
2846 }
2847 }
2848
3cfc0f3a
MN
2849 if (netdev->link) {
2850 err = lxc_bridge_attach(netdev->link, veth1);
2851 if (err) {
2852 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2853 veth1, netdev->link, strerror(-err));
2854 goto out_delete;
2855 }
eb14c10a
DL
2856 }
2857
82d5ae15
DL
2858 netdev->ifindex = if_nametoindex(veth2);
2859 if (!netdev->ifindex) {
36eb9bde 2860 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2861 goto out_delete;
2862 }
2863
d472214b 2864 err = lxc_netdev_up(veth1);
6e35af2e
DL
2865 if (err) {
2866 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2867 goto out_delete;
0ad19a3f 2868 }
2869
e3b4c4c4 2870 if (netdev->upscript) {
751d9dcd
DL
2871 err = run_script(handler->name, "net", netdev->upscript, "up",
2872 "veth", veth1, (char*) NULL);
2873 if (err)
e3b4c4c4 2874 goto out_delete;
e3b4c4c4
ST
2875 }
2876
a589434e 2877 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2878 veth1, veth2, netdev->ifindex);
2879
6ab9ab6d 2880 return 0;
eb14c10a
DL
2881
2882out_delete:
b84f58b9 2883 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2884 if (!netdev->priv.veth_attr.pair && veth1)
2885 free(veth1);
2886 if(veth2)
2887 free(veth2);
6ab9ab6d 2888 return -1;
13954cce 2889}
d957ae2d 2890
74a2b586
JK
2891static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2892{
2893 char *veth1;
2894 int err;
2895
2896 if (netdev->priv.veth_attr.pair)
2897 veth1 = netdev->priv.veth_attr.pair;
2898 else
2899 veth1 = netdev->priv.veth_attr.veth1;
2900
2901 if (netdev->downscript) {
2902 err = run_script(handler->name, "net", netdev->downscript,
2903 "down", "veth", veth1, (char*) NULL);
2904 if (err)
2905 return -1;
2906 }
2907 return 0;
2908}
2909
a589434e 2910static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2911{
0e391e57 2912 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2913 int err;
d957ae2d
MT
2914
2915 if (!netdev->link) {
2916 ERROR("no link specified for macvlan netdev");
2917 return -1;
2918 }
13954cce 2919
9ba8130c
SH
2920 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2921 if (err >= sizeof(peerbuf))
2922 return -1;
82d5ae15 2923
a0265685 2924 peer = lxc_mkifname(peerbuf);
ad40563e 2925 if (!peer) {
82d5ae15
DL
2926 ERROR("failed to make a temporary name");
2927 return -1;
0ad19a3f 2928 }
2929
3cfc0f3a
MN
2930 err = lxc_macvlan_create(netdev->link, peer,
2931 netdev->priv.macvlan_attr.mode);
2932 if (err) {
2933 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2934 peer, netdev->link, strerror(-err));
ad40563e 2935 goto out;
0ad19a3f 2936 }
2937
82d5ae15
DL
2938 netdev->ifindex = if_nametoindex(peer);
2939 if (!netdev->ifindex) {
36eb9bde 2940 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2941 goto out;
22ebac19 2942 }
2943
e3b4c4c4 2944 if (netdev->upscript) {
751d9dcd
DL
2945 err = run_script(handler->name, "net", netdev->upscript, "up",
2946 "macvlan", netdev->link, (char*) NULL);
2947 if (err)
ad40563e 2948 goto out;
e3b4c4c4
ST
2949 }
2950
a589434e 2951 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2952 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2953
d957ae2d 2954 return 0;
ad40563e
ÇO
2955out:
2956 lxc_netdev_delete_by_name(peer);
2957 free(peer);
2958 return -1;
0ad19a3f 2959}
2960
74a2b586
JK
2961static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2962{
2963 int err;
2964
2965 if (netdev->downscript) {
2966 err = run_script(handler->name, "net", netdev->downscript,
2967 "down", "macvlan", netdev->link,
2968 (char*) NULL);
2969 if (err)
2970 return -1;
2971 }
2972 return 0;
2973}
2974
a589434e
JN
2975/* XXX: merge with instantiate_macvlan */
2976static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2977{
2978 char peer[IFNAMSIZ];
3cfc0f3a 2979 int err;
26c39028
JHS
2980
2981 if (!netdev->link) {
2982 ERROR("no link specified for vlan netdev");
2983 return -1;
2984 }
2985
9ba8130c
SH
2986 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2987 if (err >= sizeof(peer)) {
2988 ERROR("peer name too long");
2989 return -1;
2990 }
26c39028 2991
3cfc0f3a
MN
2992 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2993 if (err) {
2994 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2995 peer, netdev->link, strerror(-err));
26c39028
JHS
2996 return -1;
2997 }
2998
2999 netdev->ifindex = if_nametoindex(peer);
3000 if (!netdev->ifindex) {
3001 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 3002 lxc_netdev_delete_by_name(peer);
26c39028
JHS
3003 return -1;
3004 }
3005
a589434e 3006 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
3007 netdev->ifindex);
3008
26c39028
JHS
3009 return 0;
3010}
3011
74a2b586
JK
/*
 * Deconfiguration hook for vlan devices: nothing to undo here, always
 * reports success.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3016
a589434e 3017static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 3018{
6168e99f
DL
3019 if (!netdev->link) {
3020 ERROR("no link specified for the physical interface");
3021 return -1;
3022 }
3023
9d083402 3024 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 3025 if (!netdev->ifindex) {
9d083402 3026 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 3027 return -1;
3028 }
3029
e3b4c4c4
ST
3030 if (netdev->upscript) {
3031 int err;
751d9dcd
DL
3032 err = run_script(handler->name, "net", netdev->upscript,
3033 "up", "phys", netdev->link, (char*) NULL);
3034 if (err)
e3b4c4c4 3035 return -1;
e3b4c4c4
ST
3036 }
3037
82d5ae15 3038 return 0;
0ad19a3f 3039}
3040
74a2b586
JK
3041static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
3042{
3043 int err;
3044
3045 if (netdev->downscript) {
3046 err = run_script(handler->name, "net", netdev->downscript,
3047 "down", "phys", netdev->link, (char*) NULL);
3048 if (err)
3049 return -1;
3050 }
3051 return 0;
3052}
3053
a589434e 3054static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
3055{
3056 netdev->ifindex = 0;
3057 return 0;
3058}
3059
a589434e 3060static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 3061{
82d5ae15 3062 netdev->ifindex = 0;
e3b4c4c4
ST
3063 if (netdev->upscript) {
3064 int err;
751d9dcd
DL
3065 err = run_script(handler->name, "net", netdev->upscript,
3066 "up", "empty", (char*) NULL);
3067 if (err)
e3b4c4c4 3068 return -1;
e3b4c4c4 3069 }
82d5ae15 3070 return 0;
0ad19a3f 3071}
3072
74a2b586
JK
3073static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
3074{
3075 int err;
3076
3077 if (netdev->downscript) {
3078 err = run_script(handler->name, "net", netdev->downscript,
3079 "down", "empty", (char*) NULL);
3080 if (err)
3081 return -1;
3082 }
3083 return 0;
3084}
3085
26b797f3
SH
/*
 * Deconfiguration hook for the "none" network type: nothing was
 * created, so there is nothing to undo; always reports success.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3090
3091int lxc_requests_empty_network(struct lxc_handler *handler)
3092{
3093 struct lxc_list *network = &handler->conf->network;
3094 struct lxc_list *iterator;
3095 struct lxc_netdev *netdev;
3096 bool found_none = false, found_nic = false;
3097
3098 if (lxc_list_empty(network))
3099 return 0;
3100
3101 lxc_list_for_each(iterator, network) {
3102
3103 netdev = iterator->elem;
3104
3105 if (netdev->type == LXC_NET_NONE)
3106 found_none = true;
3107 else
3108 found_nic = true;
3109 }
3110 if (found_none && !found_nic)
3111 return 1;
3112 return 0;
3113}
3114
e3b4c4c4 3115int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 3116{
e3b4c4c4 3117 struct lxc_list *network = &handler->conf->network;
82d5ae15 3118 struct lxc_list *iterator;
82d5ae15 3119 struct lxc_netdev *netdev;
cbef6c52
SH
3120 int am_root = (getuid() == 0);
3121
3122 if (!am_root)
3123 return 0;
0ad19a3f 3124
5f4535a3 3125 lxc_list_for_each(iterator, network) {
0ad19a3f 3126
5f4535a3 3127 netdev = iterator->elem;
13954cce 3128
24654103 3129 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 3130 ERROR("invalid network configuration type '%d'",
5f4535a3 3131 netdev->type);
82d5ae15
DL
3132 return -1;
3133 }
0ad19a3f 3134
e3b4c4c4 3135 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
3136 ERROR("failed to create netdev");
3137 return -1;
3138 }
e3b4c4c4 3139
0ad19a3f 3140 }
3141
3142 return 0;
3143}
3144
74a2b586 3145void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 3146{
74a2b586 3147 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
3148 struct lxc_list *iterator;
3149 struct lxc_netdev *netdev;
3150
3151 lxc_list_for_each(iterator, network) {
3152 netdev = iterator->elem;
d472214b 3153
74a2b586 3154 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
3155 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
3156 WARN("failed to rename to the initial name the " \
3157 "netdev '%s'", netdev->link);
d472214b 3158 continue;
d8f8e352 3159 }
d472214b 3160
74a2b586
JK
3161 if (netdev_deconf[netdev->type](handler, netdev)) {
3162 WARN("failed to destroy netdev");
3163 }
3164
d8f8e352
DL
3165 /* Recent kernel remove the virtual interfaces when the network
3166 * namespace is destroyed but in case we did not moved the
3167 * interface to the network namespace, we have to destroy it
3168 */
74a2b586
JK
3169 if (netdev->ifindex != 0 &&
3170 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3171 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3172 }
3173}
3174
45e854dc
SG
3175#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3176
fe1f672f
ÇO
3177/* lxc-user-nic returns "interface_name:interface_name\n" */
3178#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 3179static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3180{
3181 pid_t child;
a7242d9a
ÇO
3182 int bytes, pipefd[2];
3183 char *token, *saveptr = NULL;
fe1f672f 3184 char buffer[MAX_BUFFER_SIZE];
cbef6c52
SH
3185
3186 if (netdev->type != LXC_NET_VETH) {
3187 ERROR("nic type %d not support for unprivileged use",
3188 netdev->type);
3189 return -1;
3190 }
3191
a7242d9a
ÇO
3192 if(pipe(pipefd) < 0) {
3193 SYSERROR("pipe failed");
3194 return -1;
3195 }
3196
cbef6c52
SH
3197 if ((child = fork()) < 0) {
3198 SYSERROR("fork");
a7242d9a
ÇO
3199 close(pipefd[0]);
3200 close(pipefd[1]);
3201 return -1;
3202 }
3203
3204 if (child == 0) { // child
3205 /* close the read-end of the pipe */
3206 close(pipefd[0]);
3207 /* redirect the stdout to write-end of the pipe */
3208 dup2(pipefd[1], STDOUT_FILENO);
3209 /* close the write-end of the pipe */
fe1f672f 3210 close(pipefd[1]);
a7242d9a
ÇO
3211
3212 // Call lxc-user-nic pid type bridge
3213 char pidstr[20];
3214 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
3215 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3216 pidstr[19] = '\0';
3217 execvp(args[0], args);
3218 SYSERROR("execvp lxc-user-nic");
3219 exit(1);
3220 }
3221
3222 /* close the write-end of the pipe */
3223 close(pipefd[1]);
3224
fe1f672f 3225 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3226 if (bytes < 0) {
3227 SYSERROR("read failed");
3228 }
3229 buffer[bytes - 1] = '\0';
3230
3231 if (wait_for_pid(child) != 0) {
3232 close(pipefd[0]);
cbef6c52
SH
3233 return -1;
3234 }
3235
a7242d9a
ÇO
3236 /* close the read-end of the pipe */
3237 close(pipefd[0]);
cbef6c52 3238
a7242d9a
ÇO
3239 /* fill netdev->name field */
3240 token = strtok_r(buffer, ":", &saveptr);
3241 if (!token)
3242 return -1;
658979c5
SH
3243 netdev->name = malloc(IFNAMSIZ+1);
3244 if (!netdev->name) {
3245 ERROR("Out of memory");
3246 return -1;
3247 }
3248 memset(netdev->name, 0, IFNAMSIZ+1);
3249 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3250
3251 /* fill netdev->veth_attr.pair field */
3252 token = strtok_r(NULL, ":", &saveptr);
3253 if (!token)
3254 return -1;
3255 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3256 if (!netdev->priv.veth_attr.pair) {
3257 ERROR("Out of memory");
3258 return -1;
3259 }
45e854dc 3260
a7242d9a 3261 return 0;
cbef6c52
SH
3262}
3263
5f4535a3 3264int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3265{
82d5ae15 3266 struct lxc_list *iterator;
82d5ae15 3267 struct lxc_netdev *netdev;
cbef6c52 3268 int am_root = (getuid() == 0);
3cfc0f3a 3269 int err;
0ad19a3f 3270
5f4535a3 3271 lxc_list_for_each(iterator, network) {
82d5ae15 3272
5f4535a3 3273 netdev = iterator->elem;
82d5ae15 3274
fbb16259 3275 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3276 if (unpriv_assign_nic(netdev, pid))
3277 return -1;
658979c5
SH
3278 // lxc-user-nic has moved the nic to the new ns.
3279 // unpriv_assign_nic() fills in netdev->name.
3280 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3281 continue;
3282 }
236087a6 3283
fbb16259
SH
3284 /* empty network namespace, nothing to move */
3285 if (!netdev->ifindex)
3286 continue;
3287
8d357196 3288 err = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3cfc0f3a
MN
3289 if (err) {
3290 ERROR("failed to move '%s' to the container : %s",
3291 netdev->link, strerror(-err));
82d5ae15
DL
3292 return -1;
3293 }
3294
c1c75c04 3295 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3296 }
3297
3298 return 0;
3299}
3300
251d0d2a
DE
3301static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3302 size_t buf_size)
f6d3e3e4
SH
3303{
3304 char path[PATH_MAX];
e4ccd113 3305 int ret, closeret;
f6d3e3e4
SH
3306 FILE *f;
3307
3308 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3309 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3310 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3311 return -E2BIG;
3312 }
3313 f = fopen(path, "w");
3314 if (!f) {
3315 perror("open");
3316 return -EINVAL;
3317 }
251d0d2a 3318 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3319 if (ret < 0)
e4ccd113
SH
3320 SYSERROR("writing id mapping");
3321 closeret = fclose(f);
3322 if (closeret)
3323 SYSERROR("writing id mapping");
3324 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3325}
3326
3327int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3328{
3329 struct lxc_list *iterator;
3330 struct id_map *map;
8afb3e61 3331 int ret = 0, use_shadow = 0;
251d0d2a 3332 enum idtype type;
8afb3e61
SG
3333 char *buf = NULL, *pos, *cmdpath = NULL;
3334
22038de5
SH
3335 /*
3336 * If newuidmap exists, that is, if shadow is handing out subuid
3337 * ranges, then insist that root also reserve ranges in subuid. This
3338 * will protected it by preventing another user from being handed the
3339 * range by shadow.
3340 */
9d9c111c 3341 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3342 if (cmdpath) {
3343 use_shadow = 1;
3344 free(cmdpath);
3345 }
3346
0e6e3a41
SG
3347 if (!use_shadow && geteuid()) {
3348 ERROR("Missing newuidmap/newgidmap");
3349 return -1;
3350 }
251d0d2a
DE
3351
3352 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3353 int left, fill;
cf3ef16d
SH
3354 int had_entry = 0;
3355 if (!buf) {
3356 buf = pos = malloc(4096);
4f7521b4
SH
3357 if (!buf)
3358 return -ENOMEM;
cf3ef16d
SH
3359 }
3360 pos = buf;
0e6e3a41 3361 if (use_shadow)
d1838f34 3362 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3363 type == ID_TYPE_UID ? 'u' : 'g',
3364 pid);
4f7521b4 3365
cf3ef16d
SH
3366 lxc_list_for_each(iterator, idmap) {
3367 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3368 map = iterator->elem;
cf3ef16d
SH
3369 if (map->idtype != type)
3370 continue;
3371
3372 had_entry = 1;
3373 left = 4096 - (pos - buf);
d1838f34 3374 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3375 use_shadow ? " " : "",
d1838f34 3376 map->nsid, map->hostid, map->range,
0e6e3a41 3377 use_shadow ? "" : "\n");
cf3ef16d
SH
3378 if (fill <= 0 || fill >= left)
3379 SYSERROR("snprintf failed, too many mappings");
3380 pos += fill;
251d0d2a 3381 }
cf3ef16d 3382 if (!had_entry)
4f7521b4 3383 continue;
cf3ef16d 3384
0e6e3a41 3385 if (!use_shadow) {
cf3ef16d 3386 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3387 } else {
3388 left = 4096 - (pos - buf);
3389 fill = snprintf(pos, left, "\n");
3390 if (fill <= 0 || fill >= left)
3391 SYSERROR("snprintf failed, too many mappings");
3392 pos += fill;
cf3ef16d 3393 ret = system(buf);
d1838f34 3394 }
cf3ef16d 3395
f6d3e3e4
SH
3396 if (ret)
3397 break;
3398 }
251d0d2a 3399
4f7521b4
SH
3400 if (buf)
3401 free(buf);
f6d3e3e4
SH
3402 return ret;
3403}
3404
cf3ef16d 3405/*
7b50c609
TS
3406 * return the host uid/gid to which the container root is mapped in
3407 * *val.
0b3a6504 3408 * Return true if id was found, false otherwise.
cf3ef16d 3409 */
2a9a80cb 3410bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3411 unsigned long *val)
cf3ef16d
SH
3412{
3413 struct lxc_list *it;
3414 struct id_map *map;
3415
3416 lxc_list_for_each(it, &conf->id_map) {
3417 map = it->elem;
7b50c609 3418 if (map->idtype != idtype)
cf3ef16d
SH
3419 continue;
3420 if (map->nsid != 0)
3421 continue;
2a9a80cb
SH
3422 *val = map->hostid;
3423 return true;
cf3ef16d 3424 }
2a9a80cb 3425 return false;
cf3ef16d
SH
3426}
3427
2133f58c 3428int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3429{
3430 struct lxc_list *it;
3431 struct id_map *map;
3432 lxc_list_for_each(it, &conf->id_map) {
3433 map = it->elem;
2133f58c 3434 if (map->idtype != idtype)
cf3ef16d
SH
3435 continue;
3436 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3437 return (id - map->hostid) + map->nsid;
cf3ef16d 3438 }
57d116ab 3439 return -1;
cf3ef16d
SH
3440}
3441
2133f58c 3442int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3443{
3444 struct lxc_list *it;
3445 struct id_map *map;
2133f58c 3446 unsigned int freeid = 0;
cf3ef16d
SH
3447again:
3448 lxc_list_for_each(it, &conf->id_map) {
3449 map = it->elem;
2133f58c 3450 if (map->idtype != idtype)
cf3ef16d
SH
3451 continue;
3452 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3453 freeid = map->nsid + map->range;
3454 goto again;
3455 }
3456 }
3457 return freeid;
3458}
3459
19a26f82
MK
3460int lxc_find_gateway_addresses(struct lxc_handler *handler)
3461{
3462 struct lxc_list *network = &handler->conf->network;
3463 struct lxc_list *iterator;
3464 struct lxc_netdev *netdev;
3465 int link_index;
3466
3467 lxc_list_for_each(iterator, network) {
3468 netdev = iterator->elem;
3469
3470 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3471 continue;
3472
3473 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3474 ERROR("gateway = auto only supported for "
3475 "veth and macvlan");
3476 return -1;
3477 }
3478
3479 if (!netdev->link) {
3480 ERROR("gateway = auto needs a link interface");
3481 return -1;
3482 }
3483
3484 link_index = if_nametoindex(netdev->link);
3485 if (!link_index)
3486 return -EINVAL;
3487
3488 if (netdev->ipv4_gateway_auto) {
3489 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3490 ERROR("failed to automatically find ipv4 gateway "
3491 "address from link interface '%s'", netdev->link);
3492 return -1;
3493 }
3494 }
3495
3496 if (netdev->ipv6_gateway_auto) {
3497 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3498 ERROR("failed to automatically find ipv6 gateway "
3499 "address from link interface '%s'", netdev->link);
3500 return -1;
3501 }
3502 }
3503 }
3504
3505 return 0;
3506}
3507
5e4a62bf 3508int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3509{
5e4a62bf 3510 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3511 int i, ret;
b0a33c1e 3512
5e4a62bf
DL
3513 /* no tty in the configuration */
3514 if (!conf->tty)
b0a33c1e 3515 return 0;
3516
13954cce 3517 tty_info->pty_info =
e4e7d59d 3518 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3519 if (!tty_info->pty_info) {
36eb9bde 3520 SYSERROR("failed to allocate pty_info");
985d15b1 3521 return -1;
b0a33c1e 3522 }
3523
985d15b1 3524 for (i = 0; i < conf->tty; i++) {
13954cce 3525
b0a33c1e 3526 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3527
025ed0f3
SH
3528 process_lock();
3529 ret = openpty(&pty_info->master, &pty_info->slave,
3530 pty_info->name, NULL, NULL);
3531 process_unlock();
3532 if (ret) {
36eb9bde 3533 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3534 tty_info->nbtty = i;
3535 lxc_delete_tty(tty_info);
3536 return -1;
b0a33c1e 3537 }
3538
5332bb84
DL
3539 DEBUG("allocated pty '%s' (%d/%d)",
3540 pty_info->name, pty_info->master, pty_info->slave);
3541
3ec1648d 3542 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3543 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3544 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3545
b0a33c1e 3546 pty_info->busy = 0;
3547 }
3548
985d15b1 3549 tty_info->nbtty = conf->tty;
1ac470c0
DL
3550
3551 INFO("tty's configured");
3552
985d15b1 3553 return 0;
b0a33c1e 3554}
3555
3556void lxc_delete_tty(struct lxc_tty_info *tty_info)
3557{
3558 int i;
3559
3560 for (i = 0; i < tty_info->nbtty; i++) {
3561 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3562
3563 close(pty_info->master);
3564 close(pty_info->slave);
3565 }
3566
3567 free(tty_info->pty_info);
3568 tty_info->nbtty = 0;
3569}
3570
f6d3e3e4 3571/*
7b50c609
TS
3572 * chown_mapped_root: for an unprivileged user with uid/gid X to
3573 * chown a dir to subuid/subgid Y, he needs to run chown as root
3574 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3575 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3576 * root is privileged with respect to hostuid/hostgid X, allowing
3577 * him to do the chown.
f6d3e3e4 3578 */
c4d10a05 3579int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3580{
7b50c609
TS
3581 uid_t rootuid;
3582 gid_t rootgid;
c4d10a05 3583 pid_t pid;
2a9a80cb 3584 unsigned long val;
a7ef8753 3585 char *chownpath = path;
f6d3e3e4 3586
2a9a80cb 3587 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3588 ERROR("No mapping for container root");
3589 return -1;
f6d3e3e4 3590 }
7b50c609
TS
3591 rootuid = (uid_t) val;
3592 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3593 ERROR("No mapping for container root");
3594 return -1;
3595 }
3596 rootgid = (gid_t) val;
2a9a80cb 3597
a7ef8753
SH
3598 /*
3599 * In case of overlay, we want only the writeable layer
3600 * to be chowned
3601 */
1f92162d 3602 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3603 chownpath = strchr(path, ':');
3604 if (!chownpath) {
3605 ERROR("Bad overlay path: %s", path);
3606 return -1;
3607 }
3608 chownpath = strchr(chownpath+1, ':');
3609 if (!chownpath) {
3610 ERROR("Bad overlay path: %s", path);
3611 return -1;
3612 }
3613 chownpath++;
3614 }
3615 path = chownpath;
c4d10a05 3616 if (geteuid() == 0) {
7b50c609 3617 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3618 ERROR("Error chowning %s", path);
3619 return -1;
3620 }
3621 return 0;
3622 }
f3d7e4ca 3623
7b50c609 3624 if (rootuid == geteuid()) {
f3d7e4ca
SH
3625 // nothing to do
3626 INFO("%s: container root is our uid; no need to chown" ,__func__);
3627 return 0;
3628 }
3629
c4d10a05
SH
3630 pid = fork();
3631 if (pid < 0) {
3632 SYSERROR("Failed forking");
f6d3e3e4
SH
3633 return -1;
3634 }
c4d10a05 3635 if (!pid) {
7b50c609
TS
3636 int hostuid = geteuid(), hostgid = getegid(), ret;
3637 struct stat sb;
3638 char map1[100], map2[100], map3[100], map4[100], map5[100];
3639 char ugid[100];
3640 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3641 "-m", map3, "-m", map5,
3642 "--", "chown", ugid, path, NULL };
3643 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3644 "-m", map3, "-m", map4, "-m", map5,
3645 "--", "chown", ugid, path, NULL };
3646
3647 // save the current gid of "path"
3648 if (stat(path, &sb) < 0) {
3649 ERROR("Error stat %s", path);
3650 return -1;
3651 }
f6d3e3e4 3652
9a7c2aba
SH
3653 /*
3654 * A file has to be group-owned by a gid mapped into the
3655 * container, or the container won't be privileged over it.
3656 */
3657 if (sb.st_uid == geteuid() &&
3658 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3659 chown(path, -1, hostgid) < 0) {
3660 ERROR("Failed chgrping %s", path);
7b50c609
TS
3661 return -1;
3662 }
3663
3664 // "u:0:rootuid:1"
3665 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3666 if (ret < 0 || ret >= 100) {
3667 ERROR("Error uid printing map string");
f6d3e3e4
SH
3668 return -1;
3669 }
c4d10a05 3670
98e5ba51
SH
3671 // "u:hostuid:hostuid:1"
3672 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3673 if (ret < 0 || ret >= 100) {
3674 ERROR("Error uid printing map string");
3675 return -1;
3676 }
3677
7b50c609
TS
3678 // "g:0:rootgid:1"
3679 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3680 if (ret < 0 || ret >= 100) {
7b50c609 3681 ERROR("Error gid printing map string");
c4d10a05
SH
3682 return -1;
3683 }
3684
7b50c609 3685 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3686 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3687 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3688 if (ret < 0 || ret >= 100) {
3689 ERROR("Error gid printing map string");
3690 return -1;
3691 }
3692
3693 // "g:hostgid:hostgid:1"
3694 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3695 if (ret < 0 || ret >= 100) {
3696 ERROR("Error gid printing map string");
3697 return -1;
3698 }
3699
3700 // "0:pathgid" (chown)
b4c1e35d 3701 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3702 if (ret < 0 || ret >= 100) {
3703 ERROR("Error owner printing format string for chown");
3704 return -1;
3705 }
3706
3707 if (hostgid == sb.st_gid)
3708 ret = execvp("lxc-usernsexec", args1);
3709 else
3710 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3711 SYSERROR("Failed executing usernsexec");
3712 exit(1);
f6d3e3e4 3713 }
c4d10a05 3714 return wait_for_pid(pid);
f6d3e3e4
SH
3715}
3716
c4d10a05 3717int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3718{
c4d10a05 3719 int i;
f6d3e3e4 3720
c4d10a05 3721 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3722 return 0;
c4d10a05
SH
3723
3724 for (i = 0; i < c->tty_info.nbtty; i++) {
3725 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3726
3727 if (chown_mapped_root(pty_info->name, c) < 0) {
3728 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3729 return -1;
3730 }
3731 }
3732
29b10e4f 3733 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3734 ERROR("Failed to chown %s", c->console.name);
3735 return -1;
3736 }
3737
f6d3e3e4
SH
3738 return 0;
3739}
3740
bc6928ff
MW
struct start_args {
	char *const *argv;
};

#define MAX_SYMLINK_DEPTH 32

/*
 * This routine is called when the configuration does not already specify a value
 * for autodev (mounting a file system on /dev and populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container's init resolves to something with "systemd"
 * in its path then it is probably running systemd as the init process and it
 * needs the autodev mount to prevent it from mounting devtmpfs on /dev on its
 * own, causing conflicts in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev as this then enables us to use subdirectories under /dev for the container
 * /dev directories and we can fake udev devices.
 *
 * @rootfs : container root filesystem path
 * @data   : optional struct start_args; argv[0], when present, replaces
 *           the default command "/sbin/init"
 *
 * Returns 1 if autodev should be enabled, 0 if not, and -2 if this
 * cannot be determined (bad rootfs, over-long path, command not found).
 */
static int check_autodev( const char *rootfs, void *data )
{
	struct start_args *arg = data;
	int ret;
	int loop_count = 0;
	struct stat s;
	char absrootfs[MAXPATHLEN];
	char path[MAXPATHLEN];
	char abs_path[MAXPATHLEN];
	const char *command = "/sbin/init";

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (arg && arg->argv && arg->argv[0]) {
		command = arg->argv[0];
		DEBUG("Set exec command to %s", command );
	}

	/* bounded copy that always NUL-terminates; refuse over-long commands */
	ret = snprintf(path, sizeof(path), "%s", command);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return -2;

	/*
	 * NOTE(review): this tests the command path as given, not prefixed
	 * with the rootfs - confirm that this is intended.
	 */
	if (access(path, F_OK) != 0 || stat(path, &s) != 0)
		return -2;

	/* Dereference down the symlink merry path testing as we go. */
	/* If anything references systemd in the path - set autodev! */
	/* Renormalize to the rootfs before each dereference */
	/* Relative symlinks should fall out in the wash even with .. */
	while (1) {
		if (strstr(path, "systemd")) {
			INFO("Container with systemd init detected - enabling autodev!");
			return 1;
		}

		ret = snprintf(abs_path, sizeof(abs_path), "%s/%s", absrootfs, path);
		if (ret < 0 || (size_t)ret >= sizeof(abs_path))
			return -2;

		/* leave room for the terminator readlink() does not write */
		ret = readlink(abs_path, path, sizeof(path) - 1);

		if ((ret <= 0) || (++loop_count > MAX_SYMLINK_DEPTH))
			break; /* Break out for other tests */
		path[ret] = '\0';
	}

	/*
	 * Add future checks here.
	 *   Return positive if we should go autodev
	 *   Return 0 if we should NOT go autodev
	 *   Return negative if we encounter an error or can not determine...
	 */

	/* All else fails, we don't need autodev */
	INFO("Autodev not required.");
	return 0;
}
3822
5112cd70
SH
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * <rootfs>/proc/self is read as a symlink. If it cannot be read, proc is
 * mounted. If it points to anything other than "1", the existing mount
 * is lazily detached and proc is mounted afresh.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink() does not NUL-terminate. Zero the buffer and read at
	 * most sizeof(link) - 1 bytes so link is always a valid string.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	/* we expect to be pid 1 of the container's pid namespace here */
	if (strcmp(link, "1") != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3864
3865int tmp_proc_mount(struct lxc_conf *lxc_conf)
3866{
3867 int mounted;
3868
3869 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3870 if (mount("proc", "/proc", "proc", 0, NULL)) {
3871 SYSERROR("Failed mounting /proc, proceeding");
3872 mounted = 0;
3873 } else
3874 mounted = 1;
3875 } else
3876 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3877 if (mounted == -1) {
3878 SYSERROR("failed to mount /proc in the container.");
3879 return -1;
3880 } else if (mounted == 1) {
3881 lxc_conf->tmp_umount_proc = 1;
3882 }
3883 return 0;
3884}
3885
3886void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3887{
3888 if (lxc_conf->tmp_umount_proc == 1) {
3889 umount("/proc");
3890 lxc_conf->tmp_umount_proc = 0;
3891 }
3892}
3893
/*
 * remount_all_slave: walk /proc/self/mountinfo and remount with
 * MS_SLAVE every mount point whose optional-fields column contains
 * "shared".
 *
 * Failures are logged and otherwise ignored.
 */
void remount_all_slave(void)
{
	char *entry = NULL;
	size_t cap = 0;
	FILE *mounts = fopen("/proc/self/mountinfo", "r");

	if (mounts == NULL) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&entry, &cap, mounts) != -1) {
		/* field 4 of the line is the mount point */
		char *mntpoint = get_field(entry, 4);
		/* the options are two fields further on */
		char *flags = mntpoint ? get_field(mntpoint, 2) : NULL;

		if (flags == NULL)
			continue;

		null_endofword(flags);
		if (strstr(flags, "shared") == NULL)
			continue;

		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL) != 0) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mounts);
	free(entry);
}
3928
2322903b
SH
3929void lxc_execute_bind_init(struct lxc_conf *conf)
3930{
3931 int ret;
9d9c111c
SH
3932 char path[PATH_MAX], destpath[PATH_MAX], *p;
3933
3934 /* If init exists in the container, don't bind mount a static one */
3935 p = choose_init(conf->rootfs.mount);
3936 if (p) {
3937 free(p);
3938 return;
3939 }
2322903b
SH
3940
3941 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3942 if (ret < 0 || ret >= PATH_MAX) {
3943 WARN("Path name too long searching for lxc.init.static");
3944 return;
3945 }
3946
3947 if (!file_exists(path)) {
3948 INFO("%s does not exist on host", path);
3949 return;
3950 }
3951
3952 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3953 if (ret < 0 || ret >= PATH_MAX) {
3954 WARN("Path name too long for container's lxc.init.static");
3955 return;
3956 }
3957
3958 if (!file_exists(destpath)) {
3959 FILE * pathfile = fopen(destpath, "wb");
3960 if (!pathfile) {
3961 SYSERROR("Failed to create mount target '%s'", destpath);
3962 return;
3963 }
3964 fclose(pathfile);
3965 }
3966
3967 ret = mount(path, destpath, "none", MS_BIND, NULL);
3968 if (ret < 0)
3969 SYSERROR("Failed to bind lxc.init.static into container");
3970 INFO("lxc.init.static bound into container at %s", path);
3971}
3972
35120d9c
SH
3973/*
3974 * This does the work of remounting / if it is shared, calling the
3975 * container pre-mount hooks, and mounting the rootfs.
3976 */
3977int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3978{
35120d9c
SH
3979 if (conf->rootfs_setup) {
3980 /*
3981 * rootfs was set up in another namespace. bind-mount it
3982 * to give us a mount in our own ns so we can pivot_root to it
3983 */
3984 const char *path = conf->rootfs.mount;
3985 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3986 ERROR("Failed to bind-mount container / onto itself");
145832ba 3987 return -1;
35120d9c 3988 }
145832ba 3989 return 0;
35120d9c 3990 }
d4ef7c50 3991
e995d7a2
SH
3992 remount_all_slave();
3993
35120d9c
SH
3994 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3995 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3996 return -1;
3997 }
3998
3999 if (setup_rootfs(conf)) {
4000 ERROR("failed to setup rootfs for '%s'", name);
4001 return -1;
4002 }
4003
4004 conf->rootfs_setup = true;
4005 return 0;
4006}
4007
1c1c7051
SH
4008static bool verify_start_hooks(struct lxc_conf *conf)
4009{
4010 struct lxc_list *it;
4011 char path[MAXPATHLEN];
4012 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
4013 char *hookname = it->elem;
4014 struct stat st;
4015 int ret;
4016
4017 ret = snprintf(path, MAXPATHLEN, "%s%s",
4018 conf->rootfs.mount, hookname);
4019 if (ret < 0 || ret >= MAXPATHLEN)
4020 return false;
4021 ret = stat(path, &st);
4022 if (ret) {
4023 SYSERROR("Start hook %s not found in container rootfs",
4024 hookname);
4025 return false;
4026 }
6a0c909a 4027 return true;
1c1c7051
SH
4028 }
4029
4030 return true;
4031}
4032
35120d9c
SH
4033int lxc_setup(struct lxc_handler *handler)
4034{
4035 const char *name = handler->name;
4036 struct lxc_conf *lxc_conf = handler->conf;
4037 const char *lxcpath = handler->lxcpath;
4038 void *data = handler->data;
4039
4040 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
4041 ERROR("Error setting up rootfs mount after spawn");
4042 return -1;
4043 }
4044
6c544cb3
MM
4045 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
4046 if (setup_utsname(lxc_conf->utsname)) {
4047 ERROR("failed to setup the utsname for '%s'", name);
4048 return -1;
4049 }
0ad19a3f 4050 }
4051
5f4535a3 4052 if (setup_network(&lxc_conf->network)) {
36eb9bde 4053 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 4054 return -1;
0ad19a3f 4055 }
4056
bc6928ff
MW
4057 if (lxc_conf->autodev < 0) {
4058 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
4059 }
4060
4061 if (lxc_conf->autodev > 0) {
4062 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 4063 ERROR("failed to mount /dev in the container");
c6883f38
SH
4064 return -1;
4065 }
4066 }
4067
368bbc02
CS
4068 /* do automatic mounts (mainly /proc and /sys), but exclude
4069 * those that need to wait until other stuff has finished
4070 */
4fb3cba5 4071 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4072 ERROR("failed to setup the automatic mounts for '%s'", name);
4073 return -1;
4074 }
4075
80a881b2 4076 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 4077 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 4078 return -1;
576f946d 4079 }
4080
c1dc38c2 4081 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
4082 ERROR("failed to setup the mount entries for '%s'", name);
4083 return -1;
4084 }
4085
1c1c7051
SH
4086 /* Make sure any start hooks are in the rootfs */
4087 if (!verify_start_hooks(lxc_conf))
4088 return -1;
4089
2322903b
SH
4090 if (lxc_conf->is_execute)
4091 lxc_execute_bind_init(lxc_conf);
4092
368bbc02
CS
4093 /* now mount only cgroup, if wanted;
4094 * before, /sys could not have been mounted
4095 * (is either mounted automatically or via fstab entries)
4096 */
4fb3cba5 4097 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4098 ERROR("failed to setup the automatic mounts for '%s'", name);
4099 return -1;
4100 }
4101
283678ed 4102 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
4103 ERROR("failed to run mount hooks for container '%s'.", name);
4104 return -1;
4105 }
4106
bc6928ff 4107 if (lxc_conf->autodev > 0) {
283678ed 4108 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
4109 ERROR("failed to run autodev hooks for container '%s'.", name);
4110 return -1;
4111 }
91c3830e
SH
4112 if (setup_autodev(lxc_conf->rootfs.mount)) {
4113 ERROR("failed to populate /dev in the container");
4114 return -1;
4115 }
4116 }
368bbc02 4117
37903589 4118 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 4119 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 4120 return -1;
6e590161 4121 }
4122
7e0e1d94
AV
4123 if (lxc_conf->kmsg) {
4124 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
4125 ERROR("failed to setup kmsg for '%s'", name);
4126 }
1bd051a6 4127
37903589 4128 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 4129 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 4130 return -1;
b0a33c1e 4131 }
4132
69aa6655
DE
4133 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
4134 ERROR("failed to setup /dev symlinks for '%s'", name);
4135 return -1;
4136 }
4137
5112cd70
SH
4138 /* mount /proc if it's not already there */
4139 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 4140 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 4141 return -1;
e075f5d9 4142 }
e075f5d9 4143
ac778708 4144 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 4145 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 4146 return -1;
ed502555 4147 }
4148
571e6ec8 4149 if (setup_pts(lxc_conf->pts)) {
36eb9bde 4150 ERROR("failed to setup the new pts instance");
95b5ffaf 4151 return -1;
3c26f34e 4152 }
4153
cccc74b5
DL
4154 if (setup_personality(lxc_conf->personality)) {
4155 ERROR("failed to setup personality");
4156 return -1;
4157 }
4158
f6d3e3e4 4159 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
4160 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
4161 if (!lxc_list_empty(&lxc_conf->caps)) {
4162 ERROR("Simultaneously requested dropping and keeping caps");
4163 return -1;
4164 }
4165 if (dropcaps_except(&lxc_conf->keepcaps)) {
959aee9c 4166 ERROR("failed to keep requested caps");
1fb86a7c
SH
4167 return -1;
4168 }
4169 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
4170 ERROR("failed to drop capabilities");
4171 return -1;
4172 }
81810dd1
DL
4173 }
4174
cd54d859
DL
4175 NOTICE("'%s' is setup.", name);
4176
0ad19a3f 4177 return 0;
4178}
26ddeedd 4179
283678ed
SH
4180int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
4181 const char *lxcpath, char *argv[])
26ddeedd
SH
4182{
4183 int which = -1;
4184 struct lxc_list *it;
4185
4186 if (strcmp(hook, "pre-start") == 0)
4187 which = LXCHOOK_PRESTART;
5ea6163a
SH
4188 else if (strcmp(hook, "pre-mount") == 0)
4189 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4190 else if (strcmp(hook, "mount") == 0)
4191 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4192 else if (strcmp(hook, "autodev") == 0)
4193 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4194 else if (strcmp(hook, "start") == 0)
4195 which = LXCHOOK_START;
4196 else if (strcmp(hook, "post-stop") == 0)
4197 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4198 else if (strcmp(hook, "clone") == 0)
4199 which = LXCHOOK_CLONE;
26ddeedd
SH
4200 else
4201 return -1;
4202 lxc_list_for_each(it, &conf->hooks[which]) {
4203 int ret;
4204 char *hookname = it->elem;
283678ed 4205 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4206 if (ret)
4207 return ret;
4208 }
4209 return 0;
4210}
72d0e1cb 4211
427b3a21 4212static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4213{
4214 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4215 struct lxc_list *it2,*next;
72d0e1cb
SG
4216
4217 lxc_list_del(it);
4218
4219 if (netdev->link)
4220 free(netdev->link);
4221 if (netdev->name)
4222 free(netdev->name);
c9bb9a85
DE
4223 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
4224 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
4225 if (netdev->upscript)
4226 free(netdev->upscript);
4227 if (netdev->hwaddr)
4228 free(netdev->hwaddr);
4229 if (netdev->mtu)
4230 free(netdev->mtu);
4231 if (netdev->ipv4_gateway)
4232 free(netdev->ipv4_gateway);
4233 if (netdev->ipv6_gateway)
4234 free(netdev->ipv6_gateway);
9ebb03ad 4235 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4236 lxc_list_del(it2);
4237 free(it2->elem);
4238 free(it2);
4239 }
9ebb03ad 4240 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4241 lxc_list_del(it2);
4242 free(it2->elem);
4243 free(it2);
4244 }
d95db067 4245 free(netdev);
72d0e1cb
SG
4246 free(it);
4247}
4248
4249/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4250int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4251{
4252 char *p1;
4253 int ret, idx, i;
4254 struct lxc_list *it;
4255 struct lxc_netdev *netdev;
4256
4257 p1 = index(key, '.');
4258 if (!p1 || *(p1+1) == '\0')
4259 p1 = NULL;
4260
4261 ret = sscanf(key, "%d", &idx);
4262 if (ret != 1) return -1;
4263 if (idx < 0)
4264 return -1;
4265
4266 i = 0;
4267 lxc_list_for_each(it, &c->network) {
4268 if (i == idx)
4269 break;
4270 i++;
4271 }
4272 if (i < idx) // we don't have that many nics defined
4273 return -1;
4274
4275 if (!it || !it->elem)
4276 return -1;
4277
4278 netdev = it->elem;
4279
4280 if (!p1) {
4281 lxc_remove_nic(it);
52d21d40 4282 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4283 struct lxc_list *it2,*next;
4284 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4285 lxc_list_del(it2);
4286 free(it2->elem);
4287 free(it2);
4288 }
52d21d40 4289 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4290 struct lxc_list *it2,*next;
4291 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4292 lxc_list_del(it2);
4293 free(it2->elem);
4294 free(it2);
4295 }
52d21d40 4296 } else if (strcmp(p1, ".link") == 0) {
72d0e1cb
SG
4297 if (netdev->link) {
4298 free(netdev->link);
4299 netdev->link = NULL;
4300 }
52d21d40 4301 } else if (strcmp(p1, ".name") == 0) {
72d0e1cb
SG
4302 if (netdev->name) {
4303 free(netdev->name);
4304 netdev->name = NULL;
4305 }
52d21d40 4306 } else if (strcmp(p1, ".script.up") == 0) {
72d0e1cb
SG
4307 if (netdev->upscript) {
4308 free(netdev->upscript);
4309 netdev->upscript = NULL;
4310 }
52d21d40 4311 } else if (strcmp(p1, ".hwaddr") == 0) {
72d0e1cb
SG
4312 if (netdev->hwaddr) {
4313 free(netdev->hwaddr);
4314 netdev->hwaddr = NULL;
4315 }
52d21d40 4316 } else if (strcmp(p1, ".mtu") == 0) {
72d0e1cb
SG
4317 if (netdev->mtu) {
4318 free(netdev->mtu);
4319 netdev->mtu = NULL;
4320 }
9eaf8a59 4321 } else if (strcmp(p1, ".ipv4.gateway") == 0) {
72d0e1cb
SG
4322 if (netdev->ipv4_gateway) {
4323 free(netdev->ipv4_gateway);
4324 netdev->ipv4_gateway = NULL;
4325 }
9eaf8a59 4326 } else if (strcmp(p1, ".ipv6.gateway") == 0) {
72d0e1cb
SG
4327 if (netdev->ipv6_gateway) {
4328 free(netdev->ipv6_gateway);
4329 netdev->ipv6_gateway = NULL;
4330 }
4331 }
4332 else return -1;
4333
4334 return 0;
4335}
4336
4337int lxc_clear_config_network(struct lxc_conf *c)
4338{
9ebb03ad
DE
4339 struct lxc_list *it,*next;
4340 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4341 lxc_remove_nic(it);
4342 }
4343 return 0;
4344}
4345
4346int lxc_clear_config_caps(struct lxc_conf *c)
4347{
9ebb03ad 4348 struct lxc_list *it,*next;
72d0e1cb 4349
9ebb03ad 4350 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4351 lxc_list_del(it);
4352 free(it->elem);
4353 free(it);
4354 }
4355 return 0;
4356}
4357
74a3920a 4358static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4359 struct lxc_list *it, *next;
4360
4355ab5f 4361 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4362 lxc_list_del(it);
4363 free(it->elem);
4364 free(it);
4365 }
4366 return 0;
4367}
4368
4355ab5f
SH
4369int lxc_clear_idmaps(struct lxc_conf *c)
4370{
4371 return lxc_free_idmap(&c->id_map);
4372}
4373
1fb86a7c
SH
4374int lxc_clear_config_keepcaps(struct lxc_conf *c)
4375{
4376 struct lxc_list *it,*next;
4377
4378 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4379 lxc_list_del(it);
4380 free(it->elem);
4381 free(it);
4382 }
4383 return 0;
4384}
4385
12a50cc6 4386int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4387{
9ebb03ad 4388 struct lxc_list *it,*next;
72d0e1cb 4389 bool all = false;
12a50cc6 4390 const char *k = key + 11;
72d0e1cb
SG
4391
4392 if (strcmp(key, "lxc.cgroup") == 0)
4393 all = true;
4394
9ebb03ad 4395 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4396 struct lxc_cgroup *cg = it->elem;
4397 if (!all && strcmp(cg->subsystem, k) != 0)
4398 continue;
4399 lxc_list_del(it);
4400 free(cg->subsystem);
4401 free(cg->value);
4402 free(cg);
4403 free(it);
4404 }
4405 return 0;
4406}
4407
ee1e7aa0
SG
4408int lxc_clear_groups(struct lxc_conf *c)
4409{
4410 struct lxc_list *it,*next;
4411
4412 lxc_list_for_each_safe(it, &c->groups, next) {
4413 lxc_list_del(it);
4414 free(it->elem);
4415 free(it);
4416 }
4417 return 0;
4418}
4419
ab799c0b
SG
4420int lxc_clear_environment(struct lxc_conf *c)
4421{
4422 struct lxc_list *it,*next;
4423
4424 lxc_list_for_each_safe(it, &c->environment, next) {
4425 lxc_list_del(it);
4426 free(it->elem);
4427 free(it);
4428 }
4429 return 0;
4430}
4431
4432
72d0e1cb
SG
4433int lxc_clear_mount_entries(struct lxc_conf *c)
4434{
9ebb03ad 4435 struct lxc_list *it,*next;
72d0e1cb 4436
9ebb03ad 4437 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4438 lxc_list_del(it);
4439 free(it->elem);
4440 free(it);
4441 }
4442 return 0;
4443}
4444
b099e9e9
SH
4445int lxc_clear_automounts(struct lxc_conf *c)
4446{
4447 c->auto_mounts = 0;
4448 return 0;
4449}
4450
12a50cc6 4451int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4452{
9ebb03ad 4453 struct lxc_list *it,*next;
17ed13a3 4454 bool all = false, done = false;
12a50cc6 4455 const char *k = key + 9;
72d0e1cb
SG
4456 int i;
4457
17ed13a3
SH
4458 if (strcmp(key, "lxc.hook") == 0)
4459 all = true;
4460
72d0e1cb 4461 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4462 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4463 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4464 lxc_list_del(it);
4465 free(it->elem);
4466 free(it);
4467 }
4468 done = true;
72d0e1cb
SG
4469 }
4470 }
17ed13a3
SH
4471
4472 if (!done) {
4473 ERROR("Invalid hook key: %s", key);
4474 return -1;
4475 }
72d0e1cb
SG
4476 return 0;
4477}
8eb5694b 4478
74a3920a 4479static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4480{
4481 int i;
4482
0cf45501 4483 if (!conf->saved_nics)
7b35f3d6
SH
4484 return;
4485 for (i=0; i < conf->num_savednics; i++)
4486 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4487 free(conf->saved_nics);
4488}
4489
4184c3e1
SH
4490static inline void lxc_clear_aliens(struct lxc_conf *conf)
4491{
4492 struct lxc_list *it,*next;
4493
4494 lxc_list_for_each_safe(it, &conf->aliens, next) {
4495 lxc_list_del(it);
4496 free(it->elem);
4497 free(it);
4498 }
4499}
4500
f979ac15
SH
4501static inline void lxc_clear_includes(struct lxc_conf *conf)
4502{
4503 struct lxc_list *it,*next;
4504
4505 lxc_list_for_each_safe(it, &conf->includes, next) {
4506 lxc_list_del(it);
4507 free(it->elem);
4508 free(it);
4509 }
4510}
4511
8eb5694b
SH
4512void lxc_conf_free(struct lxc_conf *conf)
4513{
4514 if (!conf)
4515 return;
b91f00d3
SH
4516 if (conf->console.log_path)
4517 free(conf->console.log_path);
8eb5694b
SH
4518 if (conf->console.path)
4519 free(conf->console.path);
54c30e29 4520 if (conf->rootfs.mount)
8eb5694b 4521 free(conf->rootfs.mount);
a17b1e65
SG
4522 if (conf->rootfs.options)
4523 free(conf->rootfs.options);
d95db067
DE
4524 if (conf->rootfs.path)
4525 free(conf->rootfs.path);
a58878d6
SH
4526 if (conf->rootfs.pivot)
4527 free(conf->rootfs.pivot);
4528 if (conf->logfile)
4529 free(conf->logfile);
d95db067
DE
4530 if (conf->utsname)
4531 free(conf->utsname);
4532 if (conf->ttydir)
4533 free(conf->ttydir);
4534 if (conf->fstab)
4535 free(conf->fstab);
fc7e8864
WM
4536 if (conf->rcfile)
4537 free(conf->rcfile);
6b0d5538 4538 free(conf->unexpanded_config);
8eb5694b 4539 lxc_clear_config_network(conf);
fe4de9a6
DE
4540 if (conf->lsm_aa_profile)
4541 free(conf->lsm_aa_profile);
4542 if (conf->lsm_se_context)
4543 free(conf->lsm_se_context);
769872f9 4544 lxc_seccomp_free(conf);
8eb5694b 4545 lxc_clear_config_caps(conf);
1fb86a7c 4546 lxc_clear_config_keepcaps(conf);
8eb5694b 4547 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4548 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4549 lxc_clear_mount_entries(conf);
7b35f3d6 4550 lxc_clear_saved_nics(conf);
27c27d73 4551 lxc_clear_idmaps(conf);
ee1e7aa0 4552 lxc_clear_groups(conf);
f979ac15 4553 lxc_clear_includes(conf);
761d81ca 4554 lxc_clear_aliens(conf);
ab799c0b 4555 lxc_clear_environment(conf);
8eb5694b
SH
4556 free(conf);
4557}
4355ab5f
SH
4558
/*
 * Argument bundle handed to run_userns_fn() in the cloned child.
 */
struct userns_fn_data {
	/* callback the child runs once it has been released */
	int (*fn)(void *);
	/* opaque argument passed to fn */
	void *arg;
	/* pipe fds: the child reads one byte from p[0]; p[1] is the write end */
	int p[2];
};
4564
4565static int run_userns_fn(void *data)
4566{
4567 struct userns_fn_data *d = data;
4568 char c;
4569 // we're not sharing with the parent any more, if it was a thread
4570
4571 close(d->p[1]);
4572 if (read(d->p[0], &c, 1) != 1)
4573 return -1;
4574 close(d->p[0]);
4575 return d->fn(d->arg);
4576}
4577
4578/*
8b227008
TS
4579 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4580 * if they are not already there.
4355ab5f 4581 */
8b227008
TS
4582static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4583 uid_t uid, gid_t gid)
4355ab5f 4584{
8b227008
TS
4585 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4586 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4587 struct lxc_list *new = NULL, *tmp, *it, *next;
4588 struct id_map *entry;
4589
3ec1648d
SH
4590 new = malloc(sizeof(*new));
4591 if (!new) {
4592 ERROR("Out of memory building id map");
4593 return NULL;
4594 }
4595 lxc_list_init(new);
4596
8b227008
TS
4597 if (hostuid_mapped < 0) {
4598 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4599 if (hostuid_mapped < 0)
3ec1648d
SH
4600 goto err;
4601 tmp = malloc(sizeof(*tmp));
4602 if (!tmp)
4603 goto err;
4355ab5f
SH
4604 entry = malloc(sizeof(*entry));
4605 if (!entry) {
3ec1648d
SH
4606 free(tmp);
4607 goto err;
4355ab5f 4608 }
3ec1648d 4609 tmp->elem = entry;
4355ab5f 4610 entry->idtype = ID_TYPE_UID;
8b227008
TS
4611 entry->nsid = hostuid_mapped;
4612 entry->hostid = (unsigned long) uid;
4613 entry->range = 1;
4614 lxc_list_add_tail(new, tmp);
4615 }
4616 if (hostgid_mapped < 0) {
4617 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4618 if (hostgid_mapped < 0)
4619 goto err;
4620 tmp = malloc(sizeof(*tmp));
4621 if (!tmp)
4622 goto err;
4623 entry = malloc(sizeof(*entry));
4624 if (!entry) {
4625 free(tmp);
4626 goto err;
4627 }
4628 tmp->elem = entry;
4629 entry->idtype = ID_TYPE_GID;
4630 entry->nsid = hostgid_mapped;
4631 entry->hostid = (unsigned long) gid;
4355ab5f 4632 entry->range = 1;
3ec1648d 4633 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4634 }
4635 lxc_list_for_each_safe(it, &conf->id_map, next) {
4636 tmp = malloc(sizeof(*tmp));
4637 if (!tmp)
4638 goto err;
4639 entry = malloc(sizeof(*entry));
4640 if (!entry) {
4641 free(tmp);
4642 goto err;
4643 }
4644 memset(entry, 0, sizeof(*entry));
4645 memcpy(entry, it->elem, sizeof(*entry));
4646 tmp->elem = entry;
3ec1648d 4647 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4648 }
4649
4650 return new;
4651
4652err:
8b227008 4653 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4654 if (new)
4655 lxc_free_idmap(new);
c30ac545 4656 free(new);
4355ab5f
SH
4657 return NULL;
4658}
4659
4660/*
4661 * Run a function in a new user namespace.
8b227008 4662 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4663 */
4664int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4665{
4666 int ret, pid;
4667 struct userns_fn_data d;
4668 char c = '1';
4669 int p[2];
4670 struct lxc_list *idmap;
4671
4355ab5f 4672 ret = pipe(p);
4355ab5f
SH
4673 if (ret < 0) {
4674 SYSERROR("opening pipe");
4675 return -1;
4676 }
4677 d.fn = fn;
4678 d.arg = data;
4679 d.p[0] = p[0];
4680 d.p[1] = p[1];
4681 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4682 if (pid < 0)
4683 goto err;
4355ab5f 4684 close(p[0]);
4355ab5f
SH
4685 p[0] = -1;
4686
8b227008
TS
4687 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4688 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4689 goto err;
4690 }
4691
4692 ret = lxc_map_ids(idmap, pid);
4693 lxc_free_idmap(idmap);
88dd66fc 4694 free(idmap);
565e571c 4695 if (ret) {
4355ab5f
SH
4696 ERROR("Error setting up child mappings");
4697 goto err;
4698 }
4699
4700 // kick the child
4701 if (write(p[1], &c, 1) != 1) {
4702 SYSERROR("writing to pipe to child");
4703 goto err;
4704 }
4705
3139aead
SG
4706 ret = wait_for_pid(pid);
4707
4708 close(p[1]);
4709 return ret;
4710
4355ab5f 4711err:
4355ab5f
SH
4712 if (p[0] != -1)
4713 close(p[0]);
4714 close(p[1]);
4355ab5f
SH
4715 return -1;
4716}
97e9cfa0 4717
/*
 * Return a malloc'd copy of the passwd name for the effective uid, or NULL
 * if the lookup (or the copy) fails.  The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4729
/*
 * Return a malloc'd copy of the group name for the effective gid, or NULL
 * if the lookup (or the copy) fails.  The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4741
a96a8e8c 4742/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4743void suggest_default_idmap(void)
4744{
4745 FILE *f;
4746 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4747 char *line = NULL;
4748 char *uname, *gname;
4749 size_t len = 0;
4750
4751 if (!(uname = getuname()))
4752 return;
4753
4754 if (!(gname = getgname())) {
4755 free(uname);
4756 return;
4757 }
4758
4759 f = fopen(subuidfile, "r");
4760 if (!f) {
4761 ERROR("Your system is not configured with subuids");
4762 free(gname);
4763 free(uname);
4764 return;
4765 }
4766 while (getline(&line, &len, f) != -1) {
4767 char *p = strchr(line, ':'), *p2;
4768 if (*line == '#')
4769 continue;
4770 if (!p)
4771 continue;
4772 *p = '\0';
4773 p++;
4774 if (strcmp(line, uname))
4775 continue;
4776 p2 = strchr(p, ':');
4777 if (!p2)
4778 continue;
4779 *p2 = '\0';
4780 p2++;
4781 if (!*p2)
4782 continue;
4783 uid = atoi(p);
4784 urange = atoi(p2);
4785 }
4786 fclose(f);
4787
4788 f = fopen(subuidfile, "r");
4789 if (!f) {
4790 ERROR("Your system is not configured with subgids");
4791 free(gname);
4792 free(uname);
4793 return;
4794 }
4795 while (getline(&line, &len, f) != -1) {
4796 char *p = strchr(line, ':'), *p2;
4797 if (*line == '#')
4798 continue;
4799 if (!p)
4800 continue;
4801 *p = '\0';
4802 p++;
4803 if (strcmp(line, uname))
4804 continue;
4805 p2 = strchr(p, ':');
4806 if (!p2)
4807 continue;
4808 *p2 = '\0';
4809 p2++;
4810 if (!*p2)
4811 continue;
4812 gid = atoi(p);
4813 grange = atoi(p2);
4814 }
4815 fclose(f);
4816
4817 if (line)
4818 free(line);
4819
4820 if (!urange || !grange) {
4821 ERROR("You do not have subuids or subgids allocated");
4822 ERROR("Unprivileged containers require subuids and subgids");
4823 return;
4824 }
4825
4826 ERROR("You must either run as root, or define uid mappings");
4827 ERROR("To pass uid mappings to lxc-create, you could create");
4828 ERROR("~/.config/lxc/default.conf:");
4829 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4830 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4831 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4832
4833 free(gname);
4834 free(uname);
4835}