]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
mount_entry: use statvfs
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
2938f7c8 39#include <sys/statvfs.h>
e827ff7e
SG
40
41#if HAVE_PTY_H
b0a33c1e 42#include <pty.h>
e827ff7e
SG
43#else
44#include <../include/openpty.h>
45#endif
0ad19a3f 46
b3ecde1e
DL
47#include <linux/loop.h>
48
0ad19a3f 49#include <sys/types.h>
50#include <sys/utsname.h>
51#include <sys/param.h>
52#include <sys/stat.h>
53#include <sys/socket.h>
54#include <sys/mount.h>
55#include <sys/mman.h>
81810dd1 56#include <sys/prctl.h>
0ad19a3f 57
58#include <arpa/inet.h>
59#include <fcntl.h>
60#include <netinet/in.h>
61#include <net/if.h>
6f4a3756 62#include <libgen.h>
0ad19a3f 63
e5bda9ee 64#include "network.h"
65#include "error.h"
b2718c72 66#include "parse.h"
1b09f2c0
DL
67#include "utils.h"
68#include "conf.h"
69#include "log.h"
d55bc1ad 70#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 71#include "bdev.h"
368bbc02 72#include "cgroup.h"
025ed0f3 73#include "lxclock.h"
4355ab5f 74#include "namespace.h"
fe4de9a6 75#include "lsm/lsm.h"
d0a36f2c 76
495d2046
SG
77#if HAVE_SYS_CAPABILITY_H
78#include <sys/capability.h>
79#endif
80
6ff05e18
SG
81#if HAVE_SYS_PERSONALITY_H
82#include <sys/personality.h>
83#endif
84
edaf8b1b
SG
85#if IS_BIONIC
86#include <../include/lxcmntent.h>
87#else
88#include <mntent.h>
89#endif
90
769872f9
SH
91#include "lxcseccomp.h"
92
36eb9bde 93lxc_log_define(lxc_conf, lxc);
e5bda9ee 94
0ad19a3f 95#define MAXHWLEN 18
96#define MAXINDEXLEN 20
442cbbe6 97#define MAXMTULEN 16
0ad19a3f 98#define MAXLINELEN 128
99
495d2046 100#if HAVE_SYS_CAPABILITY_H
b09094da
MN
101#ifndef CAP_SETFCAP
102#define CAP_SETFCAP 31
103#endif
104
105#ifndef CAP_MAC_OVERRIDE
106#define CAP_MAC_OVERRIDE 32
107#endif
108
109#ifndef CAP_MAC_ADMIN
110#define CAP_MAC_ADMIN 33
111#endif
495d2046 112#endif
b09094da
MN
113
114#ifndef PR_CAPBSET_DROP
115#define PR_CAPBSET_DROP 24
116#endif
117
9818cae4
SG
118#ifndef LO_FLAGS_AUTOCLEAR
119#define LO_FLAGS_AUTOCLEAR 4
120#endif
121
0769b82a
CS
122/* needed for cgroup automount checks, regardless of whether we
123 * have included linux/capability.h or not */
124#ifndef CAP_SYS_ADMIN
125#define CAP_SYS_ADMIN 21
126#endif
127
2d76d1d7
SG
128/* Define pivot_root() if missing from the C library */
129#ifndef HAVE_PIVOT_ROOT
/*
 * Fallback pivot_root(): issue the raw system call when its number is
 * known at build time, otherwise fail with errno set to ENOSYS.
 */
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
139#else
140extern int pivot_root(const char * new_root, const char * put_old);
141#endif
142
143/* Define sethostname() if missing from the C library */
144#ifndef HAVE_SETHOSTNAME
/*
 * Fallback sethostname(): issue the raw system call when its number is
 * known at build time, otherwise fail with errno set to ENOSYS.
 */
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
154#endif
155
72f919c4
SG
156/* Define __S_ISTYPE if missing from the C library */
157#ifndef __S_ISTYPE
158#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
159#endif
160
72d0e1cb 161char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 162 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 163
e3b4c4c4 164typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 165
998ac676
RT
/* One textual mount option and the mount(2) flag bits it refers to. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* non-zero for keywords naming the absence of the flag (e.g. "rw") */
	int flag;	/* MS_* bits associated with the keyword */
};
171
81810dd1
DL
/* Maps a capability keyword to its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* keyword without the "CAP_" prefix, lower case */
	int value;	/* matching CAP_* constant */
};
176
0769b82a
CS
177/* Declare this here, since we don't want to reshuffle the whole file. */
178static int in_caplist(int cap, struct lxc_list *caps);
179
e3b4c4c4
ST
180static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
181static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
182static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
183static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
184static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 185static int instanciate_none(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 186
24654103
DL
187static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
188 [LXC_NET_VETH] = instanciate_veth,
189 [LXC_NET_MACVLAN] = instanciate_macvlan,
190 [LXC_NET_VLAN] = instanciate_vlan,
191 [LXC_NET_PHYS] = instanciate_phys,
192 [LXC_NET_EMPTY] = instanciate_empty,
26b797f3 193 [LXC_NET_NONE] = instanciate_none,
0ad19a3f 194};
195
74a2b586
JK
196static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
197static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
198static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
199static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
200static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 201static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586
JK
202
203static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
204 [LXC_NET_VETH] = shutdown_veth,
205 [LXC_NET_MACVLAN] = shutdown_macvlan,
206 [LXC_NET_VLAN] = shutdown_vlan,
207 [LXC_NET_PHYS] = shutdown_phys,
208 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 209 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
210};
211
998ac676 212static struct mount_opt mount_opt[] = {
88d413d5
SW
213 { "defaults", 0, 0 },
214 { "ro", 0, MS_RDONLY },
215 { "rw", 1, MS_RDONLY },
216 { "suid", 1, MS_NOSUID },
217 { "nosuid", 0, MS_NOSUID },
218 { "dev", 1, MS_NODEV },
219 { "nodev", 0, MS_NODEV },
220 { "exec", 1, MS_NOEXEC },
221 { "noexec", 0, MS_NOEXEC },
222 { "sync", 0, MS_SYNCHRONOUS },
223 { "async", 1, MS_SYNCHRONOUS },
224 { "dirsync", 0, MS_DIRSYNC },
225 { "remount", 0, MS_REMOUNT },
226 { "mand", 0, MS_MANDLOCK },
227 { "nomand", 1, MS_MANDLOCK },
228 { "atime", 1, MS_NOATIME },
229 { "noatime", 0, MS_NOATIME },
230 { "diratime", 1, MS_NODIRATIME },
231 { "nodiratime", 0, MS_NODIRATIME },
232 { "bind", 0, MS_BIND },
233 { "rbind", 0, MS_BIND|MS_REC },
234 { "relatime", 0, MS_RELATIME },
235 { "norelatime", 1, MS_RELATIME },
236 { "strictatime", 0, MS_STRICTATIME },
237 { "nostrictatime", 1, MS_STRICTATIME },
238 { NULL, 0, 0 },
998ac676
RT
239};
240
495d2046 241#if HAVE_SYS_CAPABILITY_H
81810dd1 242static struct caps_opt caps_opt[] = {
a6afdde9 243 { "chown", CAP_CHOWN },
1e11be34
DL
244 { "dac_override", CAP_DAC_OVERRIDE },
245 { "dac_read_search", CAP_DAC_READ_SEARCH },
246 { "fowner", CAP_FOWNER },
247 { "fsetid", CAP_FSETID },
81810dd1
DL
248 { "kill", CAP_KILL },
249 { "setgid", CAP_SETGID },
250 { "setuid", CAP_SETUID },
251 { "setpcap", CAP_SETPCAP },
252 { "linux_immutable", CAP_LINUX_IMMUTABLE },
253 { "net_bind_service", CAP_NET_BIND_SERVICE },
254 { "net_broadcast", CAP_NET_BROADCAST },
255 { "net_admin", CAP_NET_ADMIN },
256 { "net_raw", CAP_NET_RAW },
257 { "ipc_lock", CAP_IPC_LOCK },
258 { "ipc_owner", CAP_IPC_OWNER },
259 { "sys_module", CAP_SYS_MODULE },
260 { "sys_rawio", CAP_SYS_RAWIO },
261 { "sys_chroot", CAP_SYS_CHROOT },
262 { "sys_ptrace", CAP_SYS_PTRACE },
263 { "sys_pacct", CAP_SYS_PACCT },
264 { "sys_admin", CAP_SYS_ADMIN },
265 { "sys_boot", CAP_SYS_BOOT },
266 { "sys_nice", CAP_SYS_NICE },
267 { "sys_resource", CAP_SYS_RESOURCE },
268 { "sys_time", CAP_SYS_TIME },
269 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
270 { "mknod", CAP_MKNOD },
271 { "lease", CAP_LEASE },
9527e566 272#ifdef CAP_AUDIT_WRITE
81810dd1 273 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
274#endif
275#ifdef CAP_AUDIT_CONTROL
81810dd1 276 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 277#endif
81810dd1
DL
278 { "setfcap", CAP_SETFCAP },
279 { "mac_override", CAP_MAC_OVERRIDE },
280 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
281#ifdef CAP_SYSLOG
282 { "syslog", CAP_SYSLOG },
283#endif
284#ifdef CAP_WAKE_ALARM
285 { "wake_alarm", CAP_WAKE_ALARM },
286#endif
81810dd1 287};
495d2046
SG
288#else
289static struct caps_opt caps_opt[] = {};
290#endif
81810dd1 291
f0d02950
JTLB
/* Fixed paths under /dev used by this file. */
const char *dev_base_path = "/dev/.lxc";
const char *dev_user_path = "/dev/.lxc/user";
294
91c3830e
SH
295static int run_buffer(char *buffer)
296{
ebec9176 297 struct lxc_popen_FILE *f;
91c3830e 298 char *output;
8e7da691 299 int ret;
91c3830e 300
ebec9176 301 f = lxc_popen(buffer);
91c3830e
SH
302 if (!f) {
303 SYSERROR("popen failed");
304 return -1;
305 }
306
307 output = malloc(LXC_LOG_BUFFER_SIZE);
308 if (!output) {
309 ERROR("failed to allocate memory for script output");
ebec9176 310 lxc_pclose(f);
91c3830e
SH
311 return -1;
312 }
313
ebec9176 314 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
315 DEBUG("script output: %s", output);
316
317 free(output);
318
ebec9176 319 ret = lxc_pclose(f);
8e7da691 320 if (ret == -1) {
91c3830e
SH
321 SYSERROR("Script exited on error");
322 return -1;
8e7da691
DE
323 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
324 ERROR("Script exited with status %d", WEXITSTATUS(ret));
325 return -1;
326 } else if (WIFSIGNALED(ret)) {
327 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
328 strsignal(WTERMSIG(ret)));
329 return -1;
91c3830e
SH
330 }
331
332 return 0;
333}
334
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * @name:    container name, passed as first script argument
 * @section: config section, passed as second script argument
 * @script:  path of the script to run
 * @hook:    hook name, passed as third script argument
 * @lxcpath: not used by this function
 * @argsin:  optional NULL-terminated array of extra arguments (may be NULL)
 *
 * The command line is assembled in heap memory: alloca() cannot report
 * failure and an oversized argument list would overflow the stack.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			free(buffer);
			return -1;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
384
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" from a
 * NULL-terminated variable argument list of strings and execute it with
 * run_buffer().
 *
 * @name:    container name, passed as first script argument
 * @section: config section, passed as second script argument
 * @script:  path of the script to run
 * @...:     extra arguments as char pointers, terminated by a NULL pointer
 *
 * The command line is assembled in heap memory: alloca() cannot report
 * failure and an oversized argument list would overflow the stack. Every
 * va_start() is paired with va_end() on all paths, including errors.
 *
 * Returns the result of run_buffer(), or -1 if the command line could not
 * be built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: each extra argument is preceded by one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
436
/*
 * Per-line callback for mount_unknown_fs(): treat @buffer as one line of a
 * filesystem list and try to mount the rootfs with the type it names.
 *
 * @buffer: one line of the list; trimmed in place
 * @data:   struct cbarg carrying source, target and mount options
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped or the
 * mount failed with this type, and -1 when the mount options could not be
 * parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Start from defined values so that the free() on the failure path
	 * below is safe even if parse_mntopts() returns before storing a
	 * result.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip surrounding garbage characters from the type name */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
481
a17b1e65
SG
482static int mount_unknown_fs(const char *rootfs, const char *target,
483 const char *options)
78ae2fcc 484{
a6afdde9 485 int i;
78ae2fcc 486
487 struct cbarg {
488 const char *rootfs;
a6afdde9 489 const char *target;
a17b1e65 490 const char *options;
78ae2fcc 491 } cbarg = {
492 .rootfs = rootfs,
a6afdde9 493 .target = target,
a17b1e65 494 .options = options,
78ae2fcc 495 };
496
a6afdde9
DL
497 /*
498 * find the filesystem type with brute force:
499 * first we check with /etc/filesystems, in case the modules
78ae2fcc 500 * are auto-loaded and fall back to the supported kernel fs
501 */
502 char *fsfile[] = {
503 "/etc/filesystems",
504 "/proc/filesystems",
505 };
506
a6afdde9
DL
507 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
508
509 int ret;
510
511 if (access(fsfile[i], F_OK))
512 continue;
513
514 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
515 if (ret < 0) {
516 ERROR("failed to parse '%s'", fsfile[i]);
517 return -1;
518 }
519
520 if (ret)
521 return 0;
78ae2fcc 522 }
523
a6afdde9
DL
524 ERROR("failed to determine fs type for '%s'", rootfs);
525 return -1;
526}
527
a17b1e65
SG
/*
 * Mount a directory rootfs by recursively bind-mounting @rootfs on
 * @target, adding the flags and data parsed from @options.
 *
 * Returns the result of mount(2), or -1 if the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Start from defined values so that the free() on the failure path
	 * below is safe even if parse_mntopts() returns before storing a
	 * result.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
545
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed, given LO_FLAGS_AUTOCLEAR and applied to the device
 * with LOOP_SET_STATUS64 after the backing file was set with LOOP_SET_FD.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an ioctl fails.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int backing;
	int result = -1;

	backing = open(rootfs, O_RDWR);
	if (backing < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));
	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, backing)) {
		SYSERROR("failed to LOOP_SET_FD");
	} else if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
	} else {
		result = 0;
	}

	/* the backing file descriptor is closed on every path */
	close(backing);

	return result;
}
577
a17b1e65
SG
/*
 * Mount a regular-file rootfs: scan /dev for a loop device that is not in
 * use, attach @rootfs to it and mount the device on @target.
 *
 * Only the first free loop device found is tried. Returns 0 on success,
 * -1 when /dev cannot be opened, no free device is found, or attaching or
 * mounting fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent entry, *cur;
	struct loop_info64 loinfo;
	char devpath[MAXPATHLEN];
	int result = -1, devfd = -1, len;
	DIR *devdir;

	devdir = opendir("/dev");
	if (devdir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while (readdir_r(devdir, &entry, &cur) == 0 && cur != NULL) {

		if (strcmp(cur->d_name, ".") == 0 ||
		    strcmp(cur->d_name, "..") == 0)
			continue;

		/* only entries named loop* are candidates */
		if (strncmp(cur->d_name, "loop", 4) != 0)
			continue;

		len = snprintf(devpath, MAXPATHLEN, "/dev/%s", cur->d_name);
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		devfd = open(devpath, O_RDWR);
		if (devfd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(devfd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(devfd);
			continue;
		}

		/* ENXIO is the expected answer from an unbound device */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     cur->d_name);
			close(devfd);
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		result = setup_lodev(rootfs, devfd, &loinfo);
		if (result == 0)
			result = mount_unknown_fs(devpath, target, options);
		close(devfd);

		break;
	}

	if (closedir(devdir))
		WARN("failed to close directory");

	return result;
}
642
a17b1e65
SG
/*
 * Mount a block-device rootfs: the filesystem type is unknown, so hand
 * the device straight to mount_unknown_fs() and return its result.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc = mount_unknown_fs(rootfs, target, options);

	return rc;
}
648
0c547523
SH
/*
 * pin_rootfs
 * If rootfs is a directory, open ${rootfs}/lxc.hold for writing and keep
 * it open for the duration of the container run, to prevent the container
 * from marking the underlying fs read-only on shutdown. The file is
 * unlinked immediately so that no name pollution happens.
 *
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, the path
 *           cannot be resolved, or it is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the file alive after the name is gone */
	(void)unlink(absrootfspin);
	return fd;
}
691
4fb3cba5 692static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 693{
368bbc02 694 int r;
b06b8511
CS
695 size_t i;
696 static struct {
697 int match_mask;
698 int match_flag;
699 const char *source;
700 const char *destination;
701 const char *fstype;
702 unsigned long flags;
703 const char *options;
704 } default_mounts[] = {
705 /* Read-only bind-mounting... In older kernels, doing that required
706 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
707 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
708 * kernel 2.6.26 onwards. However, this apparently does not work on
709 * kernel 3.8. Unfortunately, on that very same kernel, doing the
710 * same trick as above doesn't seem to work either, there one needs
711 * to ALSO specify MS_BIND for the remount, otherwise the entire
712 * fs is remounted read-only or the mount fails because it's busy...
713 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
714 * 2.6.32...
368bbc02 715 */
b06b8511
CS
716 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
717 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
718 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
719 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
720 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
721 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
722 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
723 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
724 { 0, 0, NULL, NULL, NULL, 0, NULL }
725 };
368bbc02 726
b06b8511
CS
727 for (i = 0; default_mounts[i].match_mask; i++) {
728 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
729 char *source = NULL;
730 char *destination = NULL;
731 int saved_errno;
732
733 if (default_mounts[i].source) {
734 /* will act like strdup if %r is not present */
735 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
736 if (!source) {
737 SYSERROR("memory allocation error");
738 return -1;
739 }
740 }
741 if (default_mounts[i].destination) {
742 /* will act like strdup if %r is not present */
743 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
744 if (!destination) {
745 saved_errno = errno;
746 SYSERROR("memory allocation error");
747 free(source);
748 errno = saved_errno;
749 return -1;
750 }
751 }
752 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
753 saved_errno = errno;
c414be25
DE
754 if (r < 0)
755 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
756 free(source);
757 free(destination);
758 if (r < 0) {
b06b8511
CS
759 errno = saved_errno;
760 return -1;
761 }
368bbc02 762 }
368bbc02
CS
763 }
764
b06b8511 765 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
766 int cg_flags;
767
768 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
769 /* If the type of cgroup mount was not specified, it depends on the
770 * container's capabilities as to what makes sense: if we have
771 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
772 * anyway, so we may as well default to read-write; then the admin
773 * will not be given a false sense of security. (And if they really
774 * want mixed r/o r/w, then they can explicitly specify :mixed.)
775 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
776 * :mixed, because then the container can't remount it read-write. */
777 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
778 int has_sys_admin = 0;
779 if (!lxc_list_empty(&conf->keepcaps)) {
780 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
781 } else {
782 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
783 }
784 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
785 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
786 } else {
787 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
788 }
789 }
790
791 if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
368bbc02 792 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 793 return -1;
368bbc02
CS
794 }
795 }
796
368bbc02 797 return 0;
368bbc02
CS
798}
799
a17b1e65 800static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 801{
b09ef133 802 char absrootfs[MAXPATHLEN];
78ae2fcc 803 struct stat s;
a6afdde9 804 int i;
78ae2fcc 805
a17b1e65 806 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 807
808 struct rootfs_type {
809 int type;
810 rootfs_cb cb;
811 } rtfs_type[] = {
2656d231
DL
812 { S_IFDIR, mount_rootfs_dir },
813 { S_IFBLK, mount_rootfs_block },
814 { S_IFREG, mount_rootfs_file },
78ae2fcc 815 };
0ad19a3f 816
4c8ab83b 817 if (!realpath(rootfs, absrootfs)) {
36eb9bde 818 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 819 return -1;
820 }
b09ef133 821
b09ef133 822 if (access(absrootfs, F_OK)) {
36eb9bde 823 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 824 return -1;
825 }
826
78ae2fcc 827 if (stat(absrootfs, &s)) {
36eb9bde 828 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 829 return -1;
830 }
831
78ae2fcc 832 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 833
78ae2fcc 834 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
835 continue;
9b0f0477 836
a17b1e65 837 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 838 }
9b0f0477 839
36eb9bde 840 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 841 return -1;
0ad19a3f 842}
843
/*
 * Set the hostname to @utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and counts as
 * success. Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;
	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", host);
		return -1;
	}

	INFO("'%s' hostname has been setup", host);

	return 0;
}
858
69aa6655
DE
/* A symlink to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name, relative to /dev */
};

/* Symlinks created by setup_dev_symlinks(). */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
870
871static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
872{
873 char path[MAXPATHLEN];
874 int ret,i;
09227be2 875 struct stat s;
69aa6655
DE
876
877
878 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
879 const struct dev_symlinks *d = &dev_symlinks[i];
880 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
881 if (ret < 0 || ret >= MAXPATHLEN)
882 return -1;
09227be2
MW
883
884 /*
885 * Stat the path first. If we don't get an error
886 * accept it as is and don't try to create it
887 */
888 if (!stat(path, &s)) {
889 continue;
890 }
891
69aa6655 892 ret = symlink(d->oldpath, path);
09227be2 893
69aa6655 894 if (ret && errno != EEXIST) {
09227be2
MW
895 if ( errno == EROFS ) {
896 WARN("Warning: Read Only file system while creating %s", path);
897 } else {
898 SYSERROR("Error creating %s", path);
899 return -1;
900 }
69aa6655
DE
901 }
902 }
903 return 0;
904}
905
33fcb7a0 906static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 907 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 908{
7c6ef2a2
SH
909 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
910 int i, ret;
b0a33c1e 911
bc9bd0e3
DL
912 if (!rootfs->path)
913 return 0;
914
b0a33c1e 915 for (i = 0; i < tty_info->nbtty; i++) {
916
917 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
918
7c6ef2a2 919 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 920 rootfs->mount, i + 1);
7c6ef2a2
SH
921 if (ret >= sizeof(path)) {
922 ERROR("pathname too long for ttys");
923 return -1;
924 }
925 if (ttydir) {
926 /* create dev/lxc/tty%d" */
9ba8130c 927 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
928 rootfs->mount, ttydir, i + 1);
929 if (ret >= sizeof(lxcpath)) {
930 ERROR("pathname too long for ttys");
931 return -1;
932 }
933 ret = creat(lxcpath, 0660);
934 if (ret==-1 && errno != EEXIST) {
959aee9c 935 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
936 return -1;
937 }
4d44e274
SH
938 if (ret >= 0)
939 close(ret);
7c6ef2a2
SH
940 ret = unlink(path);
941 if (ret && errno != ENOENT) {
959aee9c 942 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
943 return -1;
944 }
b0a33c1e 945
7c6ef2a2
SH
946 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
947 WARN("failed to mount '%s'->'%s'",
948 pty_info->name, path);
949 continue;
950 }
13954cce 951
9ba8130c
SH
952 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
953 if (ret >= sizeof(lxcpath)) {
954 ERROR("tty pathname too long");
955 return -1;
956 }
7c6ef2a2
SH
957 ret = symlink(lxcpath, path);
958 if (ret) {
959aee9c 959 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
960 return -1;
961 }
962 } else {
c6883f38
SH
963 /* If we populated /dev, then we need to create /dev/ttyN */
964 if (access(path, F_OK)) {
965 ret = creat(path, 0660);
966 if (ret==-1) {
959aee9c 967 SYSERROR("error creating %s", path);
c6883f38 968 /* this isn't fatal, continue */
025ed0f3 969 } else {
c6883f38 970 close(ret);
025ed0f3 971 }
c6883f38 972 }
7c6ef2a2
SH
973 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
974 WARN("failed to mount '%s'->'%s'",
975 pty_info->name, path);
976 continue;
977 }
b0a33c1e 978 }
979 }
980
cd54d859
DL
981 INFO("%d tty(s) has been setup", tty_info->nbtty);
982
b0a33c1e 983 return 0;
984}
985
7a7ff0c6 986static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
987{
988 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 989 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
990 int found;
991 void **cbparm;
992
993 mountentry = buffer;
994 cbparm = (void **)data;
995
996 mountlist = cbparm[0];
997 pivotdir = cbparm[1];
998
999 /* parse entry, first field is mountname, ignore */
2796cf79 1000 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
1001 if (!mountpoint)
1002 return -1;
1003
1004 /* second field is mountpoint */
2796cf79 1005 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
1006 if (!mountpoint)
1007 return -1;
1008
1009 /* only consider mountpoints below old root fs */
1010 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
1011 return 0;
1012
1013 /* filter duplicate mountpoints */
1014 found = 0;
1015 lxc_list_for_each(iterator, mountlist) {
1016 if (!strcmp(iterator->elem, mountpoint)) {
1017 found = 1;
1018 break;
1019 }
1020 }
1021 if (found)
1022 return 0;
1023
1024 /* add entry to list */
1025 listentry = malloc(sizeof(*listentry));
1026 if (!listentry) {
1027 SYSERROR("malloc for mountpoint listentry failed");
1028 return -1;
1029 }
1030
1031 listentry->elem = strdup(mountpoint);
1032 if (!listentry->elem) {
1033 SYSERROR("strdup failed");
00b6be44 1034 free(listentry);
bf601689
MH
1035 return -1;
1036 }
1037 lxc_list_add_tail(mountlist, listentry);
1038
1039 return 0;
1040}
1041
cc6f6dd7 1042static int umount_oldrootfs(const char *oldrootfs)
bf601689 1043{
2382ecff 1044 char path[MAXPATHLEN];
bf601689 1045 void *cbparm[2];
9ebb03ad 1046 struct lxc_list mountlist, *iterator, *next;
bf601689 1047 int ok, still_mounted, last_still_mounted;
9ba8130c 1048 int rc;
bf601689
MH
1049
1050 /* read and parse /proc/mounts in old root fs */
1051 lxc_list_init(&mountlist);
1052
cc6f6dd7 1053 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
1054 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
1055 if (rc >= sizeof(path)) {
1056 ERROR("rootfs name too long");
1057 return -1;
1058 }
bf601689 1059 cbparm[0] = &mountlist;
bf601689 1060
cc6f6dd7 1061 cbparm[1] = strdup(path);
bf601689
MH
1062 if (!cbparm[1]) {
1063 SYSERROR("strdup failed");
1064 return -1;
1065 }
1066
9ba8130c
SH
1067 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
1068 if (rc >= sizeof(path)) {
1069 ERROR("container proc/mounts name too long");
1070 return -1;
1071 }
cc6f6dd7
DL
1072
1073 ok = lxc_file_for_each_line(path,
1074 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
1075 if (ok < 0) {
1076 SYSERROR("failed to read or parse mount list '%s'", path);
1077 return -1;
1078 }
1079
1080 /* umount filesystems until none left or list no longer shrinks */
1081 still_mounted = 0;
1082 do {
1083 last_still_mounted = still_mounted;
1084 still_mounted = 0;
1085
9ebb03ad 1086 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 1087
c08556c6 1088 /* umount normally */
bf601689
MH
1089 if (!umount(iterator->elem)) {
1090 DEBUG("umounted '%s'", (char *)iterator->elem);
1091 lxc_list_del(iterator);
1092 continue;
1093 }
1094
bf601689
MH
1095 still_mounted++;
1096 }
7df119ee 1097
bf601689
MH
1098 } while (still_mounted > 0 && still_mounted != last_still_mounted);
1099
7df119ee 1100
c08556c6
DL
1101 lxc_list_for_each(iterator, &mountlist) {
1102
1103 /* let's try a lazy umount */
1104 if (!umount2(iterator->elem, MNT_DETACH)) {
1105 INFO("lazy unmount of '%s'", (char *)iterator->elem);
1106 continue;
1107 }
1108
1109 /* be more brutal (nfs) */
1110 if (!umount2(iterator->elem, MNT_FORCE)) {
1111 INFO("forced unmount of '%s'", (char *)iterator->elem);
1112 continue;
1113 }
1114
7df119ee 1115 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1116 }
bf601689 1117
cc6f6dd7
DL
1118 return 0;
1119}
1120
/*
 * Make @rootfs the root of the mount namespace with pivot_root.
 *
 * @rootfs:   directory that becomes the new root.
 * @pivotdir: directory below @rootfs receiving the old root; "lxc_putold"
 *            is used when NULL.
 *
 * The old root is unmounted afterwards and, if the pivot directory had to be
 * created here, it is removed again (failure to remove is not fatal).
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char oldroot[MAXPATHLEN];
	int created_here = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(oldroot, sizeof(oldroot), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(oldroot)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	/* create the put_old directory when it is missing */
	if (access(oldroot, F_OK) != 0) {
		if (mkdir_p(oldroot, 0755) < 0) {
			SYSERROR("failed to create pivotdir '%s'", oldroot);
			return -1;
		}

		created_here = 1;
		DEBUG("created '%s' directory", oldroot);
	}

	DEBUG("mountpoint for old rootfs is '%s'", oldroot);

	/* pivot_root into our new root fs */
	if (pivot_root(".", oldroot) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* from here on the old root is addressed by its relative name */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* removing the temporary mount point is best effort only */
	if (created_here && rmdir(pivotdir) != 0)
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
1180
bc6928ff
MW
/*
 * Check to see if a directory has something mounted on it and,
 * if it does, return the fstype.
 *
 * Code largely based on detect_shared_rootfs below
 *
 * @dir:    directory to look up; it is compared verbatim against the
 *          second field of each line of /proc/self/mounts.
 * @fstype: optional output buffer of at least MAX_FSTYPE_LEN bytes, may be
 *          NULL when the caller only needs the count.
 *
 * Returns: # of matching entries in /proc/self/mounts
 *	if != 0 fstype is filled with the last filesystem value.
 *	if == 0 no matches found (or @dir is not an accessible directory, or
 *	        the mount table could not be opened), fstype unchanged.
 *
 * ToDo: Maybe return the mount options in another parameter...
 */

#define LINELEN 4096
#define MAX_FSTYPE_LEN 128
static int mount_check_fs( const char *dir, char *fstype )
{
	char buf[LINELEN];
	char *mnt_dir, *mnt_type, *sep;
	struct stat s;
	FILE *f;
	int found_fs = 0;

	DEBUG("entering mount_check_fs for %s", dir);

	if (0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode))
		return 0;

	f = fopen("/proc/self/mounts", "r");
	if (!f)
		return 0;

	while (fgets(buf, sizeof(buf), f)) {
		/* first field is the mount source, skip it */
		sep = strchr(buf, ' ');
		if (!sep)
			continue;
		mnt_dir = sep + 1;

		/* second field is the mountpoint */
		sep = strchr(mnt_dir, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		/* Compare the directory in the entry to desired */
		if (strcmp(mnt_dir, dir))
			continue;

		/* third field is the filesystem type */
		mnt_type = sep + 1;
		sep = strchr(mnt_type, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		++found_fs;

		if (fstype)
			snprintf(fstype, MAX_FSTYPE_LEN, "%s", mnt_type);
	}

	fclose(f);

	/*
	 * fstype may be NULL, or an untouched caller buffer when nothing
	 * matched; only print it when it was actually filled in.
	 */
	DEBUG("mount_check_fs returning %d last %s", found_fs,
	      (found_fs && fstype) ? fstype : "(none)");

	return found_fs;
}
1250
1251/*
1252 * Locate a devtmpfs mount (should be on /dev) and create a container
1253 * subdirectory on it which we can then bind mount to the container
1254 * /dev instead of mounting a tmpfs there.
1255 * If we fail, return NULL.
1256 * Else return the pointer to the name buffer with the string to
1257 * the devtmpfs subdirectory.
1258 */
1259
74a3920a 1260static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1261{
1262 int ret;
1263 struct stat s;
1264 char tmp_path[MAXPATHLEN];
1265 char fstype[MAX_FSTYPE_LEN];
bc6928ff
MW
1266 uint64_t hash;
1267
f0d02950 1268 if ( 0 != access(dev_base_path, F_OK) || 0 != stat(dev_base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
bc6928ff 1269 /* This is just making /dev/.lxc it better work or we're done */
f0d02950 1270 ret = mkdir(dev_base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
bc6928ff
MW
1271 if ( ret ) {
1272 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1273 return NULL;
1274 }
1275 }
1276
1277 /*
1278 * Programmers notes:
1279 * We can not do mounts in this area of code that we want
1280 * to be visible in the host. Consequently, /dev/.lxc must
1281 * be set up earlier if we need a tmpfs mounted there.
1282 * That only affects the rare cases where autodev is enabled
1283 * for a container and devtmpfs is not mounted on /dev in the
1284 * host. In that case, we'll fall back to the old method
1285 * of mounting a tmpfs in the container and have no visibility
1286 * into the container /dev.
1287 */
1288 if( ! mount_check_fs( "/dev", fstype )
1289 || strcmp( "devtmpfs", fstype ) ) {
1290 /* Either /dev was not mounted or was not devtmpfs */
1291
1292 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1293 /*
1294 * /dev/.lxc is not already mounted
1295 * Doing a mount here does no good, since
1296 * it's not visible in the host.
1297 */
1298
1299 ERROR("/dev/.lxc is not setup - taking fallback" );
1300 return NULL;
1301 }
1302 }
1303
f0d02950 1304 if ( 0 != access(dev_user_path, F_OK) || 0 != stat(dev_user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
bc6928ff
MW
1305 /*
1306 * This is making /dev/.lxc/user path for non-priv users.
1307 * If this doesn't work, we'll have to fall back in the
1308 * case of non-priv users. It's mode 1777 like /tmp.
1309 */
f0d02950 1310 ret = mkdir(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
bc6928ff
MW
1311 if ( ret ) {
1312 /* Issue an error but don't fail yet! */
1313 ERROR("Unable to create /dev/.lxc/user");
1314 }
1315 /* Umask tends to screw us up here */
f0d02950 1316 chmod(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
bc6928ff
MW
1317 }
1318
1319 /*
1320 * Since the container name must be unique within a given
1321 * lxcpath, we're going to use a hash of the path
1322 * /lxcpath/name as our hash name in /dev/.lxc/
1323 */
1324
1325 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1326 if (ret < 0 || ret >= MAXPATHLEN)
1327 return NULL;
1328
1329 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1330
f0d02950 1331 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
bc6928ff
MW
1332 if (ret < 0 || ret >= MAXPATHLEN)
1333 return NULL;
1334
1335 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1336 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1337 if ( ret ) {
f0d02950
JTLB
1338 /* Something must have failed with the dev_base_path...
1339 * Maybe unpriv user. Try dev_user_path now... */
bc6928ff
MW
1340 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1341
f0d02950 1342 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
bc6928ff
MW
1343 if (ret < 0 || ret >= MAXPATHLEN)
1344 return NULL;
1345
1346 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1347 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1348 if ( ret ) {
1349 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1350 return NULL;
1351 }
1352 }
1353 }
1354 }
1355
1356 strcpy( path, tmp_path );
1357 return path;
1358}
1359
91c3830e
SH
/*
 * Mount a /dev for the container below @root and make sure dev/pts exists.
 *
 * @name:    container name.
 * @root:    path of the container root the /dev is mounted under.
 * @lxcpath: lxcpath the container lives in.
 *
 * If mk_devtmpfs() provides a directory, it is bind mounted on "<root>/dev"
 * and "<lxcpath>/<name>/rootfs.dev" is made a symlink to it. Otherwise a
 * tmpfs is mounted, unless something is already mounted on rootfs.dev, in
 * which case that is bind mounted.
 *
 * Returns 0 on success, -1 on failure.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s", root);

	/* ret == MAXPATHLEN already means the output was truncated */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs( name, devtmpfs_path, lxcpath ) ) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink( host_path );
		rmdir( host_path );
		ret = symlink(devtmpfs_path, host_path);

		if ( ret < 0 ) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0 );
	} else {
		/* Only mount a tmpfs here if we don't already have a mount */
		if ( ! mount_check_fs( host_path, NULL ) ) {
			/* the tmpfs goes on the container's /dev, not on host_path */
			DEBUG("Mounting tmpfs to %s", path );
			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path );
			ret = mount(host_path , path, NULL, MS_BIND, 0 );
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) || 0 == S_ISDIR(s.st_mode) ) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1433
/* Description of one device node created by setup_autodev(). */
struct lxc_devs {
	const char *name;	/* node name below <root>/dev */
	mode_t mode;		/* file type and permission bits passed to mknod */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes populated into an autodev /dev. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1450
74a3920a 1451static int setup_autodev(const char *root)
c6883f38
SH
1452{
1453 int ret;
c6883f38
SH
1454 char path[MAXPATHLEN];
1455 int i;
3a32201c 1456 mode_t cmask;
c6883f38 1457
959aee9c 1458 INFO("Creating initial consoles under %s/dev", root);
91c3830e 1459
c6883f38 1460 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1461 if (ret < 0 || ret >= MAXPATHLEN) {
1462 ERROR("Error calculating container /dev location");
c6883f38 1463 return -1;
f7bee6c6 1464 }
91c3830e 1465
959aee9c 1466 INFO("Populating /dev under %s", root);
3a32201c 1467 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1468 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1469 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1470 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1471 if (ret < 0 || ret >= MAXPATHLEN)
1472 return -1;
1473 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1474 if (ret && errno != EEXIST) {
959aee9c 1475 SYSERROR("Error creating %s", d->name);
c6883f38
SH
1476 return -1;
1477 }
1478 }
3a32201c 1479 umask(cmask);
c6883f38 1480
959aee9c 1481 INFO("Populated /dev under %s", root);
c6883f38
SH
1482 return 0;
1483}
1484
f0d02950
JTLB
1485/*
1486 * Locate allocated devtmpfs mount and purge it.
1487 * path lookup mostly taken from mk_devtmpfs
1488 */
1489int lxc_delete_autodev(struct lxc_handler *handler)
1490{
1491 int ret;
1492 struct stat s;
1493 struct lxc_conf *lxc_conf = handler->conf;
1494 const char *name = handler->name;
1495 const char *lxcpath = handler->lxcpath;
1496 char tmp_path[MAXPATHLEN];
1497 uint64_t hash;
1498
1499 if ( lxc_conf->autodev <= 0 )
1500 return 0;
1501
1c90734d
JTLB
1502 /* don't clean on reboot */
1503 if ( lxc_conf->reboot == 1 )
1504 return 0;
f0d02950
JTLB
1505
1506 /*
1507 * Use the same logic as mk_devtmpfs to compute candidate
1508 * path for cleanup.
1509 */
1510
1511 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1512 if (ret < 0 || ret >= MAXPATHLEN)
1513 return -1;
1514
1515 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1516
1517 /* Probe /dev/.lxc/<container name>.<hash> */
1518 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
1519 if (ret < 0 || ret >= MAXPATHLEN)
1520 return -1;
1521
1522 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1523 /* Probe /dev/.lxc/user/<container name>.<hash> */
1524 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
1525 if (ret < 0 || ret >= MAXPATHLEN)
1526 return -1;
1527
1528 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1529 WARN("Failed to locate autodev /dev/.lxc and /dev/.lxc/user." );
1530 return -1;
1531 }
1532 }
1533
1534 /* Do the cleanup */
1535 INFO("Cleaning %s", tmp_path );
1536 if ( 0 != lxc_rmdir_onedev(tmp_path, NULL) ) {
1537 ERROR("Failed to cleanup autodev" );
1538 }
1539
1540 return 0;
1541}
1542
cc28d0b0
SH
1543/*
1544 * I'll forgive you for asking whether all of this is needed :) The
1545 * answer is yes.
1546 * pivot_root will fail if the new root, the put_old dir, or the parent
1547 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1548 * or may not be current->fs_root - if we assumed it always was, we could
1549 * just mount --make-rslave /). So,
1550 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1551 * 2. make that MS_SLAVE
1552 * 3. make a 'root' directory under that
1553 * 4. mount --rbind / under the $tinyroot/root.
1554 * 5. make that rslave
1555 * 6. chdir and chroot into $tinyroot/root
1556 * 7. $tinyroot will be unmounted by our parent in start.c
1557 */
1558static int chroot_into_slave(struct lxc_conf *conf)
1559{
1560 char path[MAXPATHLEN];
1561 const char *destpath = conf->rootfs.mount;
1562 int ret;
1563
1564 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1565 SYSERROR("failed to mount %s bind", destpath);
1566 return -1;
1567 }
1568 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1569 SYSERROR("failed to make %s slave", destpath);
1570 return -1;
1571 }
58ab99ae 1572 if (mount("none", destpath, "tmpfs", 0, "size=10000,mode=755")) {
cc28d0b0
SH
1573 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1574 return -1;
1575 }
1576 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1577 if (ret < 0 || ret >= MAXPATHLEN) {
1578 ERROR("out of memory making root path");
1579 return -1;
1580 }
1581 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1582 SYSERROR("Failed to create /dev/pts in container");
1583 return -1;
1584 }
1585 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1586 SYSERROR("Failed to rbind mount / to %s", path);
1587 return -1;
1588 }
1589 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1590 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1591 return -1;
1592 }
cc28d0b0
SH
1593 if (chroot(path)) {
1594 SYSERROR("Failed to chroot into tmp-/");
1595 return -1;
1596 }
6b9324bd
SG
1597 if (chdir("/")) {
1598 SYSERROR("Failed to chdir into tmp-/");
1599 return -1;
1600 }
959aee9c 1601 INFO("Chrooted into tmp-/ at %s", path);
cc28d0b0
SH
1602 return 0;
1603}
1604
1605static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1606{
cc28d0b0
SH
1607 const struct lxc_rootfs *rootfs = &conf->rootfs;
1608
a0f379bf
DW
1609 if (!rootfs->path) {
1610 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1611 SYSERROR("Failed to make / rslave");
1612 return -1;
1613 }
c69bd12f 1614 return 0;
a0f379bf 1615 }
0ad19a3f 1616
12297168 1617 if (access(rootfs->mount, F_OK)) {
b1789442 1618 SYSERROR("failed to access to '%s', check it is present",
12297168 1619 rootfs->mount);
b1789442
DL
1620 return -1;
1621 }
1622
9be53773 1623 // First try mounting rootfs using a bdev
76a26f55 1624 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1625 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1626 bdev_put(bdev);
9be53773
SH
1627 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1628 return 0;
1629 }
59d66af2
SH
1630 if (bdev)
1631 bdev_put(bdev);
a17b1e65 1632 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1633 ERROR("failed to mount rootfs");
c3f0a28c 1634 return -1;
1635 }
0ad19a3f 1636
12297168 1637 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1638
ac778708
DL
1639 return 0;
1640}
1641
74a3920a 1642static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1643{
ac778708
DL
1644 if (!rootfs->path)
1645 return 0;
1646
12297168 1647 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1648 ERROR("failed to setup pivot root");
25368b52 1649 return -1;
c69bd12f
DL
1650 }
1651
25368b52 1652 return 0;
0ad19a3f 1653}
1654
d852c78c 1655static int setup_pts(int pts)
3c26f34e 1656{
77890c6d
SW
1657 char target[PATH_MAX];
1658
d852c78c
DL
1659 if (!pts)
1660 return 0;
3c26f34e 1661
1662 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1663 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1664 return -1;
1665 }
1666
7e40254a
JTLB
1667 if (mkdir("/dev/pts", 0755)) {
1668 if ( errno != EEXIST ) {
1669 SYSERROR("failed to create '/dev/pts'");
1670 return -1;
1671 }
1672 }
1673
a6afdde9 1674 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1675 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1676 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1677 return -1;
1678 }
1679
3c26f34e 1680 if (access("/dev/ptmx", F_OK)) {
1681 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1682 goto out;
36eb9bde 1683 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1684 return -1;
1685 }
1686
77890c6d
SW
1687 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1688 goto out;
1689
3c26f34e 1690 /* fallback here, /dev/pts/ptmx exists just mount bind */
1691 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1692 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1693 return -1;
1694 }
cd54d859
DL
1695
1696 INFO("created new pts instance");
d852c78c 1697
3c26f34e 1698out:
1699 return 0;
1700}
1701
cccc74b5
DL
/*
 * Apply the requested execution personality.
 *
 * @persona: personality value, or -1 to leave the current one untouched.
 *
 * Does nothing when built without HAVE_SYS_PERSONALITY_H.
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1718
7c6ef2a2 1719static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1720 const struct lxc_console *console)
6e590161 1721{
63376d7d
DL
1722 char path[MAXPATHLEN];
1723 struct stat s;
7c6ef2a2 1724 int ret;
52e35957 1725
7c6ef2a2
SH
1726 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1727 if (ret >= sizeof(path)) {
959aee9c 1728 ERROR("console path too long");
7c6ef2a2
SH
1729 return -1;
1730 }
52e35957 1731
63376d7d 1732 if (access(path, F_OK)) {
466978b0 1733 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1734 return 0;
52e35957
DL
1735 }
1736
b5159817
DE
1737 if (console->master < 0) {
1738 INFO("no console");
f78a1f32
DL
1739 return 0;
1740 }
ed502555 1741
63376d7d
DL
1742 if (stat(path, &s)) {
1743 SYSERROR("failed to stat '%s'", path);
1744 return -1;
1745 }
1746
1747 if (chmod(console->name, s.st_mode)) {
1748 SYSERROR("failed to set mode '0%o' to '%s'",
1749 s.st_mode, console->name);
1750 return -1;
1751 }
13954cce 1752
63376d7d
DL
1753 if (mount(console->name, path, "none", MS_BIND, 0)) {
1754 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1755 return -1;
1756 }
1757
63376d7d 1758 INFO("console has been setup");
7c6ef2a2
SH
1759 return 0;
1760}
1761
1762static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1763 const struct lxc_console *console,
1764 char *ttydir)
1765{
1766 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1767 int ret;
1768
1769 /* create rootfs/dev/<ttydir> directory */
1770 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1771 ttydir);
1772 if (ret >= sizeof(path))
1773 return -1;
1774 ret = mkdir(path, 0755);
1775 if (ret && errno != EEXIST) {
959aee9c 1776 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1777 return -1;
1778 }
959aee9c 1779 INFO("created %s", path);
7c6ef2a2
SH
1780
1781 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1782 rootfs->mount, ttydir);
1783 if (ret >= sizeof(lxcpath)) {
959aee9c 1784 ERROR("console path too long");
7c6ef2a2
SH
1785 return -1;
1786 }
1787
1788 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1789 ret = unlink(path);
1790 if (ret && errno != ENOENT) {
959aee9c 1791 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1792 return -1;
1793 }
1794
1795 ret = creat(lxcpath, 0660);
1796 if (ret==-1 && errno != EEXIST) {
959aee9c 1797 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1798 return -1;
1799 }
4d44e274
SH
1800 if (ret >= 0)
1801 close(ret);
7c6ef2a2 1802
b5159817
DE
1803 if (console->master < 0) {
1804 INFO("no console");
7c6ef2a2
SH
1805 return 0;
1806 }
1807
1808 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1809 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1810 return -1;
1811 }
1812
1813 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1814 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1815 if (ret >= sizeof(lxcpath)) {
1816 ERROR("lxc/console path too long");
1817 return -1;
1818 }
7c6ef2a2
SH
1819 ret = symlink(lxcpath, path);
1820 if (ret) {
1821 SYSERROR("failed to create symlink for console");
1822 return -1;
1823 }
1824
1825 INFO("console has been setup on %s", lxcpath);
cd54d859 1826
6e590161 1827 return 0;
1828}
1829
7c6ef2a2
SH
1830static int setup_console(const struct lxc_rootfs *rootfs,
1831 const struct lxc_console *console,
1832 char *ttydir)
1833{
1834 /* We don't have a rootfs, /dev/console will be shared */
1835 if (!rootfs->path)
1836 return 0;
1837 if (!ttydir)
1838 return setup_dev_console(rootfs, console);
1839
1840 return setup_ttydir_console(rootfs, console, ttydir);
1841}
1842
1bd051a6
SH
1843static int setup_kmsg(const struct lxc_rootfs *rootfs,
1844 const struct lxc_console *console)
1845{
1846 char kpath[MAXPATHLEN];
1847 int ret;
1848
222fea5a
DE
1849 if (!rootfs->path)
1850 return 0;
1bd051a6
SH
1851 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1852 if (ret < 0 || ret >= sizeof(kpath))
1853 return -1;
1854
1855 ret = unlink(kpath);
1856 if (ret && errno != ENOENT) {
959aee9c 1857 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1858 return -1;
1859 }
1860
1861 ret = symlink("console", kpath);
1862 if (ret) {
1863 SYSERROR("failed to create symlink for kmsg");
1864 return -1;
1865 }
1866
1867 return 0;
1868}
1869
998ac676
RT
1870static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1871{
1872 struct mount_opt *mo;
1873
1874 /* If opt is found in mount_opt, set or clear flags.
1875 * Otherwise append it to data. */
1876
1877 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1878 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1879 if (mo->clear)
1880 *flags &= ~mo->flag;
1881 else
1882 *flags |= mo->flag;
1883 return;
1884 }
1885 }
1886
1887 if (strlen(*data))
1888 strcat(*data, ",");
1889 strcat(*data, opt);
1890}
1891
/*
 * Split a comma separated mount option string into mount flags and
 * filesystem specific data.
 *
 * @mntopts:  option string, may be NULL.
 * @mntflags: receives the accumulated flags (0 when there are none).
 * @mntdata:  receives a newly allocated string with the remaining options,
 *            or NULL when nothing remains; the caller frees it.
 *
 * Returns 0 on success, -1 when memory cannot be allocated.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *rest;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (mntopts == NULL)
		return 0;

	/* strtok_r modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (copy == NULL) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the leftover options can never be longer than the input */
	rest = malloc(strlen(copy) + 1);
	if (rest == NULL) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	*rest = 0;

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &rest);
		tok = strtok_r(NULL, ",", &state);
	}

	if (*rest != '\0')
		*mntdata = rest;
	else
		free(rest);
	free(copy);

	return 0;
}
1930
6fd5e769
SH
/*
 * Terminate @word in place at its first space or tab (or leave it as is
 * when it contains neither).
 */
static void null_endofword(char *word)
{
	char *end = word + strcspn(word, " \t");

	*end = '\0';
}
1937
/*
 * Advance past @nfields whitespace-terminated fields of @src.
 *
 * Each step skips up to the next space or tab and then over that single
 * separator. The walk stops early at the end of the string.
 * Returns a pointer into @src.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int skipped = 0;

	while (skipped < nfields) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
		skipped++;
	}

	return cur;
}
1955
911324ef
DL
/*
 * Perform one mount, followed by a remount when MS_BIND or MS_REMOUNT was
 * requested.
 *
 * For the remount, the nosuid/nodev/ro/noexec flags reported by statvfs()
 * on @fsname are added to @mountflags. A bind mount whose flags already
 * include all of those is not remounted.
 *
 * @optional: when non-zero a failing mount is logged and reported as
 *            success.
 *
 * Returns 0 on success (or tolerated failure), -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional)
{
	const unsigned long inherited = MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC;
	struct statvfs vfs;
	int remount;

	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data) != 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		remount = 1;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

		if (statvfs(fsname, &vfs) == 0) {
			unsigned long required_flags = vfs.f_flag & inherited;

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, vfs.f_flag, required_flags);

			/*
			 * If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount
			 */
			if (!(mountflags & MS_REMOUNT) && !(required_flags & ~mountflags)) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				remount = 0;
			} else {
				mountflags |= required_flags;
			}
		}

		if (remount && mount(fsname, target, fstype,
				     mountflags | MS_REMOUNT, data) != 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}
			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
2024
4e4ca161
SH
/*
 * Remove 'optional', 'create=dir', and 'create=file' from mntopt
 *
 * Only the first occurrence of each is removed, and only when it starts an
 * option (at the beginning of the string or right after a comma) and is
 * followed by the end of the string, ',' or '='. The comma separating the
 * removed option from its neighbour is removed too, so no dangling ','
 * is left when the option was the last one.
 * mntent->mnt_opts is edited in place.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
		NULL
	};
	char *opts = mntent->mnt_opts;
	int i;

	for (i = 0; culled[i]; i++) {
		size_t len = strlen(culled[i]);
		char *p = opts;
		char *next;

		/* find the first occurrence that is a whole option */
		while ((p = strstr(p, culled[i])) != NULL) {
			int at_start = (p == opts || p[-1] == ',');
			int at_end = (p[len] == '\0' || p[len] == ',' || p[len] == '=');

			if (at_start && at_end)
				break;
			p++;
		}
		if (!p)
			continue;

		next = strchr(p, ',');
		if (next) {
			/* more options follow: close the gap */
			memmove(p, next + 1, strlen(next + 1) + 1);
		} else if (p != opts) {
			/* last option: chop it together with the comma before it */
			p[-1] = '\0';
		} else {
			/* it was the only option */
			*p = '\0';
		}
	}
}
2049
/*
 * Mount one fstab entry whose target is used as given in mntent->mnt_dir.
 *
 * The lxc specific options are honoured first: "create=dir" creates the
 * target directory, "create=file" creates the target file (and its parent
 * directory) when the target does not exist, and "optional" makes a failing
 * mount non-fatal. Failing to create the target is only warned about; the
 * mount is attempted regardless. Those options are then removed from
 * mntent->mnt_opts before the remaining ones are parsed and handed to
 * mount_entry().
 *
 * Returns the result of mount_entry(), or -1 when the options cannot be
 * parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathcopy = NULL;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * that is not the start of our allocation, so keep the
		 * strdup'd pointer for free() and never free dirname's result.
		 */
		pathcopy = strdup(mntent->mnt_dir);
		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(pathcopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
2096
4e4ca161 2097static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
2098 const struct lxc_rootfs *rootfs,
2099 const char *lxc_name)
911324ef 2100{
013bd428 2101 char *aux;
59760f5d 2102 char path[MAXPATHLEN];
911324ef
DL
2103 unsigned long mntflags;
2104 char *mntdata;
80a881b2 2105 int r, ret = 0, offset;
67e571de 2106 const char *lxcpath;
34cfffb3
SG
2107 FILE *pathfile = NULL;
2108 char *pathdirname = NULL;
4f1d50d1 2109 bool optional = hasmntopt(mntent, "optional") != NULL;
0ad19a3f 2110
593e8478 2111 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
2112 if (!lxcpath) {
2113 ERROR("Out of memory");
2114 return -1;
2115 }
2116
80a881b2 2117 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
2118 * use $lxcpath/CN/rootfs as the target prefix */
2119 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
2120 if (r < 0 || r >= MAXPATHLEN)
2121 goto skipvarlib;
2122
2123 aux = strstr(mntent->mnt_dir, path);
2124 if (aux) {
2125 offset = strlen(path);
2126 goto skipabs;
2127 }
2128
2129skipvarlib:
013bd428
DL
2130 aux = strstr(mntent->mnt_dir, rootfs->path);
2131 if (!aux) {
2132 WARN("ignoring mount point '%s'", mntent->mnt_dir);
2133 goto out;
2134 }
80a881b2
SH
2135 offset = strlen(rootfs->path);
2136
2137skipabs:
013bd428 2138
9ba8130c 2139 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
2140 aux + offset);
2141 if (r < 0 || r >= MAXPATHLEN) {
2142 WARN("pathnme too long for '%s'", mntent->mnt_dir);
2143 ret = -1;
2144 goto out;
2145 }
2146
34cfffb3 2147 if (hasmntopt(mntent, "create=dir")) {
119126b6 2148 if (mkdir_p(path, 0755) < 0) {
34cfffb3
SG
2149 WARN("Failed to create mount target '%s'", path);
2150 ret = -1;
2151 }
2152 }
2153
2154 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
2155 pathdirname = strdup(path);
2156 pathdirname = dirname(pathdirname);
119126b6
SG
2157 if (mkdir_p(pathdirname, 0755) < 0) {
2158 WARN("Failed to create target directory");
2159 }
34cfffb3
SG
2160 pathfile = fopen(path, "wb");
2161 if (!pathfile) {
2162 WARN("Failed to create mount target '%s'", path);
2163 ret = -1;
2164 }
2165 else
2166 fclose(pathfile);
2167 }
4e4ca161 2168 cull_mntent_opt(mntent);
d330fe7b 2169
a17b1e65
SG
2170 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
2171 free(mntdata);
2172 return -1;
2173 }
2174
013bd428 2175 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1fc64d22 2176 mntflags, mntdata, optional);
0ad19a3f 2177
a17b1e65
SG
2178 free(mntdata);
2179
013bd428 2180out:
34cfffb3 2181 free(pathdirname);
911324ef
DL
2182 return ret;
2183}
d330fe7b 2184
/*
 * Mount one fstab entry whose target is a relative path: the target is
 * taken to be "<rootfs>/<mnt_dir>".
 *
 * "create=dir" / "create=file" create the target before mounting; failures
 * there only log a warning.  Whether the entry has the "optional" option is
 * forwarded to mount_entry().
 *
 * Returns -1 if the combined path does not fit in MAXPATHLEN or the mount
 * options cannot be parsed, otherwise the result of mount_entry().
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata = NULL;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0)
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		FILE *pathfile;
		/* dirname() may modify its argument and may return a pointer
		 * that is not the start of the allocation, so free the copy
		 * itself rather than dirname()'s return value. */
		char *dircopy = strdup(path);

		if (!dircopy || mkdir_p(dirname(dircopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(dircopy);

		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
2239
80a881b2
SH
2240static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2241 const char *lxc_name)
911324ef 2242{
aaf901be
AM
2243 struct mntent mntent;
2244 char buf[4096];
911324ef 2245 int ret = -1;
e76b8764 2246
aaf901be 2247 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2248
911324ef 2249 if (!rootfs->path) {
aaf901be 2250 if (mount_entry_on_systemfs(&mntent))
e76b8764 2251 goto out;
911324ef 2252 continue;
e76b8764
CDC
2253 }
2254
911324ef 2255 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2256 if (mntent.mnt_dir[0] != '/') {
2257 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2258 rootfs->mount))
2259 goto out;
2260 continue;
2261 }
cd54d859 2262
aaf901be 2263 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2264 goto out;
0ad19a3f 2265 }
cd54d859 2266
0ad19a3f 2267 ret = 0;
cd54d859
DL
2268
2269 INFO("mount points have been setup");
0ad19a3f 2270out:
e7938e9e
MN
2271 return ret;
2272}
2273
80a881b2
SH
/*
 * Mount all entries listed in the fstab file @fstab.
 *
 * A NULL @fstab means there is nothing to do and is reported as success.
 * Returns -1 if the file cannot be opened, otherwise the result of
 * mount_file_entries().
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *fstab_stream;
	int rc;

	if (fstab == NULL)
		return 0;

	fstab_stream = setmntent(fstab, "r");
	if (fstab_stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fstab_stream, lxc_name);
	endmntent(fstab_stream);

	return rc;
}
2294
80a881b2
SH
2295static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2296 const char *lxc_name)
e7938e9e
MN
2297{
2298 FILE *file;
2299 struct lxc_list *iterator;
2300 char *mount_entry;
2301 int ret;
2302
2303 file = tmpfile();
2304 if (!file) {
2305 ERROR("tmpfile error: %m");
2306 return -1;
2307 }
2308
2309 lxc_list_for_each(iterator, mount) {
2310 mount_entry = iterator->elem;
1d6b1976 2311 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2312 }
2313
2314 rewind(file);
2315
80a881b2 2316 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2317
2318 fclose(file);
2319 return ret;
2320}
2321
bab88e68
CS
2322static int parse_cap(const char *cap)
2323{
2324 char *ptr = NULL;
2325 int i, capid = -1;
2326
7035407c
DE
2327 if (!strcmp(cap, "none"))
2328 return -2;
2329
bab88e68
CS
2330 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2331
2332 if (strcmp(cap, caps_opt[i].name))
2333 continue;
2334
2335 capid = caps_opt[i].value;
2336 break;
2337 }
2338
2339 if (capid < 0) {
2340 /* try to see if it's numeric, so the user may specify
2341 * capabilities that the running kernel knows about but
2342 * we don't */
2343 errno = 0;
2344 capid = strtol(cap, &ptr, 10);
2345 if (!ptr || *ptr != '\0' || errno != 0)
2346 /* not a valid number */
2347 capid = -1;
2348 else if (capid > lxc_caps_last_cap())
2349 /* we have a number but it's not a valid
2350 * capability */
2351 capid = -1;
2352 }
2353
2354 return capid;
2355}
2356
0769b82a
CS
2357int in_caplist(int cap, struct lxc_list *caps)
2358{
2359 struct lxc_list *iterator;
2360 int capid;
2361
2362 lxc_list_for_each(iterator, caps) {
2363 capid = parse_cap(iterator->elem);
2364 if (capid == cap)
2365 return 1;
2366 }
2367
2368 return 0;
2369}
2370
81810dd1
DL
2371static int setup_caps(struct lxc_list *caps)
2372{
2373 struct lxc_list *iterator;
2374 char *drop_entry;
bab88e68 2375 int capid;
81810dd1
DL
2376
2377 lxc_list_for_each(iterator, caps) {
2378
2379 drop_entry = iterator->elem;
2380
bab88e68 2381 capid = parse_cap(drop_entry);
d55bc1ad 2382
81810dd1 2383 if (capid < 0) {
1e11be34
DL
2384 ERROR("unknown capability %s", drop_entry);
2385 return -1;
81810dd1
DL
2386 }
2387
2388 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2389
2390 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2391 SYSERROR("failed to remove %s capability", drop_entry);
2392 return -1;
2393 }
81810dd1
DL
2394
2395 }
2396
1fb86a7c
SH
2397 DEBUG("capabilities have been setup");
2398
2399 return 0;
2400}
2401
2402static int dropcaps_except(struct lxc_list *caps)
2403{
2404 struct lxc_list *iterator;
2405 char *keep_entry;
1fb86a7c
SH
2406 int i, capid;
2407 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2408 INFO("found %d capabilities", numcaps);
1fb86a7c 2409
2caf9a97
SH
2410 if (numcaps <= 0 || numcaps > 200)
2411 return -1;
2412
1fb86a7c
SH
2413 // caplist[i] is 1 if we keep capability i
2414 int *caplist = alloca(numcaps * sizeof(int));
2415 memset(caplist, 0, numcaps * sizeof(int));
2416
2417 lxc_list_for_each(iterator, caps) {
2418
2419 keep_entry = iterator->elem;
2420
bab88e68 2421 capid = parse_cap(keep_entry);
1fb86a7c 2422
7035407c
DE
2423 if (capid == -2)
2424 continue;
2425
1fb86a7c
SH
2426 if (capid < 0) {
2427 ERROR("unknown capability %s", keep_entry);
2428 return -1;
2429 }
2430
8255688a 2431 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2432
2433 caplist[capid] = 1;
2434 }
2435 for (i=0; i<numcaps; i++) {
2436 if (caplist[i])
2437 continue;
2438 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2439 SYSERROR("failed to remove capability %d", i);
2440 return -1;
2441 }
1fb86a7c
SH
2442 }
2443
2444 DEBUG("capabilities have been setup");
81810dd1
DL
2445
2446 return 0;
2447}
2448
0ad19a3f 2449static int setup_hw_addr(char *hwaddr, const char *ifname)
2450{
2451 struct sockaddr sockaddr;
2452 struct ifreq ifr;
2453 int ret, fd;
2454
3cfc0f3a
MN
2455 ret = lxc_convert_mac(hwaddr, &sockaddr);
2456 if (ret) {
2457 ERROR("mac address '%s' conversion failed : %s",
2458 hwaddr, strerror(-ret));
0ad19a3f 2459 return -1;
2460 }
2461
2462 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2463 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2464 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2465
2466 fd = socket(AF_INET, SOCK_DGRAM, 0);
2467 if (fd < 0) {
3ab87b66 2468 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2469 return -1;
2470 }
2471
2472 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2473 close(fd);
2474 if (ret)
3ab87b66 2475 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2476
5da6aa8c 2477 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2478
0ad19a3f 2479 return ret;
2480}
2481
82d5ae15 2482static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2483{
82d5ae15
DL
2484 struct lxc_list *iterator;
2485 struct lxc_inetdev *inetdev;
3cfc0f3a 2486 int err;
0ad19a3f 2487
82d5ae15
DL
2488 lxc_list_for_each(iterator, ip) {
2489
2490 inetdev = iterator->elem;
2491
0093bb8c
DL
2492 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2493 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2494 if (err) {
2495 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2496 ifindex, strerror(-err));
82d5ae15
DL
2497 return -1;
2498 }
2499 }
2500
2501 return 0;
0ad19a3f 2502}
2503
82d5ae15 2504static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2505{
82d5ae15 2506 struct lxc_list *iterator;
7fa9074f 2507 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2508 int err;
0ad19a3f 2509
82d5ae15
DL
2510 lxc_list_for_each(iterator, ip) {
2511
2512 inet6dev = iterator->elem;
2513
b3df193c 2514 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2515 &inet6dev->mcast, &inet6dev->acast,
2516 inet6dev->prefix);
3cfc0f3a
MN
2517 if (err) {
2518 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2519 ifindex, strerror(-err));
82d5ae15 2520 return -1;
3cfc0f3a 2521 }
82d5ae15
DL
2522 }
2523
2524 return 0;
0ad19a3f 2525}
2526
82d5ae15 2527static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2528{
0ad19a3f 2529 char ifname[IFNAMSIZ];
0ad19a3f 2530 char *current_ifname = ifname;
3cfc0f3a 2531 int err;
0ad19a3f 2532
82d5ae15
DL
2533 /* empty network namespace */
2534 if (!netdev->ifindex) {
b0efbac4 2535 if (netdev->flags & IFF_UP) {
d472214b 2536 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2537 if (err) {
2538 ERROR("failed to set the loopback up : %s",
2539 strerror(-err));
82d5ae15
DL
2540 return -1;
2541 }
82d5ae15 2542 }
40790553
SH
2543 if (netdev->type != LXC_NET_VETH)
2544 return 0;
2545 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2546 }
13954cce 2547
b466dc33 2548 /* get the new ifindex in case of physical netdev */
40790553 2549 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2550 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2551 ERROR("failed to get ifindex for %s",
2552 netdev->link);
2553 return -1;
2554 }
40790553 2555 }
b466dc33 2556
82d5ae15
DL
2557 /* retrieve the name of the interface */
2558 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2559 ERROR("no interface corresponding to index '%d'",
82d5ae15 2560 netdev->ifindex);
0ad19a3f 2561 return -1;
2562 }
13954cce 2563
018ef520 2564 /* default: let the system to choose one interface name */
9d083402 2565 if (!netdev->name)
fb6d9b2f
DL
2566 netdev->name = netdev->type == LXC_NET_PHYS ?
2567 netdev->link : "eth%d";
018ef520 2568
82d5ae15 2569 /* rename the interface name */
40790553
SH
2570 if (strcmp(ifname, netdev->name) != 0) {
2571 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2572 if (err) {
2573 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2574 strerror(-err));
2575 return -1;
2576 }
018ef520
DL
2577 }
2578
2579 /* Re-read the name of the interface because its name has changed
2580 * and would be automatically allocated by the system
2581 */
82d5ae15 2582 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2583 ERROR("no interface corresponding to index '%d'",
82d5ae15 2584 netdev->ifindex);
018ef520 2585 return -1;
0ad19a3f 2586 }
2587
82d5ae15
DL
2588 /* set a mac address */
2589 if (netdev->hwaddr) {
2590 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2591 ERROR("failed to setup hw address for '%s'",
82d5ae15 2592 current_ifname);
0ad19a3f 2593 return -1;
2594 }
2595 }
2596
82d5ae15
DL
2597 /* setup ipv4 addresses on the interface */
2598 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2599 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2600 ifname);
2601 return -1;
2602 }
2603
82d5ae15
DL
2604 /* setup ipv6 addresses on the interface */
2605 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2606 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2607 ifname);
2608 return -1;
2609 }
2610
82d5ae15 2611 /* set the network device up */
b0efbac4 2612 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2613 int err;
2614
d472214b 2615 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2616 if (err) {
2617 ERROR("failed to set '%s' up : %s", current_ifname,
2618 strerror(-err));
0ad19a3f 2619 return -1;
2620 }
2621
2622 /* the network is up, make the loopback up too */
d472214b 2623 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2624 if (err) {
2625 ERROR("failed to set the loopback up : %s",
2626 strerror(-err));
0ad19a3f 2627 return -1;
2628 }
2629 }
2630
f8fee0e2
MK
2631 /* We can only set up the default routes after bringing
2632 * up the interface, sine bringing up the interface adds
2633 * the link-local routes and we can't add a default
2634 * route if the gateway is not reachable. */
2635
2636 /* setup ipv4 gateway on the interface */
2637 if (netdev->ipv4_gateway) {
2638 if (!(netdev->flags & IFF_UP)) {
2639 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2640 return -1;
2641 }
2642
2643 if (lxc_list_empty(&netdev->ipv4)) {
2644 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2645 return -1;
2646 }
2647
2648 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2649 if (err) {
fc739df5
SG
2650 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2651 if (err) {
2652 ERROR("failed to add ipv4 dest for '%s': %s",
2653 ifname, strerror(-err));
2654 }
2655
2656 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2657 if (err) {
2658 ERROR("failed to setup ipv4 gateway for '%s': %s",
2659 ifname, strerror(-err));
2660 if (netdev->ipv4_gateway_auto) {
2661 char buf[INET_ADDRSTRLEN];
2662 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2663 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2664 }
2665 return -1;
19a26f82 2666 }
f8fee0e2
MK
2667 }
2668 }
2669
2670 /* setup ipv6 gateway on the interface */
2671 if (netdev->ipv6_gateway) {
2672 if (!(netdev->flags & IFF_UP)) {
2673 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2674 return -1;
2675 }
2676
2677 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2678 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2679 return -1;
2680 }
2681
2682 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2683 if (err) {
fc739df5
SG
2684 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2685 if (err) {
2686 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2687 ifname, strerror(-err));
19a26f82 2688 }
fc739df5
SG
2689
2690 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2691 if (err) {
2692 ERROR("failed to setup ipv6 gateway for '%s': %s",
2693 ifname, strerror(-err));
2694 if (netdev->ipv6_gateway_auto) {
2695 char buf[INET6_ADDRSTRLEN];
2696 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2697 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2698 }
2699 return -1;
2700 }
f8fee0e2
MK
2701 }
2702 }
2703
cd54d859
DL
2704 DEBUG("'%s' has been setup", current_ifname);
2705
0ad19a3f 2706 return 0;
2707}
2708
5f4535a3 2709static int setup_network(struct lxc_list *network)
0ad19a3f 2710{
82d5ae15 2711 struct lxc_list *iterator;
82d5ae15 2712 struct lxc_netdev *netdev;
0ad19a3f 2713
5f4535a3 2714 lxc_list_for_each(iterator, network) {
cd54d859 2715
5f4535a3 2716 netdev = iterator->elem;
82d5ae15
DL
2717
2718 if (setup_netdev(netdev)) {
2719 ERROR("failed to setup netdev");
2720 return -1;
2721 }
2722 }
cd54d859 2723
5f4535a3
DL
2724 if (!lxc_list_empty(network))
2725 INFO("network has been setup");
cd54d859
DL
2726
2727 return 0;
0ad19a3f 2728}
2729
2af6bd1b
SH
2730/* try to move physical nics to the init netns */
2731void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2732{
2733 int i, ret, oldfd;
2734 char path[MAXPATHLEN];
2735
2736 if (netnsfd < 0)
2737 return;
2738
2739 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2740 if (ret < 0 || ret >= MAXPATHLEN) {
2741 WARN("Failed to open monitor netns fd");
2742 return;
2743 }
2744 if ((oldfd = open(path, O_RDONLY)) < 0) {
2745 SYSERROR("Failed to open monitor netns fd");
2746 return;
2747 }
2748 if (setns(netnsfd, 0) != 0) {
2749 SYSERROR("Failed to enter container netns to reset nics");
2750 close(oldfd);
2751 return;
2752 }
2753 for (i=0; i<conf->num_savednics; i++) {
2754 struct saved_nic *s = &conf->saved_nics[i];
2755 if (lxc_netdev_move_by_index(s->ifindex, 1))
2756 WARN("Error moving nic index:%d back to host netns",
2757 s->ifindex);
2758 }
2759 if (setns(oldfd, 0) != 0)
2760 SYSERROR("Failed to re-enter monitor's netns");
2761 close(oldfd);
2762}
2763
2764void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2765{
2766 int i;
2767
2af6bd1b
SH
2768 if (conf->num_savednics == 0)
2769 return;
2770
7b35f3d6 2771 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2772 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2773 for (i=0; i<conf->num_savednics; i++) {
2774 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2775 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2776 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2777 free(s->orig_name);
2778 }
2779 conf->num_savednics = 0;
7b35f3d6
SH
2780}
2781
ae9242c8
SH
2782static char *default_rootfs_mount = LXCROOTFSMOUNT;
2783
7b379ab3 2784struct lxc_conf *lxc_conf_init(void)
089cd8b8 2785{
7b379ab3 2786 struct lxc_conf *new;
26ddeedd 2787 int i;
7b379ab3
MN
2788
2789 new = malloc(sizeof(*new));
2790 if (!new) {
2791 ERROR("lxc_conf_init : %m");
2792 return NULL;
2793 }
2794 memset(new, 0, sizeof(*new));
2795
b40a606e 2796 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2797 new->personality = -1;
bc6928ff 2798 new->autodev = -1;
596a818d
DE
2799 new->console.log_path = NULL;
2800 new->console.log_fd = -1;
28a4b0e5 2801 new->console.path = NULL;
63376d7d 2802 new->console.peer = -1;
b5159817
DE
2803 new->console.peerpty.busy = -1;
2804 new->console.peerpty.master = -1;
2805 new->console.peerpty.slave = -1;
63376d7d
DL
2806 new->console.master = -1;
2807 new->console.slave = -1;
2808 new->console.name[0] = '\0';
d2e30e99 2809 new->maincmd_fd = -1;
76a26f55 2810 new->nbd_idx = -1;
54c30e29 2811 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2812 if (!new->rootfs.mount) {
2813 ERROR("lxc_conf_init : %m");
2814 free(new);
2815 return NULL;
2816 }
2f3f41d0 2817 new->kmsg = 1;
7b379ab3
MN
2818 lxc_list_init(&new->cgroup);
2819 lxc_list_init(&new->network);
2820 lxc_list_init(&new->mount_list);
81810dd1 2821 lxc_list_init(&new->caps);
1fb86a7c 2822 lxc_list_init(&new->keepcaps);
f6d3e3e4 2823 lxc_list_init(&new->id_map);
f979ac15 2824 lxc_list_init(&new->includes);
4184c3e1 2825 lxc_list_init(&new->aliens);
7c661726 2826 lxc_list_init(&new->environment);
26ddeedd
SH
2827 for (i=0; i<NUM_LXC_HOOKS; i++)
2828 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2829 lxc_list_init(&new->groups);
fe4de9a6
DE
2830 new->lsm_aa_profile = NULL;
2831 new->lsm_se_context = NULL;
5112cd70 2832 new->tmp_umount_proc = 0;
7b379ab3 2833
9f30a190
MM
2834 for (i = 0; i < LXC_NS_MAX; i++)
2835 new->inherit_ns_fd[i] = -1;
2836
7b379ab3 2837 return new;
089cd8b8
DL
2838}
2839
e3b4c4c4 2840static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2841{
8634bc19 2842 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2843 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2844 int err;
13954cce 2845
e892973e
DL
2846 if (netdev->priv.veth_attr.pair)
2847 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2848 else {
9ba8130c
SH
2849 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2850 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2851 ERROR("veth1 name too long");
2852 return -1;
2853 }
a0265685 2854 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2855 if (!veth1) {
2856 ERROR("failed to allocate a temporary name");
2857 return -1;
2858 }
74a2b586
JK
2859 /* store away for deconf */
2860 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2861 }
82d5ae15 2862
0e391e57 2863 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2864 veth2 = lxc_mkifname(veth2buf);
ad40563e 2865 if (!veth2) {
82d5ae15 2866 ERROR("failed to allocate a temporary name");
ad40563e 2867 goto out_delete;
0ad19a3f 2868 }
2869
3cfc0f3a
MN
2870 err = lxc_veth_create(veth1, veth2);
2871 if (err) {
2872 ERROR("failed to create %s-%s : %s", veth1, veth2,
2873 strerror(-err));
ad40563e 2874 goto out_delete;
0ad19a3f 2875 }
13954cce 2876
49684c0b
CS
2877 /* changing the high byte of the mac address to 0xfe, the bridge interface
2878 * will always keep the host's mac address and not take the mac address
2879 * of a container */
2880 err = setup_private_host_hw_addr(veth1);
2881 if (err) {
2882 ERROR("failed to change mac address of host interface '%s' : %s",
2883 veth1, strerror(-err));
2884 goto out_delete;
2885 }
2886
82d5ae15 2887 if (netdev->mtu) {
d472214b 2888 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2889 if (!err)
d472214b 2890 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2891 if (err) {
2892 ERROR("failed to set mtu '%s' for %s-%s : %s",
2893 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2894 goto out_delete;
75d09f83
DL
2895 }
2896 }
2897
3cfc0f3a
MN
2898 if (netdev->link) {
2899 err = lxc_bridge_attach(netdev->link, veth1);
2900 if (err) {
2901 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2902 veth1, netdev->link, strerror(-err));
2903 goto out_delete;
2904 }
eb14c10a
DL
2905 }
2906
82d5ae15
DL
2907 netdev->ifindex = if_nametoindex(veth2);
2908 if (!netdev->ifindex) {
36eb9bde 2909 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2910 goto out_delete;
2911 }
2912
d472214b 2913 err = lxc_netdev_up(veth1);
6e35af2e
DL
2914 if (err) {
2915 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2916 goto out_delete;
0ad19a3f 2917 }
2918
e3b4c4c4 2919 if (netdev->upscript) {
751d9dcd
DL
2920 err = run_script(handler->name, "net", netdev->upscript, "up",
2921 "veth", veth1, (char*) NULL);
2922 if (err)
e3b4c4c4 2923 goto out_delete;
e3b4c4c4
ST
2924 }
2925
82d5ae15
DL
2926 DEBUG("instanciated veth '%s/%s', index is '%d'",
2927 veth1, veth2, netdev->ifindex);
2928
6ab9ab6d 2929 return 0;
eb14c10a
DL
2930
2931out_delete:
b84f58b9 2932 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2933 if (!netdev->priv.veth_attr.pair && veth1)
2934 free(veth1);
2935 if(veth2)
2936 free(veth2);
6ab9ab6d 2937 return -1;
13954cce 2938}
d957ae2d 2939
74a2b586
JK
2940static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2941{
2942 char *veth1;
2943 int err;
2944
2945 if (netdev->priv.veth_attr.pair)
2946 veth1 = netdev->priv.veth_attr.pair;
2947 else
2948 veth1 = netdev->priv.veth_attr.veth1;
2949
2950 if (netdev->downscript) {
2951 err = run_script(handler->name, "net", netdev->downscript,
2952 "down", "veth", veth1, (char*) NULL);
2953 if (err)
2954 return -1;
2955 }
2956 return 0;
2957}
2958
e3b4c4c4 2959static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2960{
0e391e57 2961 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2962 int err;
d957ae2d
MT
2963
2964 if (!netdev->link) {
2965 ERROR("no link specified for macvlan netdev");
2966 return -1;
2967 }
13954cce 2968
9ba8130c
SH
2969 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2970 if (err >= sizeof(peerbuf))
2971 return -1;
82d5ae15 2972
a0265685 2973 peer = lxc_mkifname(peerbuf);
ad40563e 2974 if (!peer) {
82d5ae15
DL
2975 ERROR("failed to make a temporary name");
2976 return -1;
0ad19a3f 2977 }
2978
3cfc0f3a
MN
2979 err = lxc_macvlan_create(netdev->link, peer,
2980 netdev->priv.macvlan_attr.mode);
2981 if (err) {
2982 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2983 peer, netdev->link, strerror(-err));
ad40563e 2984 goto out;
0ad19a3f 2985 }
2986
82d5ae15
DL
2987 netdev->ifindex = if_nametoindex(peer);
2988 if (!netdev->ifindex) {
36eb9bde 2989 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2990 goto out;
22ebac19 2991 }
2992
e3b4c4c4 2993 if (netdev->upscript) {
751d9dcd
DL
2994 err = run_script(handler->name, "net", netdev->upscript, "up",
2995 "macvlan", netdev->link, (char*) NULL);
2996 if (err)
ad40563e 2997 goto out;
e3b4c4c4
ST
2998 }
2999
e892973e
DL
3000 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
3001 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 3002
d957ae2d 3003 return 0;
ad40563e
ÇO
3004out:
3005 lxc_netdev_delete_by_name(peer);
3006 free(peer);
3007 return -1;
0ad19a3f 3008}
3009
74a2b586
JK
3010static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
3011{
3012 int err;
3013
3014 if (netdev->downscript) {
3015 err = run_script(handler->name, "net", netdev->downscript,
3016 "down", "macvlan", netdev->link,
3017 (char*) NULL);
3018 if (err)
3019 return -1;
3020 }
3021 return 0;
3022}
3023
26c39028 3024/* XXX: merge with instanciate_macvlan */
e3b4c4c4 3025static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
3026{
3027 char peer[IFNAMSIZ];
3cfc0f3a 3028 int err;
26c39028
JHS
3029
3030 if (!netdev->link) {
3031 ERROR("no link specified for vlan netdev");
3032 return -1;
3033 }
3034
9ba8130c
SH
3035 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
3036 if (err >= sizeof(peer)) {
3037 ERROR("peer name too long");
3038 return -1;
3039 }
26c39028 3040
3cfc0f3a
MN
3041 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
3042 if (err) {
3043 ERROR("failed to create vlan interface '%s' on '%s' : %s",
3044 peer, netdev->link, strerror(-err));
26c39028
JHS
3045 return -1;
3046 }
3047
3048 netdev->ifindex = if_nametoindex(peer);
3049 if (!netdev->ifindex) {
3050 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 3051 lxc_netdev_delete_by_name(peer);
26c39028
JHS
3052 return -1;
3053 }
3054
e892973e
DL
3055 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
3056 netdev->ifindex);
3057
26c39028
JHS
3058 return 0;
3059}
3060
74a2b586
JK
/*
 * Shutdown hook for vlan devices: nothing has to be undone here, so both
 * arguments are ignored and success is always reported.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3065
e3b4c4c4 3066static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 3067{
6168e99f
DL
3068 if (!netdev->link) {
3069 ERROR("no link specified for the physical interface");
3070 return -1;
3071 }
3072
9d083402 3073 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 3074 if (!netdev->ifindex) {
9d083402 3075 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 3076 return -1;
3077 }
3078
e3b4c4c4
ST
3079 if (netdev->upscript) {
3080 int err;
751d9dcd
DL
3081 err = run_script(handler->name, "net", netdev->upscript,
3082 "up", "phys", netdev->link, (char*) NULL);
3083 if (err)
e3b4c4c4 3084 return -1;
e3b4c4c4
ST
3085 }
3086
82d5ae15 3087 return 0;
0ad19a3f 3088}
3089
74a2b586
JK
3090static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
3091{
3092 int err;
3093
3094 if (netdev->downscript) {
3095 err = run_script(handler->name, "net", netdev->downscript,
3096 "down", "phys", netdev->link, (char*) NULL);
3097 if (err)
3098 return -1;
3099 }
3100 return 0;
3101}
3102
26b797f3
SH
3103static int instanciate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
3104{
3105 netdev->ifindex = 0;
3106 return 0;
3107}
3108
e3b4c4c4 3109static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 3110{
82d5ae15 3111 netdev->ifindex = 0;
e3b4c4c4
ST
3112 if (netdev->upscript) {
3113 int err;
751d9dcd
DL
3114 err = run_script(handler->name, "net", netdev->upscript,
3115 "up", "empty", (char*) NULL);
3116 if (err)
e3b4c4c4 3117 return -1;
e3b4c4c4 3118 }
82d5ae15 3119 return 0;
0ad19a3f 3120}
3121
74a2b586
JK
3122static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
3123{
3124 int err;
3125
3126 if (netdev->downscript) {
3127 err = run_script(handler->name, "net", netdev->downscript,
3128 "down", "empty", (char*) NULL);
3129 if (err)
3130 return -1;
3131 }
3132 return 0;
3133}
3134
26b797f3
SH
/*
 * Shutdown hook for the "none" network type: there is nothing to undo, so
 * both arguments are ignored and success is always reported.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3139
3140int lxc_requests_empty_network(struct lxc_handler *handler)
3141{
3142 struct lxc_list *network = &handler->conf->network;
3143 struct lxc_list *iterator;
3144 struct lxc_netdev *netdev;
3145 bool found_none = false, found_nic = false;
3146
3147 if (lxc_list_empty(network))
3148 return 0;
3149
3150 lxc_list_for_each(iterator, network) {
3151
3152 netdev = iterator->elem;
3153
3154 if (netdev->type == LXC_NET_NONE)
3155 found_none = true;
3156 else
3157 found_nic = true;
3158 }
3159 if (found_none && !found_nic)
3160 return 1;
3161 return 0;
3162}
3163
e3b4c4c4 3164int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 3165{
e3b4c4c4 3166 struct lxc_list *network = &handler->conf->network;
82d5ae15 3167 struct lxc_list *iterator;
82d5ae15 3168 struct lxc_netdev *netdev;
cbef6c52
SH
3169 int am_root = (getuid() == 0);
3170
3171 if (!am_root)
3172 return 0;
0ad19a3f 3173
5f4535a3 3174 lxc_list_for_each(iterator, network) {
0ad19a3f 3175
5f4535a3 3176 netdev = iterator->elem;
13954cce 3177
24654103 3178 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 3179 ERROR("invalid network configuration type '%d'",
5f4535a3 3180 netdev->type);
82d5ae15
DL
3181 return -1;
3182 }
0ad19a3f 3183
e3b4c4c4 3184 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
3185 ERROR("failed to create netdev");
3186 return -1;
3187 }
e3b4c4c4 3188
0ad19a3f 3189 }
3190
3191 return 0;
3192}
3193
74a2b586 3194void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 3195{
74a2b586 3196 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
3197 struct lxc_list *iterator;
3198 struct lxc_netdev *netdev;
3199
3200 lxc_list_for_each(iterator, network) {
3201 netdev = iterator->elem;
d472214b 3202
74a2b586 3203 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
3204 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
3205 WARN("failed to rename to the initial name the " \
3206 "netdev '%s'", netdev->link);
d472214b 3207 continue;
d8f8e352 3208 }
d472214b 3209
74a2b586
JK
3210 if (netdev_deconf[netdev->type](handler, netdev)) {
3211 WARN("failed to destroy netdev");
3212 }
3213
d8f8e352
DL
3214 /* Recent kernel remove the virtual interfaces when the network
3215 * namespace is destroyed but in case we did not moved the
3216 * interface to the network namespace, we have to destroy it
3217 */
74a2b586
JK
3218 if (netdev->ifindex != 0 &&
3219 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3220 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3221 }
3222}
3223
45e854dc
SG
3224#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3225
fe1f672f
ÇO
3226/* lxc-user-nic returns "interface_name:interface_name\n" */
3227#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 3228static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3229{
3230 pid_t child;
a7242d9a
ÇO
3231 int bytes, pipefd[2];
3232 char *token, *saveptr = NULL;
fe1f672f 3233 char buffer[MAX_BUFFER_SIZE];
cbef6c52
SH
3234
3235 if (netdev->type != LXC_NET_VETH) {
3236 ERROR("nic type %d not support for unprivileged use",
3237 netdev->type);
3238 return -1;
3239 }
3240
a7242d9a
ÇO
3241 if(pipe(pipefd) < 0) {
3242 SYSERROR("pipe failed");
3243 return -1;
3244 }
3245
cbef6c52
SH
3246 if ((child = fork()) < 0) {
3247 SYSERROR("fork");
a7242d9a
ÇO
3248 close(pipefd[0]);
3249 close(pipefd[1]);
3250 return -1;
3251 }
3252
3253 if (child == 0) { // child
3254 /* close the read-end of the pipe */
3255 close(pipefd[0]);
3256 /* redirect the stdout to write-end of the pipe */
3257 dup2(pipefd[1], STDOUT_FILENO);
3258 /* close the write-end of the pipe */
fe1f672f 3259 close(pipefd[1]);
a7242d9a
ÇO
3260
3261 // Call lxc-user-nic pid type bridge
3262 char pidstr[20];
3263 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
3264 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3265 pidstr[19] = '\0';
3266 execvp(args[0], args);
3267 SYSERROR("execvp lxc-user-nic");
3268 exit(1);
3269 }
3270
3271 /* close the write-end of the pipe */
3272 close(pipefd[1]);
3273
fe1f672f 3274 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3275 if (bytes < 0) {
3276 SYSERROR("read failed");
3277 }
3278 buffer[bytes - 1] = '\0';
3279
3280 if (wait_for_pid(child) != 0) {
3281 close(pipefd[0]);
cbef6c52
SH
3282 return -1;
3283 }
3284
a7242d9a
ÇO
3285 /* close the read-end of the pipe */
3286 close(pipefd[0]);
cbef6c52 3287
a7242d9a
ÇO
3288 /* fill netdev->name field */
3289 token = strtok_r(buffer, ":", &saveptr);
3290 if (!token)
3291 return -1;
658979c5
SH
3292 netdev->name = malloc(IFNAMSIZ+1);
3293 if (!netdev->name) {
3294 ERROR("Out of memory");
3295 return -1;
3296 }
3297 memset(netdev->name, 0, IFNAMSIZ+1);
3298 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3299
3300 /* fill netdev->veth_attr.pair field */
3301 token = strtok_r(NULL, ":", &saveptr);
3302 if (!token)
3303 return -1;
3304 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3305 if (!netdev->priv.veth_attr.pair) {
3306 ERROR("Out of memory");
3307 return -1;
3308 }
45e854dc 3309
a7242d9a 3310 return 0;
cbef6c52
SH
3311}
3312
5f4535a3 3313int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3314{
82d5ae15 3315 struct lxc_list *iterator;
82d5ae15 3316 struct lxc_netdev *netdev;
cbef6c52 3317 int am_root = (getuid() == 0);
3cfc0f3a 3318 int err;
0ad19a3f 3319
5f4535a3 3320 lxc_list_for_each(iterator, network) {
82d5ae15 3321
5f4535a3 3322 netdev = iterator->elem;
82d5ae15 3323
fbb16259 3324 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3325 if (unpriv_assign_nic(netdev, pid))
3326 return -1;
658979c5
SH
3327 // lxc-user-nic has moved the nic to the new ns.
3328 // unpriv_assign_nic() fills in netdev->name.
3329 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3330 continue;
3331 }
236087a6 3332
fbb16259
SH
3333 /* empty network namespace, nothing to move */
3334 if (!netdev->ifindex)
3335 continue;
3336
d472214b 3337 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
3338 if (err) {
3339 ERROR("failed to move '%s' to the container : %s",
3340 netdev->link, strerror(-err));
82d5ae15
DL
3341 return -1;
3342 }
3343
c1c75c04 3344 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3345 }
3346
3347 return 0;
3348}
3349
251d0d2a
DE
3350static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3351 size_t buf_size)
f6d3e3e4
SH
3352{
3353 char path[PATH_MAX];
e4ccd113 3354 int ret, closeret;
f6d3e3e4
SH
3355 FILE *f;
3356
3357 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3358 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3359 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3360 return -E2BIG;
3361 }
3362 f = fopen(path, "w");
3363 if (!f) {
3364 perror("open");
3365 return -EINVAL;
3366 }
251d0d2a 3367 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3368 if (ret < 0)
e4ccd113
SH
3369 SYSERROR("writing id mapping");
3370 closeret = fclose(f);
3371 if (closeret)
3372 SYSERROR("writing id mapping");
3373 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3374}
3375
3376int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3377{
3378 struct lxc_list *iterator;
3379 struct id_map *map;
8afb3e61 3380 int ret = 0, use_shadow = 0;
251d0d2a 3381 enum idtype type;
8afb3e61
SG
3382 char *buf = NULL, *pos, *cmdpath = NULL;
3383
9d9c111c 3384 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3385 if (cmdpath) {
3386 use_shadow = 1;
3387 free(cmdpath);
3388 }
3389
0e6e3a41
SG
3390 if (!use_shadow && geteuid()) {
3391 ERROR("Missing newuidmap/newgidmap");
3392 return -1;
3393 }
251d0d2a
DE
3394
3395 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3396 int left, fill;
cf3ef16d
SH
3397 int had_entry = 0;
3398 if (!buf) {
3399 buf = pos = malloc(4096);
4f7521b4
SH
3400 if (!buf)
3401 return -ENOMEM;
cf3ef16d
SH
3402 }
3403 pos = buf;
0e6e3a41 3404 if (use_shadow)
d1838f34 3405 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3406 type == ID_TYPE_UID ? 'u' : 'g',
3407 pid);
4f7521b4 3408
cf3ef16d
SH
3409 lxc_list_for_each(iterator, idmap) {
3410 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3411 map = iterator->elem;
cf3ef16d
SH
3412 if (map->idtype != type)
3413 continue;
3414
3415 had_entry = 1;
3416 left = 4096 - (pos - buf);
d1838f34 3417 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3418 use_shadow ? " " : "",
d1838f34 3419 map->nsid, map->hostid, map->range,
0e6e3a41 3420 use_shadow ? "" : "\n");
cf3ef16d
SH
3421 if (fill <= 0 || fill >= left)
3422 SYSERROR("snprintf failed, too many mappings");
3423 pos += fill;
251d0d2a 3424 }
cf3ef16d 3425 if (!had_entry)
4f7521b4 3426 continue;
cf3ef16d 3427
0e6e3a41 3428 if (!use_shadow) {
cf3ef16d 3429 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3430 } else {
3431 left = 4096 - (pos - buf);
3432 fill = snprintf(pos, left, "\n");
3433 if (fill <= 0 || fill >= left)
3434 SYSERROR("snprintf failed, too many mappings");
3435 pos += fill;
cf3ef16d 3436 ret = system(buf);
d1838f34 3437 }
cf3ef16d 3438
f6d3e3e4
SH
3439 if (ret)
3440 break;
3441 }
251d0d2a 3442
4f7521b4
SH
3443 if (buf)
3444 free(buf);
f6d3e3e4
SH
3445 return ret;
3446}
3447
cf3ef16d 3448/*
7b50c609
TS
3449 * return the host uid/gid to which the container root is mapped in
3450 * *val.
0b3a6504 3451 * Return true if id was found, false otherwise.
cf3ef16d 3452 */
2a9a80cb 3453bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3454 unsigned long *val)
cf3ef16d
SH
3455{
3456 struct lxc_list *it;
3457 struct id_map *map;
3458
3459 lxc_list_for_each(it, &conf->id_map) {
3460 map = it->elem;
7b50c609 3461 if (map->idtype != idtype)
cf3ef16d
SH
3462 continue;
3463 if (map->nsid != 0)
3464 continue;
2a9a80cb
SH
3465 *val = map->hostid;
3466 return true;
cf3ef16d 3467 }
2a9a80cb 3468 return false;
cf3ef16d
SH
3469}
3470
2133f58c 3471int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3472{
3473 struct lxc_list *it;
3474 struct id_map *map;
3475 lxc_list_for_each(it, &conf->id_map) {
3476 map = it->elem;
2133f58c 3477 if (map->idtype != idtype)
cf3ef16d
SH
3478 continue;
3479 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3480 return (id - map->hostid) + map->nsid;
cf3ef16d 3481 }
57d116ab 3482 return -1;
cf3ef16d
SH
3483}
3484
2133f58c 3485int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3486{
3487 struct lxc_list *it;
3488 struct id_map *map;
2133f58c 3489 unsigned int freeid = 0;
cf3ef16d
SH
3490again:
3491 lxc_list_for_each(it, &conf->id_map) {
3492 map = it->elem;
2133f58c 3493 if (map->idtype != idtype)
cf3ef16d
SH
3494 continue;
3495 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3496 freeid = map->nsid + map->range;
3497 goto again;
3498 }
3499 }
3500 return freeid;
3501}
3502
19a26f82
MK
3503int lxc_find_gateway_addresses(struct lxc_handler *handler)
3504{
3505 struct lxc_list *network = &handler->conf->network;
3506 struct lxc_list *iterator;
3507 struct lxc_netdev *netdev;
3508 int link_index;
3509
3510 lxc_list_for_each(iterator, network) {
3511 netdev = iterator->elem;
3512
3513 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3514 continue;
3515
3516 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3517 ERROR("gateway = auto only supported for "
3518 "veth and macvlan");
3519 return -1;
3520 }
3521
3522 if (!netdev->link) {
3523 ERROR("gateway = auto needs a link interface");
3524 return -1;
3525 }
3526
3527 link_index = if_nametoindex(netdev->link);
3528 if (!link_index)
3529 return -EINVAL;
3530
3531 if (netdev->ipv4_gateway_auto) {
3532 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3533 ERROR("failed to automatically find ipv4 gateway "
3534 "address from link interface '%s'", netdev->link);
3535 return -1;
3536 }
3537 }
3538
3539 if (netdev->ipv6_gateway_auto) {
3540 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3541 ERROR("failed to automatically find ipv6 gateway "
3542 "address from link interface '%s'", netdev->link);
3543 return -1;
3544 }
3545 }
3546 }
3547
3548 return 0;
3549}
3550
5e4a62bf 3551int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3552{
5e4a62bf 3553 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3554 int i, ret;
b0a33c1e 3555
5e4a62bf
DL
3556 /* no tty in the configuration */
3557 if (!conf->tty)
b0a33c1e 3558 return 0;
3559
13954cce 3560 tty_info->pty_info =
e4e7d59d 3561 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3562 if (!tty_info->pty_info) {
36eb9bde 3563 SYSERROR("failed to allocate pty_info");
985d15b1 3564 return -1;
b0a33c1e 3565 }
3566
985d15b1 3567 for (i = 0; i < conf->tty; i++) {
13954cce 3568
b0a33c1e 3569 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3570
025ed0f3
SH
3571 process_lock();
3572 ret = openpty(&pty_info->master, &pty_info->slave,
3573 pty_info->name, NULL, NULL);
3574 process_unlock();
3575 if (ret) {
36eb9bde 3576 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3577 tty_info->nbtty = i;
3578 lxc_delete_tty(tty_info);
3579 return -1;
b0a33c1e 3580 }
3581
5332bb84
DL
3582 DEBUG("allocated pty '%s' (%d/%d)",
3583 pty_info->name, pty_info->master, pty_info->slave);
3584
3ec1648d 3585 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3586 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3587 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3588
b0a33c1e 3589 pty_info->busy = 0;
3590 }
3591
985d15b1 3592 tty_info->nbtty = conf->tty;
1ac470c0
DL
3593
3594 INFO("tty's configured");
3595
985d15b1 3596 return 0;
b0a33c1e 3597}
3598
3599void lxc_delete_tty(struct lxc_tty_info *tty_info)
3600{
3601 int i;
3602
3603 for (i = 0; i < tty_info->nbtty; i++) {
3604 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3605
3606 close(pty_info->master);
3607 close(pty_info->slave);
3608 }
3609
3610 free(tty_info->pty_info);
3611 tty_info->nbtty = 0;
3612}
3613
f6d3e3e4 3614/*
7b50c609
TS
3615 * chown_mapped_root: for an unprivileged user with uid/gid X to
3616 * chown a dir to subuid/subgid Y, he needs to run chown as root
3617 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3618 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3619 * root is privileged with respect to hostuid/hostgid X, allowing
3620 * him to do the chown.
f6d3e3e4 3621 */
c4d10a05 3622int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3623{
7b50c609
TS
3624 uid_t rootuid;
3625 gid_t rootgid;
c4d10a05 3626 pid_t pid;
2a9a80cb 3627 unsigned long val;
a7ef8753 3628 char *chownpath = path;
f6d3e3e4 3629
2a9a80cb 3630 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3631 ERROR("No mapping for container root");
3632 return -1;
f6d3e3e4 3633 }
7b50c609
TS
3634 rootuid = (uid_t) val;
3635 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3636 ERROR("No mapping for container root");
3637 return -1;
3638 }
3639 rootgid = (gid_t) val;
2a9a80cb 3640
a7ef8753
SH
3641 /*
3642 * In case of overlay, we want only the writeable layer
3643 * to be chowned
3644 */
1f92162d 3645 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3646 chownpath = strchr(path, ':');
3647 if (!chownpath) {
3648 ERROR("Bad overlay path: %s", path);
3649 return -1;
3650 }
3651 chownpath = strchr(chownpath+1, ':');
3652 if (!chownpath) {
3653 ERROR("Bad overlay path: %s", path);
3654 return -1;
3655 }
3656 chownpath++;
3657 }
3658 path = chownpath;
c4d10a05 3659 if (geteuid() == 0) {
7b50c609 3660 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3661 ERROR("Error chowning %s", path);
3662 return -1;
3663 }
3664 return 0;
3665 }
f3d7e4ca 3666
7b50c609 3667 if (rootuid == geteuid()) {
f3d7e4ca
SH
3668 // nothing to do
3669 INFO("%s: container root is our uid; no need to chown" ,__func__);
3670 return 0;
3671 }
3672
c4d10a05
SH
3673 pid = fork();
3674 if (pid < 0) {
3675 SYSERROR("Failed forking");
f6d3e3e4
SH
3676 return -1;
3677 }
c4d10a05 3678 if (!pid) {
7b50c609
TS
3679 int hostuid = geteuid(), hostgid = getegid(), ret;
3680 struct stat sb;
3681 char map1[100], map2[100], map3[100], map4[100], map5[100];
3682 char ugid[100];
3683 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3684 "-m", map3, "-m", map5,
3685 "--", "chown", ugid, path, NULL };
3686 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3687 "-m", map3, "-m", map4, "-m", map5,
3688 "--", "chown", ugid, path, NULL };
3689
3690 // save the current gid of "path"
3691 if (stat(path, &sb) < 0) {
3692 ERROR("Error stat %s", path);
3693 return -1;
3694 }
f6d3e3e4 3695
9a7c2aba
SH
3696 /*
3697 * A file has to be group-owned by a gid mapped into the
3698 * container, or the container won't be privileged over it.
3699 */
3700 if (sb.st_uid == geteuid() &&
3701 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3702 chown(path, -1, hostgid) < 0) {
3703 ERROR("Failed chgrping %s", path);
7b50c609
TS
3704 return -1;
3705 }
3706
3707 // "u:0:rootuid:1"
3708 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3709 if (ret < 0 || ret >= 100) {
3710 ERROR("Error uid printing map string");
f6d3e3e4
SH
3711 return -1;
3712 }
c4d10a05 3713
98e5ba51
SH
3714 // "u:hostuid:hostuid:1"
3715 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3716 if (ret < 0 || ret >= 100) {
3717 ERROR("Error uid printing map string");
3718 return -1;
3719 }
3720
7b50c609
TS
3721 // "g:0:rootgid:1"
3722 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3723 if (ret < 0 || ret >= 100) {
7b50c609 3724 ERROR("Error gid printing map string");
c4d10a05
SH
3725 return -1;
3726 }
3727
7b50c609 3728 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3729 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3730 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3731 if (ret < 0 || ret >= 100) {
3732 ERROR("Error gid printing map string");
3733 return -1;
3734 }
3735
3736 // "g:hostgid:hostgid:1"
3737 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3738 if (ret < 0 || ret >= 100) {
3739 ERROR("Error gid printing map string");
3740 return -1;
3741 }
3742
3743 // "0:pathgid" (chown)
b4c1e35d 3744 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3745 if (ret < 0 || ret >= 100) {
3746 ERROR("Error owner printing format string for chown");
3747 return -1;
3748 }
3749
3750 if (hostgid == sb.st_gid)
3751 ret = execvp("lxc-usernsexec", args1);
3752 else
3753 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3754 SYSERROR("Failed executing usernsexec");
3755 exit(1);
f6d3e3e4 3756 }
c4d10a05 3757 return wait_for_pid(pid);
f6d3e3e4
SH
3758}
3759
c4d10a05 3760int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3761{
c4d10a05 3762 int i;
f6d3e3e4 3763
c4d10a05 3764 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3765 return 0;
c4d10a05
SH
3766
3767 for (i = 0; i < c->tty_info.nbtty; i++) {
3768 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3769
3770 if (chown_mapped_root(pty_info->name, c) < 0) {
3771 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3772 return -1;
3773 }
3774 }
3775
29b10e4f 3776 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3777 ERROR("Failed to chown %s", c->console.name);
3778 return -1;
3779 }
3780
f6d3e3e4
SH
3781 return 0;
3782}
3783
bc6928ff
MW
struct start_args {
	char *const *argv;
};

#define MAX_SYMLINK_DEPTH 32

/*
 * This routine is called when the configuration does not already specify a value
 * for autodev (mounting a file system on /dev and populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container's init command resolves (through any
 * symlinks) to something with "systemd" in its path, then it is probably
 * running systemd as the init process and it needs the autodev mount to
 * prevent it from mounting devtmpfs on /dev on its own, causing conflicts
 * in the host.
 *
 * @rootfs : path of the container rootfs.
 * @data   : optional struct start_args; argv[0], when set, is used as the
 *           init command instead of "/sbin/init".
 *
 * Returns 1 if autodev should be enabled, 0 if it is not required and
 * -2 if this cannot be determined.
 */
static int check_autodev( const char *rootfs, void *data )
{
	struct start_args *arg = data;
	int ret;
	int loop_count = 0;
	struct stat s;
	char absrootfs[MAXPATHLEN];
	char path[MAXPATHLEN];
	char abs_path[MAXPATHLEN];
	const char *command = "/sbin/init";

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (arg && arg->argv && arg->argv[0]) {
		command = arg->argv[0];
		DEBUG("Set exec command to %s", command);
	}

	/* snprintf() always NUL-terminates; an overlong command is an error */
	ret = snprintf(path, sizeof(path), "%s", command);
	if (ret < 0 || (size_t) ret >= sizeof(path))
		return -2;

	if (0 != access(path, F_OK) || 0 != stat(path, &s))
		return -2;

	/* Dereference down the symlink merry path testing as we go. */
	/* If anything references systemd in the path - set autodev! */
	/* Renormalize to the rootfs before each dereference */
	/* Relative symlinks should fall out in the wash even with .. */
	while (1) {
		if (strstr(path, "systemd")) {
			INFO("Container with systemd init detected - enabling autodev!");
			return 1;
		}

		ret = snprintf(abs_path, sizeof(abs_path), "%s/%s", absrootfs, path);
		if (ret < 0 || (size_t) ret >= sizeof(abs_path))
			return -2;

		/* leave room for the terminator readlink() does not write */
		ret = readlink(abs_path, path, sizeof(path) - 1);

		if ((ret <= 0) || (++loop_count > MAX_SYMLINK_DEPTH))
			break; /* Break out for other tests */
		path[ret] = '\0';
	}

	/*
	 * Add future checks here.
	 *   Return positive if we should go autodev
	 *   Return 0 if we should NOT go autodev
	 *   Return negative if we encounter an error or can not determine...
	 */

	/* All else fails, we don't need autodev */
	INFO("Autodev not required.");
	return 0;
}
3865
5112cd70
SH
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * <rootfs>/proc/self is read as a symlink; if it does not resolve to
 * "1", whatever is mounted on <rootfs>/proc is lazily unmounted and a
 * fresh proc is mounted in its place.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink() does not NUL-terminate; reading at most
	 * sizeof(link) - 1 bytes into a zeroed buffer keeps "link" a valid
	 * string for the log message and the comparison below.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	if (strcmp(link, "1") != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3907
3908int tmp_proc_mount(struct lxc_conf *lxc_conf)
3909{
3910 int mounted;
3911
3912 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3913 if (mount("proc", "/proc", "proc", 0, NULL)) {
3914 SYSERROR("Failed mounting /proc, proceeding");
3915 mounted = 0;
3916 } else
3917 mounted = 1;
3918 } else
3919 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3920 if (mounted == -1) {
3921 SYSERROR("failed to mount /proc in the container.");
3922 return -1;
3923 } else if (mounted == 1) {
3924 lxc_conf->tmp_umount_proc = 1;
3925 }
3926 return 0;
3927}
3928
3929void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3930{
3931 if (lxc_conf->tmp_umount_proc == 1) {
3932 umount("/proc");
3933 lxc_conf->tmp_umount_proc = 0;
3934 }
3935}
3936
e995d7a2
SH
/*
 * Read /proc/self/mountinfo line by line and, for each entry whose
 * options field contains "shared", call mount() with MS_SLAVE on its
 * target. Failures are logged and otherwise ignored.
 */
static void remount_all_slave(void)
{
	char *linebuf = NULL;
	size_t linecap = 0;
	FILE *mountinfo = fopen("/proc/self/mountinfo", "r");

	if (mountinfo == NULL) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&linebuf, &linecap, mountinfo) != -1) {
		char *mnt_target;
		char *mnt_opts;

		mnt_target = get_field(linebuf, 4);
		if (mnt_target == NULL)
			continue;

		/* the options are located relative to the target field */
		mnt_opts = get_field(mnt_target, 2);
		if (mnt_opts == NULL)
			continue;

		null_endofword(mnt_opts);
		if (strstr(mnt_opts, "shared") == NULL)
			continue;

		/* terminate the target only after the options were found */
		null_endofword(mnt_target);
		if (mount(NULL, mnt_target, NULL, MS_SLAVE, NULL) != 0) {
			SYSERROR("Failed to make %s rslave", mnt_target);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(linebuf);
}
3971
2322903b
SH
3972void lxc_execute_bind_init(struct lxc_conf *conf)
3973{
3974 int ret;
9d9c111c
SH
3975 char path[PATH_MAX], destpath[PATH_MAX], *p;
3976
3977 /* If init exists in the container, don't bind mount a static one */
3978 p = choose_init(conf->rootfs.mount);
3979 if (p) {
3980 free(p);
3981 return;
3982 }
2322903b
SH
3983
3984 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3985 if (ret < 0 || ret >= PATH_MAX) {
3986 WARN("Path name too long searching for lxc.init.static");
3987 return;
3988 }
3989
3990 if (!file_exists(path)) {
3991 INFO("%s does not exist on host", path);
3992 return;
3993 }
3994
3995 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3996 if (ret < 0 || ret >= PATH_MAX) {
3997 WARN("Path name too long for container's lxc.init.static");
3998 return;
3999 }
4000
4001 if (!file_exists(destpath)) {
4002 FILE * pathfile = fopen(destpath, "wb");
4003 if (!pathfile) {
4004 SYSERROR("Failed to create mount target '%s'", destpath);
4005 return;
4006 }
4007 fclose(pathfile);
4008 }
4009
4010 ret = mount(path, destpath, "none", MS_BIND, NULL);
4011 if (ret < 0)
4012 SYSERROR("Failed to bind lxc.init.static into container");
4013 INFO("lxc.init.static bound into container at %s", path);
4014}
4015
35120d9c
SH
4016/*
4017 * This does the work of remounting / if it is shared, calling the
4018 * container pre-mount hooks, and mounting the rootfs.
4019 */
4020int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 4021{
35120d9c
SH
4022 if (conf->rootfs_setup) {
4023 /*
4024 * rootfs was set up in another namespace. bind-mount it
4025 * to give us a mount in our own ns so we can pivot_root to it
4026 */
4027 const char *path = conf->rootfs.mount;
4028 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
4029 ERROR("Failed to bind-mount container / onto itself");
4030 return false;
4031 }
4032 }
d4ef7c50 4033
cd698bdd 4034 if (detect_ramfs_rootfs()) {
35120d9c 4035 if (chroot_into_slave(conf)) {
cd698bdd
FK
4036 ERROR("Failed to chroot into slave /");
4037 return -1;
4038 }
4039 }
4040
e995d7a2
SH
4041 remount_all_slave();
4042
35120d9c
SH
4043 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
4044 ERROR("failed to run pre-mount hooks for container '%s'.", name);
4045 return -1;
4046 }
4047
4048 if (setup_rootfs(conf)) {
4049 ERROR("failed to setup rootfs for '%s'", name);
4050 return -1;
4051 }
4052
4053 conf->rootfs_setup = true;
4054 return 0;
4055}
4056
1c1c7051
SH
4057static bool verify_start_hooks(struct lxc_conf *conf)
4058{
4059 struct lxc_list *it;
4060 char path[MAXPATHLEN];
4061 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
4062 char *hookname = it->elem;
4063 struct stat st;
4064 int ret;
4065
4066 ret = snprintf(path, MAXPATHLEN, "%s%s",
4067 conf->rootfs.mount, hookname);
4068 if (ret < 0 || ret >= MAXPATHLEN)
4069 return false;
4070 ret = stat(path, &st);
4071 if (ret) {
4072 SYSERROR("Start hook %s not found in container rootfs",
4073 hookname);
4074 return false;
4075 }
4076 }
4077
4078 return true;
4079}
4080
35120d9c
SH
4081int lxc_setup(struct lxc_handler *handler)
4082{
4083 const char *name = handler->name;
4084 struct lxc_conf *lxc_conf = handler->conf;
4085 const char *lxcpath = handler->lxcpath;
4086 void *data = handler->data;
4087
4088 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
4089 ERROR("Error setting up rootfs mount after spawn");
4090 return -1;
4091 }
4092
6c544cb3
MM
4093 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
4094 if (setup_utsname(lxc_conf->utsname)) {
4095 ERROR("failed to setup the utsname for '%s'", name);
4096 return -1;
4097 }
0ad19a3f 4098 }
4099
5f4535a3 4100 if (setup_network(&lxc_conf->network)) {
36eb9bde 4101 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 4102 return -1;
0ad19a3f 4103 }
4104
bc6928ff
MW
4105 if (lxc_conf->autodev < 0) {
4106 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
4107 }
4108
4109 if (lxc_conf->autodev > 0) {
4110 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 4111 ERROR("failed to mount /dev in the container");
c6883f38
SH
4112 return -1;
4113 }
4114 }
4115
368bbc02
CS
4116 /* do automatic mounts (mainly /proc and /sys), but exclude
4117 * those that need to wait until other stuff has finished
4118 */
4fb3cba5 4119 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4120 ERROR("failed to setup the automatic mounts for '%s'", name);
4121 return -1;
4122 }
4123
80a881b2 4124 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 4125 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 4126 return -1;
576f946d 4127 }
4128
c1dc38c2 4129 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
4130 ERROR("failed to setup the mount entries for '%s'", name);
4131 return -1;
4132 }
4133
1c1c7051
SH
4134 /* Make sure any start hooks are in the rootfs */
4135 if (!verify_start_hooks(lxc_conf))
4136 return -1;
4137
2322903b
SH
4138 if (lxc_conf->is_execute)
4139 lxc_execute_bind_init(lxc_conf);
4140
368bbc02
CS
4141 /* now mount only cgroup, if wanted;
4142 * before, /sys could not have been mounted
4143 * (is either mounted automatically or via fstab entries)
4144 */
4fb3cba5 4145 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4146 ERROR("failed to setup the automatic mounts for '%s'", name);
4147 return -1;
4148 }
4149
283678ed 4150 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
4151 ERROR("failed to run mount hooks for container '%s'.", name);
4152 return -1;
4153 }
4154
bc6928ff 4155 if (lxc_conf->autodev > 0) {
283678ed 4156 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
4157 ERROR("failed to run autodev hooks for container '%s'.", name);
4158 return -1;
4159 }
91c3830e
SH
4160 if (setup_autodev(lxc_conf->rootfs.mount)) {
4161 ERROR("failed to populate /dev in the container");
4162 return -1;
4163 }
4164 }
368bbc02 4165
37903589 4166 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 4167 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 4168 return -1;
6e590161 4169 }
4170
7e0e1d94
AV
4171 if (lxc_conf->kmsg) {
4172 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
4173 ERROR("failed to setup kmsg for '%s'", name);
4174 }
1bd051a6 4175
37903589 4176 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 4177 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 4178 return -1;
b0a33c1e 4179 }
4180
69aa6655
DE
4181 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
4182 ERROR("failed to setup /dev symlinks for '%s'", name);
4183 return -1;
4184 }
4185
5112cd70
SH
4186 /* mount /proc if it's not already there */
4187 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 4188 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 4189 return -1;
e075f5d9 4190 }
e075f5d9 4191
ac778708 4192 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 4193 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 4194 return -1;
ed502555 4195 }
4196
571e6ec8 4197 if (setup_pts(lxc_conf->pts)) {
36eb9bde 4198 ERROR("failed to setup the new pts instance");
95b5ffaf 4199 return -1;
3c26f34e 4200 }
4201
cccc74b5
DL
4202 if (setup_personality(lxc_conf->personality)) {
4203 ERROR("failed to setup personality");
4204 return -1;
4205 }
4206
f6d3e3e4 4207 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
4208 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
4209 if (!lxc_list_empty(&lxc_conf->caps)) {
4210 ERROR("Simultaneously requested dropping and keeping caps");
4211 return -1;
4212 }
4213 if (dropcaps_except(&lxc_conf->keepcaps)) {
959aee9c 4214 ERROR("failed to keep requested caps");
1fb86a7c
SH
4215 return -1;
4216 }
4217 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
4218 ERROR("failed to drop capabilities");
4219 return -1;
4220 }
81810dd1
DL
4221 }
4222
cd54d859
DL
4223 NOTICE("'%s' is setup.", name);
4224
0ad19a3f 4225 return 0;
4226}
26ddeedd 4227
283678ed
SH
4228int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
4229 const char *lxcpath, char *argv[])
26ddeedd
SH
4230{
4231 int which = -1;
4232 struct lxc_list *it;
4233
4234 if (strcmp(hook, "pre-start") == 0)
4235 which = LXCHOOK_PRESTART;
5ea6163a
SH
4236 else if (strcmp(hook, "pre-mount") == 0)
4237 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4238 else if (strcmp(hook, "mount") == 0)
4239 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4240 else if (strcmp(hook, "autodev") == 0)
4241 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4242 else if (strcmp(hook, "start") == 0)
4243 which = LXCHOOK_START;
4244 else if (strcmp(hook, "post-stop") == 0)
4245 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4246 else if (strcmp(hook, "clone") == 0)
4247 which = LXCHOOK_CLONE;
26ddeedd
SH
4248 else
4249 return -1;
4250 lxc_list_for_each(it, &conf->hooks[which]) {
4251 int ret;
4252 char *hookname = it->elem;
283678ed 4253 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4254 if (ret)
4255 return ret;
4256 }
4257 return 0;
4258}
72d0e1cb 4259
427b3a21 4260static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4261{
4262 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4263 struct lxc_list *it2,*next;
72d0e1cb
SG
4264
4265 lxc_list_del(it);
4266
4267 if (netdev->link)
4268 free(netdev->link);
4269 if (netdev->name)
4270 free(netdev->name);
c9bb9a85
DE
4271 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
4272 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
4273 if (netdev->upscript)
4274 free(netdev->upscript);
4275 if (netdev->hwaddr)
4276 free(netdev->hwaddr);
4277 if (netdev->mtu)
4278 free(netdev->mtu);
4279 if (netdev->ipv4_gateway)
4280 free(netdev->ipv4_gateway);
4281 if (netdev->ipv6_gateway)
4282 free(netdev->ipv6_gateway);
9ebb03ad 4283 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4284 lxc_list_del(it2);
4285 free(it2->elem);
4286 free(it2);
4287 }
9ebb03ad 4288 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4289 lxc_list_del(it2);
4290 free(it2->elem);
4291 free(it2);
4292 }
d95db067 4293 free(netdev);
72d0e1cb
SG
4294 free(it);
4295}
4296
4297/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4298int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4299{
4300 char *p1;
4301 int ret, idx, i;
4302 struct lxc_list *it;
4303 struct lxc_netdev *netdev;
4304
4305 p1 = index(key, '.');
4306 if (!p1 || *(p1+1) == '\0')
4307 p1 = NULL;
4308
4309 ret = sscanf(key, "%d", &idx);
4310 if (ret != 1) return -1;
4311 if (idx < 0)
4312 return -1;
4313
4314 i = 0;
4315 lxc_list_for_each(it, &c->network) {
4316 if (i == idx)
4317 break;
4318 i++;
4319 }
4320 if (i < idx) // we don't have that many nics defined
4321 return -1;
4322
4323 if (!it || !it->elem)
4324 return -1;
4325
4326 netdev = it->elem;
4327
4328 if (!p1) {
4329 lxc_remove_nic(it);
52d21d40 4330 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4331 struct lxc_list *it2,*next;
4332 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4333 lxc_list_del(it2);
4334 free(it2->elem);
4335 free(it2);
4336 }
52d21d40 4337 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4338 struct lxc_list *it2,*next;
4339 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4340 lxc_list_del(it2);
4341 free(it2->elem);
4342 free(it2);
4343 }
52d21d40 4344 } else if (strcmp(p1, ".link") == 0) {
72d0e1cb
SG
4345 if (netdev->link) {
4346 free(netdev->link);
4347 netdev->link = NULL;
4348 }
52d21d40 4349 } else if (strcmp(p1, ".name") == 0) {
72d0e1cb
SG
4350 if (netdev->name) {
4351 free(netdev->name);
4352 netdev->name = NULL;
4353 }
52d21d40 4354 } else if (strcmp(p1, ".script.up") == 0) {
72d0e1cb
SG
4355 if (netdev->upscript) {
4356 free(netdev->upscript);
4357 netdev->upscript = NULL;
4358 }
52d21d40 4359 } else if (strcmp(p1, ".hwaddr") == 0) {
72d0e1cb
SG
4360 if (netdev->hwaddr) {
4361 free(netdev->hwaddr);
4362 netdev->hwaddr = NULL;
4363 }
52d21d40 4364 } else if (strcmp(p1, ".mtu") == 0) {
72d0e1cb
SG
4365 if (netdev->mtu) {
4366 free(netdev->mtu);
4367 netdev->mtu = NULL;
4368 }
52d21d40 4369 } else if (strcmp(p1, ".ipv4_gateway") == 0) {
72d0e1cb
SG
4370 if (netdev->ipv4_gateway) {
4371 free(netdev->ipv4_gateway);
4372 netdev->ipv4_gateway = NULL;
4373 }
52d21d40 4374 } else if (strcmp(p1, ".ipv6_gateway") == 0) {
72d0e1cb
SG
4375 if (netdev->ipv6_gateway) {
4376 free(netdev->ipv6_gateway);
4377 netdev->ipv6_gateway = NULL;
4378 }
4379 }
4380 else return -1;
4381
4382 return 0;
4383}
4384
4385int lxc_clear_config_network(struct lxc_conf *c)
4386{
9ebb03ad
DE
4387 struct lxc_list *it,*next;
4388 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4389 lxc_remove_nic(it);
4390 }
4391 return 0;
4392}
4393
4394int lxc_clear_config_caps(struct lxc_conf *c)
4395{
9ebb03ad 4396 struct lxc_list *it,*next;
72d0e1cb 4397
9ebb03ad 4398 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4399 lxc_list_del(it);
4400 free(it->elem);
4401 free(it);
4402 }
4403 return 0;
4404}
4405
74a3920a 4406static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4407 struct lxc_list *it, *next;
4408
4355ab5f 4409 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4410 lxc_list_del(it);
4411 free(it->elem);
4412 free(it);
4413 }
4414 return 0;
4415}
4416
4355ab5f
SH
4417int lxc_clear_idmaps(struct lxc_conf *c)
4418{
4419 return lxc_free_idmap(&c->id_map);
4420}
4421
1fb86a7c
SH
4422int lxc_clear_config_keepcaps(struct lxc_conf *c)
4423{
4424 struct lxc_list *it,*next;
4425
4426 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4427 lxc_list_del(it);
4428 free(it->elem);
4429 free(it);
4430 }
4431 return 0;
4432}
4433
12a50cc6 4434int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4435{
9ebb03ad 4436 struct lxc_list *it,*next;
72d0e1cb 4437 bool all = false;
12a50cc6 4438 const char *k = key + 11;
72d0e1cb
SG
4439
4440 if (strcmp(key, "lxc.cgroup") == 0)
4441 all = true;
4442
9ebb03ad 4443 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4444 struct lxc_cgroup *cg = it->elem;
4445 if (!all && strcmp(cg->subsystem, k) != 0)
4446 continue;
4447 lxc_list_del(it);
4448 free(cg->subsystem);
4449 free(cg->value);
4450 free(cg);
4451 free(it);
4452 }
4453 return 0;
4454}
4455
ee1e7aa0
SG
4456int lxc_clear_groups(struct lxc_conf *c)
4457{
4458 struct lxc_list *it,*next;
4459
4460 lxc_list_for_each_safe(it, &c->groups, next) {
4461 lxc_list_del(it);
4462 free(it->elem);
4463 free(it);
4464 }
4465 return 0;
4466}
4467
ab799c0b
SG
4468int lxc_clear_environment(struct lxc_conf *c)
4469{
4470 struct lxc_list *it,*next;
4471
4472 lxc_list_for_each_safe(it, &c->environment, next) {
4473 lxc_list_del(it);
4474 free(it->elem);
4475 free(it);
4476 }
4477 return 0;
4478}
4479
4480
72d0e1cb
SG
4481int lxc_clear_mount_entries(struct lxc_conf *c)
4482{
9ebb03ad 4483 struct lxc_list *it,*next;
72d0e1cb 4484
9ebb03ad 4485 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4486 lxc_list_del(it);
4487 free(it->elem);
4488 free(it);
4489 }
4490 return 0;
4491}
4492
b099e9e9
SH
4493int lxc_clear_automounts(struct lxc_conf *c)
4494{
4495 c->auto_mounts = 0;
4496 return 0;
4497}
4498
12a50cc6 4499int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4500{
9ebb03ad 4501 struct lxc_list *it,*next;
17ed13a3 4502 bool all = false, done = false;
12a50cc6 4503 const char *k = key + 9;
72d0e1cb
SG
4504 int i;
4505
17ed13a3
SH
4506 if (strcmp(key, "lxc.hook") == 0)
4507 all = true;
4508
72d0e1cb 4509 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4510 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4511 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4512 lxc_list_del(it);
4513 free(it->elem);
4514 free(it);
4515 }
4516 done = true;
72d0e1cb
SG
4517 }
4518 }
17ed13a3
SH
4519
4520 if (!done) {
4521 ERROR("Invalid hook key: %s", key);
4522 return -1;
4523 }
72d0e1cb
SG
4524 return 0;
4525}
8eb5694b 4526
74a3920a 4527static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4528{
4529 int i;
4530
0cf45501 4531 if (!conf->saved_nics)
7b35f3d6
SH
4532 return;
4533 for (i=0; i < conf->num_savednics; i++)
4534 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4535 free(conf->saved_nics);
4536}
4537
4184c3e1
SH
4538static inline void lxc_clear_aliens(struct lxc_conf *conf)
4539{
4540 struct lxc_list *it,*next;
4541
4542 lxc_list_for_each_safe(it, &conf->aliens, next) {
4543 lxc_list_del(it);
4544 free(it->elem);
4545 free(it);
4546 }
4547}
4548
f979ac15
SH
4549static inline void lxc_clear_includes(struct lxc_conf *conf)
4550{
4551 struct lxc_list *it,*next;
4552
4553 lxc_list_for_each_safe(it, &conf->includes, next) {
4554 lxc_list_del(it);
4555 free(it->elem);
4556 free(it);
4557 }
4558}
4559
8eb5694b
SH
4560void lxc_conf_free(struct lxc_conf *conf)
4561{
4562 if (!conf)
4563 return;
b91f00d3
SH
4564 if (conf->console.log_path)
4565 free(conf->console.log_path);
8eb5694b
SH
4566 if (conf->console.path)
4567 free(conf->console.path);
54c30e29 4568 if (conf->rootfs.mount)
8eb5694b 4569 free(conf->rootfs.mount);
a17b1e65
SG
4570 if (conf->rootfs.options)
4571 free(conf->rootfs.options);
d95db067
DE
4572 if (conf->rootfs.path)
4573 free(conf->rootfs.path);
a58878d6
SH
4574 if (conf->rootfs.pivot)
4575 free(conf->rootfs.pivot);
4576 if (conf->logfile)
4577 free(conf->logfile);
d95db067
DE
4578 if (conf->utsname)
4579 free(conf->utsname);
4580 if (conf->ttydir)
4581 free(conf->ttydir);
4582 if (conf->fstab)
4583 free(conf->fstab);
fc7e8864
WM
4584 if (conf->rcfile)
4585 free(conf->rcfile);
6b0d5538 4586 free(conf->unexpanded_config);
8eb5694b 4587 lxc_clear_config_network(conf);
fe4de9a6
DE
4588 if (conf->lsm_aa_profile)
4589 free(conf->lsm_aa_profile);
4590 if (conf->lsm_se_context)
4591 free(conf->lsm_se_context);
769872f9 4592 lxc_seccomp_free(conf);
8eb5694b 4593 lxc_clear_config_caps(conf);
1fb86a7c 4594 lxc_clear_config_keepcaps(conf);
8eb5694b 4595 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4596 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4597 lxc_clear_mount_entries(conf);
7b35f3d6 4598 lxc_clear_saved_nics(conf);
27c27d73 4599 lxc_clear_idmaps(conf);
ee1e7aa0 4600 lxc_clear_groups(conf);
f979ac15 4601 lxc_clear_includes(conf);
761d81ca 4602 lxc_clear_aliens(conf);
ab799c0b 4603 lxc_clear_environment(conf);
8eb5694b
SH
4604 free(conf);
4605}
4355ab5f
SH
4606
/*
 * Argument bundle for run_userns_fn(): the callback to invoke, its
 * argument, and the pipe used to hold the callback back until a byte
 * arrives.
 */
struct userns_fn_data {
	int (*fn)(void *);
	void *arg;
	int p[2];
};

/*
 * Entry point that waits for a single byte on the read end of the pipe
 * and then calls fn(arg), returning its result.  Returns -1 without
 * calling fn if the byte cannot be read.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *ctx = data;
	char token;
	ssize_t got;

	/* we're not sharing with the parent any more, if it was a thread */
	close(ctx->p[1]);

	got = read(ctx->p[0], &token, 1);
	if (got != 1)
		return -1;

	close(ctx->p[0]);
	return ctx->fn(ctx->arg);
}
4625
4626/*
8b227008
TS
4627 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4628 * if they are not already there.
4355ab5f 4629 */
8b227008
TS
4630static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4631 uid_t uid, gid_t gid)
4355ab5f 4632{
8b227008
TS
4633 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4634 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4635 struct lxc_list *new = NULL, *tmp, *it, *next;
4636 struct id_map *entry;
4637
3ec1648d
SH
4638 new = malloc(sizeof(*new));
4639 if (!new) {
4640 ERROR("Out of memory building id map");
4641 return NULL;
4642 }
4643 lxc_list_init(new);
4644
8b227008
TS
4645 if (hostuid_mapped < 0) {
4646 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4647 if (hostuid_mapped < 0)
3ec1648d
SH
4648 goto err;
4649 tmp = malloc(sizeof(*tmp));
4650 if (!tmp)
4651 goto err;
4355ab5f
SH
4652 entry = malloc(sizeof(*entry));
4653 if (!entry) {
3ec1648d
SH
4654 free(tmp);
4655 goto err;
4355ab5f 4656 }
3ec1648d 4657 tmp->elem = entry;
4355ab5f 4658 entry->idtype = ID_TYPE_UID;
8b227008
TS
4659 entry->nsid = hostuid_mapped;
4660 entry->hostid = (unsigned long) uid;
4661 entry->range = 1;
4662 lxc_list_add_tail(new, tmp);
4663 }
4664 if (hostgid_mapped < 0) {
4665 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4666 if (hostgid_mapped < 0)
4667 goto err;
4668 tmp = malloc(sizeof(*tmp));
4669 if (!tmp)
4670 goto err;
4671 entry = malloc(sizeof(*entry));
4672 if (!entry) {
4673 free(tmp);
4674 goto err;
4675 }
4676 tmp->elem = entry;
4677 entry->idtype = ID_TYPE_GID;
4678 entry->nsid = hostgid_mapped;
4679 entry->hostid = (unsigned long) gid;
4355ab5f 4680 entry->range = 1;
3ec1648d 4681 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4682 }
4683 lxc_list_for_each_safe(it, &conf->id_map, next) {
4684 tmp = malloc(sizeof(*tmp));
4685 if (!tmp)
4686 goto err;
4687 entry = malloc(sizeof(*entry));
4688 if (!entry) {
4689 free(tmp);
4690 goto err;
4691 }
4692 memset(entry, 0, sizeof(*entry));
4693 memcpy(entry, it->elem, sizeof(*entry));
4694 tmp->elem = entry;
3ec1648d 4695 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4696 }
4697
4698 return new;
4699
4700err:
8b227008 4701 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4702 if (new)
4703 lxc_free_idmap(new);
c30ac545 4704 free(new);
4355ab5f
SH
4705 return NULL;
4706}
4707
4708/*
4709 * Run a function in a new user namespace.
8b227008 4710 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4711 */
4712int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4713{
4714 int ret, pid;
4715 struct userns_fn_data d;
4716 char c = '1';
4717 int p[2];
4718 struct lxc_list *idmap;
4719
4355ab5f 4720 ret = pipe(p);
4355ab5f
SH
4721 if (ret < 0) {
4722 SYSERROR("opening pipe");
4723 return -1;
4724 }
4725 d.fn = fn;
4726 d.arg = data;
4727 d.p[0] = p[0];
4728 d.p[1] = p[1];
4729 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4730 if (pid < 0)
4731 goto err;
4355ab5f 4732 close(p[0]);
4355ab5f
SH
4733 p[0] = -1;
4734
8b227008
TS
4735 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4736 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4737 goto err;
4738 }
4739
4740 ret = lxc_map_ids(idmap, pid);
4741 lxc_free_idmap(idmap);
88dd66fc 4742 free(idmap);
565e571c 4743 if (ret) {
4355ab5f
SH
4744 ERROR("Error setting up child mappings");
4745 goto err;
4746 }
4747
4748 // kick the child
4749 if (write(p[1], &c, 1) != 1) {
4750 SYSERROR("writing to pipe to child");
4751 goto err;
4752 }
4753
3139aead
SG
4754 ret = wait_for_pid(pid);
4755
4756 close(p[1]);
4757 return ret;
4758
4355ab5f 4759err:
4355ab5f
SH
4760 if (p[0] != -1)
4761 close(p[0]);
4762 close(p[1]);
4355ab5f
SH
4763 return -1;
4764}
97e9cfa0 4765
/*
 * Return a malloc'd copy of the login name for the effective uid, or NULL
 * if there is no passwd entry (or the copy fails).  Caller frees.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4777
/*
 * Return a malloc'd copy of the group name for the effective gid, or NULL
 * if there is no group entry (or the copy fails).  Caller frees.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4789
a96a8e8c 4790/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4791void suggest_default_idmap(void)
4792{
4793 FILE *f;
4794 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4795 char *line = NULL;
4796 char *uname, *gname;
4797 size_t len = 0;
4798
4799 if (!(uname = getuname()))
4800 return;
4801
4802 if (!(gname = getgname())) {
4803 free(uname);
4804 return;
4805 }
4806
4807 f = fopen(subuidfile, "r");
4808 if (!f) {
4809 ERROR("Your system is not configured with subuids");
4810 free(gname);
4811 free(uname);
4812 return;
4813 }
4814 while (getline(&line, &len, f) != -1) {
4815 char *p = strchr(line, ':'), *p2;
4816 if (*line == '#')
4817 continue;
4818 if (!p)
4819 continue;
4820 *p = '\0';
4821 p++;
4822 if (strcmp(line, uname))
4823 continue;
4824 p2 = strchr(p, ':');
4825 if (!p2)
4826 continue;
4827 *p2 = '\0';
4828 p2++;
4829 if (!*p2)
4830 continue;
4831 uid = atoi(p);
4832 urange = atoi(p2);
4833 }
4834 fclose(f);
4835
4836 f = fopen(subuidfile, "r");
4837 if (!f) {
4838 ERROR("Your system is not configured with subgids");
4839 free(gname);
4840 free(uname);
4841 return;
4842 }
4843 while (getline(&line, &len, f) != -1) {
4844 char *p = strchr(line, ':'), *p2;
4845 if (*line == '#')
4846 continue;
4847 if (!p)
4848 continue;
4849 *p = '\0';
4850 p++;
4851 if (strcmp(line, uname))
4852 continue;
4853 p2 = strchr(p, ':');
4854 if (!p2)
4855 continue;
4856 *p2 = '\0';
4857 p2++;
4858 if (!*p2)
4859 continue;
4860 gid = atoi(p);
4861 grange = atoi(p2);
4862 }
4863 fclose(f);
4864
4865 if (line)
4866 free(line);
4867
4868 if (!urange || !grange) {
4869 ERROR("You do not have subuids or subgids allocated");
4870 ERROR("Unprivileged containers require subuids and subgids");
4871 return;
4872 }
4873
4874 ERROR("You must either run as root, or define uid mappings");
4875 ERROR("To pass uid mappings to lxc-create, you could create");
4876 ERROR("~/.config/lxc/default.conf:");
4877 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4878 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4879 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4880
4881 free(gname);
4882 free(uname);
4883}