]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
Reduce public API (V2)
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
4a0ba80d 35#include <time.h>
e827ff7e 36
4ba0d9af
SG
37#if HAVE_IFADDRS_H
38#include <ifaddrs.h>
39#else
40#include <../include/ifaddrs.h>
41#endif
42
e827ff7e 43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
b2718c72 68#include "parse.h"
881450bb 69#include "config.h"
1b09f2c0
DL
70#include "utils.h"
71#include "conf.h"
72#include "log.h"
73#include "lxc.h" /* for lxc_cgroup_set() */
d55bc1ad 74#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 75#include "bdev.h"
368bbc02 76#include "cgroup.h"
025ed0f3 77#include "lxclock.h"
4355ab5f 78#include "namespace.h"
fe4de9a6 79#include "lsm/lsm.h"
d0a36f2c 80
495d2046
SG
81#if HAVE_SYS_CAPABILITY_H
82#include <sys/capability.h>
83#endif
84
6ff05e18
SG
85#if HAVE_SYS_PERSONALITY_H
86#include <sys/personality.h>
87#endif
88
edaf8b1b
SG
89#if IS_BIONIC
90#include <../include/lxcmntent.h>
91#else
92#include <mntent.h>
93#endif
94
769872f9
SH
95#include "lxcseccomp.h"
96
36eb9bde 97lxc_log_define(lxc_conf, lxc);
e5bda9ee 98
/* Buffer size limits used by this file. */
#define MAXHWLEN    18
#define MAXINDEXLEN 20
#define MAXMTULEN   16
#define MAXLINELEN  128

/*
 * Fallback values for constants that older system headers may not provide.
 * The capability numbers are only supplied when <sys/capability.h> is in use.
 */
#if HAVE_SYS_CAPABILITY_H
#if !defined(CAP_SETFCAP)
#define CAP_SETFCAP 31
#endif

#if !defined(CAP_MAC_OVERRIDE)
#define CAP_MAC_OVERRIDE 32
#endif

#if !defined(CAP_MAC_ADMIN)
#define CAP_MAC_ADMIN 33
#endif
#endif /* HAVE_SYS_CAPABILITY_H */

#if !defined(PR_CAPBSET_DROP)
#define PR_CAPBSET_DROP 24
#endif

#if !defined(LO_FLAGS_AUTOCLEAR)
#define LO_FLAGS_AUTOCLEAR 4
#endif
125
2d76d1d7
SG
/*
 * pivot_root() is declared here when the C library provides it; otherwise
 * a local wrapper issues the raw system call, or fails with ENOSYS when the
 * syscall number is unknown at build time.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#if defined(__NR_pivot_root)
	return syscall(__NR_pivot_root, new_root, put_old);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif /* HAVE_PIVOT_ROOT */
140
/*
 * Local sethostname() for C libraries that lack it: issue the raw system
 * call, or fail with ENOSYS when the syscall number is unknown at build time.
 */
#if !defined(HAVE_SETHOSTNAME)
static int sethostname(const char * name, size_t len)
{
#if defined(__NR_sethostname)
	return syscall(__NR_sethostname, name, len);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif /* !HAVE_SETHOSTNAME */
153
72f919c4
SG
/*
 * __S_ISTYPE(mode, mask): true when the file-type bits of 'mode' equal
 * 'mask'. Only defined here if the C library headers did not define it.
 */
#if !defined(__S_ISTYPE)
#define __S_ISTYPE(mode, mask)	(((mode) & S_IFMT) == (mask))
#endif
158
72d0e1cb 159char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 160 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 161
/* Signature shared by the per-network-type setup and shutdown callbacks. */
typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);

/*
 * One recognised mount option.
 * NOTE(review): 'clear' looks like "remove 'flag' instead of setting it";
 * the code consuming this table is not in view here - confirm.
 */
struct mount_opt {
	char *name;	/* option keyword */
	int clear;
	int flag;	/* MS_* bit(s) associated with the keyword */
};

/* Maps a capability name to its CAP_* number. */
struct caps_opt {
	char *name;
	int value;
};
174
e3b4c4c4
ST
175static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
176static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
177static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
178static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
179static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 180
24654103
DL
181static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
182 [LXC_NET_VETH] = instanciate_veth,
183 [LXC_NET_MACVLAN] = instanciate_macvlan,
184 [LXC_NET_VLAN] = instanciate_vlan,
185 [LXC_NET_PHYS] = instanciate_phys,
186 [LXC_NET_EMPTY] = instanciate_empty,
0ad19a3f 187};
188
74a2b586
JK
189static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
190static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
191static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
192static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
193static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
194
195static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
196 [LXC_NET_VETH] = shutdown_veth,
197 [LXC_NET_MACVLAN] = shutdown_macvlan,
198 [LXC_NET_VLAN] = shutdown_vlan,
199 [LXC_NET_PHYS] = shutdown_phys,
200 [LXC_NET_EMPTY] = shutdown_empty,
201};
202
998ac676 203static struct mount_opt mount_opt[] = {
88d413d5
SW
204 { "defaults", 0, 0 },
205 { "ro", 0, MS_RDONLY },
206 { "rw", 1, MS_RDONLY },
207 { "suid", 1, MS_NOSUID },
208 { "nosuid", 0, MS_NOSUID },
209 { "dev", 1, MS_NODEV },
210 { "nodev", 0, MS_NODEV },
211 { "exec", 1, MS_NOEXEC },
212 { "noexec", 0, MS_NOEXEC },
213 { "sync", 0, MS_SYNCHRONOUS },
214 { "async", 1, MS_SYNCHRONOUS },
215 { "dirsync", 0, MS_DIRSYNC },
216 { "remount", 0, MS_REMOUNT },
217 { "mand", 0, MS_MANDLOCK },
218 { "nomand", 1, MS_MANDLOCK },
219 { "atime", 1, MS_NOATIME },
220 { "noatime", 0, MS_NOATIME },
221 { "diratime", 1, MS_NODIRATIME },
222 { "nodiratime", 0, MS_NODIRATIME },
223 { "bind", 0, MS_BIND },
224 { "rbind", 0, MS_BIND|MS_REC },
225 { "relatime", 0, MS_RELATIME },
226 { "norelatime", 1, MS_RELATIME },
227 { "strictatime", 0, MS_STRICTATIME },
228 { "nostrictatime", 1, MS_STRICTATIME },
229 { NULL, 0, 0 },
998ac676
RT
230};
231
495d2046 232#if HAVE_SYS_CAPABILITY_H
81810dd1 233static struct caps_opt caps_opt[] = {
a6afdde9 234 { "chown", CAP_CHOWN },
1e11be34
DL
235 { "dac_override", CAP_DAC_OVERRIDE },
236 { "dac_read_search", CAP_DAC_READ_SEARCH },
237 { "fowner", CAP_FOWNER },
238 { "fsetid", CAP_FSETID },
81810dd1
DL
239 { "kill", CAP_KILL },
240 { "setgid", CAP_SETGID },
241 { "setuid", CAP_SETUID },
242 { "setpcap", CAP_SETPCAP },
243 { "linux_immutable", CAP_LINUX_IMMUTABLE },
244 { "net_bind_service", CAP_NET_BIND_SERVICE },
245 { "net_broadcast", CAP_NET_BROADCAST },
246 { "net_admin", CAP_NET_ADMIN },
247 { "net_raw", CAP_NET_RAW },
248 { "ipc_lock", CAP_IPC_LOCK },
249 { "ipc_owner", CAP_IPC_OWNER },
250 { "sys_module", CAP_SYS_MODULE },
251 { "sys_rawio", CAP_SYS_RAWIO },
252 { "sys_chroot", CAP_SYS_CHROOT },
253 { "sys_ptrace", CAP_SYS_PTRACE },
254 { "sys_pacct", CAP_SYS_PACCT },
255 { "sys_admin", CAP_SYS_ADMIN },
256 { "sys_boot", CAP_SYS_BOOT },
257 { "sys_nice", CAP_SYS_NICE },
258 { "sys_resource", CAP_SYS_RESOURCE },
259 { "sys_time", CAP_SYS_TIME },
260 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
261 { "mknod", CAP_MKNOD },
262 { "lease", CAP_LEASE },
9527e566 263#ifdef CAP_AUDIT_WRITE
81810dd1 264 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
265#endif
266#ifdef CAP_AUDIT_CONTROL
81810dd1 267 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 268#endif
81810dd1
DL
269 { "setfcap", CAP_SETFCAP },
270 { "mac_override", CAP_MAC_OVERRIDE },
271 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
272#ifdef CAP_SYSLOG
273 { "syslog", CAP_SYSLOG },
274#endif
275#ifdef CAP_WAKE_ALARM
276 { "wake_alarm", CAP_WAKE_ALARM },
277#endif
81810dd1 278};
495d2046
SG
279#else
280static struct caps_opt caps_opt[] = {};
281#endif
81810dd1 282
4a0ba80d
SG
/* Characters substituted for the 'X' placeholders of a name template. */
static char padchar[] =
"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/*
 * mkifname
 * Build a network interface name from 'template' by replacing every 'X'
 * with a random character from padchar, retrying until the result does not
 * match the name of any interface reported by getifaddrs().
 *
 * Returns a newly allocated string the caller must free(), or NULL if the
 * interface list cannot be obtained or memory allocation fails.
 */
static char *mkifname(char *template)
{
	char *name = NULL;
	size_t i, len;
	/* Kept as in the original code: with this modulus the last character
	 * of padchar is never selected. */
	const size_t npad = strlen(padchar) - 1;
	FILE *urandom;
	unsigned int seed;
	struct ifaddrs *ifaddr = NULL, *ifa;
	int ifexists = 0;

	/* Get all the network interfaces; without the list the uniqueness
	 * check below would walk an uninitialised pointer. */
	if (getifaddrs(&ifaddr) < 0) {
		SYSERROR("failed to get network interfaces");
		return NULL;
	}

	/* Initialize the random number generator */
	process_lock();
	urandom = fopen ("/dev/urandom", "r");
	process_unlock();
	if (urandom != NULL) {
		if (fread (&seed, sizeof(seed), 1, urandom) != 1)
			seed = time(0);
		process_lock();
		fclose(urandom);
		process_unlock();
	}
	else
		seed = time(0);

#ifndef HAVE_RAND_R
	srand(seed);
#endif

	/* Generate random names until we find one that doesn't exist */
	while(1) {
		ifexists = 0;
		name = strdup(template);

		if (name == NULL) {
			/* do not leak the interface list on failure */
			freeifaddrs(ifaddr);
			return NULL;
		}

		len = strlen(name);
		for (i = 0; i < len; i++) {
			if (name[i] == 'X') {
#ifdef HAVE_RAND_R
				name[i] = padchar[rand_r(&seed) % npad];
#else
				name[i] = padchar[rand() % npad];
#endif
			}
		}

		for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
			if (strcmp(ifa->ifa_name, name) == 0) {
				ifexists = 1;
				break;
			}
		}

		if (ifexists == 0)
			break;

		free(name);
	}

	freeifaddrs(ifaddr);
	return name;
}
350
91c3830e
SH
351static int run_buffer(char *buffer)
352{
353 FILE *f;
354 char *output;
8e7da691 355 int ret;
91c3830e 356
025ed0f3 357 process_lock();
91c3830e 358 f = popen(buffer, "r");
025ed0f3 359 process_unlock();
91c3830e
SH
360 if (!f) {
361 SYSERROR("popen failed");
362 return -1;
363 }
364
365 output = malloc(LXC_LOG_BUFFER_SIZE);
366 if (!output) {
367 ERROR("failed to allocate memory for script output");
025ed0f3 368 process_lock();
00b6be44 369 pclose(f);
025ed0f3 370 process_unlock();
91c3830e
SH
371 return -1;
372 }
373
374 while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
375 DEBUG("script output: %s", output);
376
377 free(output);
378
025ed0f3 379 process_lock();
8e7da691 380 ret = pclose(f);
025ed0f3 381 process_unlock();
8e7da691 382 if (ret == -1) {
91c3830e
SH
383 SYSERROR("Script exited on error");
384 return -1;
8e7da691
DE
385 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
386 ERROR("Script exited with status %d", WEXITSTATUS(ret));
387 return -1;
388 } else if (WIFSIGNALED(ret)) {
389 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
390 strsignal(WTERMSIG(ret)));
391 return -1;
91c3830e
SH
392 }
393
394 return 0;
395}
396
/*
 * run_script_argv
 * Assemble the command line "<script> <name> <section> <hook> [args...]"
 * and hand it to run_buffer(). 'argsin' may be NULL; otherwise it is a
 * NULL-terminated array of extra arguments. 'lxcpath' is not used here.
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * built.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	char *cmd;
	size_t total = 0;
	int used, idx;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* each extra argument needs one byte for its leading separator */
	for (idx = 0; argsin && argsin[idx]; idx++)
		total += strlen(argsin[idx]) + 1;

	/* hook plus separator, the three leading words, and three more bytes
	 * for the remaining two separators and the terminating NUL */
	total += strlen(hook) + 1;
	total += strlen(script) + strlen(name) + strlen(section);
	total += 3;

	if (total > INT_MAX)
		return -1;

	cmd = alloca(total);
	if (cmd == NULL) {
		ERROR("failed to allocate memory");
		return -1;
	}

	used = snprintf(cmd, total, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= total) {
		ERROR("Script name too long");
		return -1;
	}

	for (idx = 0; argsin && argsin[idx]; idx++) {
		int room = total - used;
		int written = snprintf(cmd + used, room, " %s", argsin[idx]);

		if (written < 0 || written >= room) {
			ERROR("Script args too long");
			return -1;
		}
		used += written;
	}

	return run_buffer(cmd);
}
446
751d9dcd
DL
/*
 * run_script
 * Assemble the command line "<script> <name> <section> [args...]" and hand
 * it to run_buffer(). The variadic arguments are char * strings and the
 * list must be terminated by a NULL pointer.
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: size of the extra arguments, one separator each */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	if (size > INT_MAX)
		return -1;

	buffer = alloca(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		return -1;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size-ret;
		int rc;
		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			/* every va_start needs its va_end before returning */
			va_end(ap);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	return run_buffer(buffer);
}
498
/*
 * find_fstype_cb
 * Per-line callback: treat 'buffer' as one line of a filesystem list and
 * try to mount the rootfs with that filesystem type. 'data' points to the
 * { rootfs, target, mntopt } triple prepared by the caller.
 *
 * Returns 1 once a mount succeeds, 0 when the line is skipped or the mount
 * fails.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *args = data;
	char *type = buffer;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	/* strip the characters the lxc_char_*_gc helpers report at both ends */
	type += lxc_char_left_gc(type, strlen(type));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      args->rootfs, args->target, type);

	if (mount(args->rootfs, args->target, type, args->mntopt, NULL) == 0) {
		INFO("mounted '%s' on '%s', with fstype '%s'",
		     args->rootfs, args->target, type);
		return 1;
	}

	DEBUG("mount failed with error: %s", strerror(errno));
	return 0;
}
530
2656d231 531static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
78ae2fcc 532{
a6afdde9 533 int i;
78ae2fcc 534
535 struct cbarg {
536 const char *rootfs;
a6afdde9 537 const char *target;
78ae2fcc 538 int mntopt;
539 } cbarg = {
540 .rootfs = rootfs,
a6afdde9 541 .target = target,
78ae2fcc 542 .mntopt = mntopt,
543 };
544
a6afdde9
DL
545 /*
546 * find the filesystem type with brute force:
547 * first we check with /etc/filesystems, in case the modules
78ae2fcc 548 * are auto-loaded and fall back to the supported kernel fs
549 */
550 char *fsfile[] = {
551 "/etc/filesystems",
552 "/proc/filesystems",
553 };
554
a6afdde9
DL
555 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
556
557 int ret;
558
559 if (access(fsfile[i], F_OK))
560 continue;
561
562 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
563 if (ret < 0) {
564 ERROR("failed to parse '%s'", fsfile[i]);
565 return -1;
566 }
567
568 if (ret)
569 return 0;
78ae2fcc 570 }
571
a6afdde9
DL
572 ERROR("failed to determine fs type for '%s'", rootfs);
573 return -1;
574}
575
/*
 * mount_rootfs_dir
 * Recursively bind-mount the directory 'rootfs' on 'target'.
 * Returns the result of mount(2).
 */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
580
/*
 * setup_lodev
 * Attach the file 'rootfs' to the loop device open on 'fd' and mark the
 * device LO_FLAGS_AUTOCLEAR. 'loinfo' is zeroed and used as the status
 * block passed to the kernel.
 *
 * Returns 0 on success, -1 on error. If the status update fails after the
 * file was attached, the device is detached again so that it is not left
 * bound without the autoclear flag.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	process_lock();
	rfd = open(rootfs, O_RDWR);
	process_unlock();
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* autoclear was not applied, so release the device by hand */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	process_lock();
	close(rfd);
	process_unlock();

	return ret;
}
616
/*
 * mount_rootfs_file
 * Mount the image file 'rootfs' on 'target' through a loop device: scan
 * /dev for entries whose name starts with "loop", take the first one the
 * kernel reports as unbound (ENXIO on LOOP_GET_STATUS64), attach the file
 * to it and mount it with a guessed filesystem type.
 *
 * Returns 0 on success, -1 if no device could be used or the mount failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent entry, *cur;
	struct loop_info64 loinfo;
	char devpath[MAXPATHLEN];
	DIR *devdir;
	int result = -1, lofd = -1, n;

	process_lock();
	devdir = opendir("/dev");
	process_unlock();
	if (devdir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	for (;;) {
		const char *entname;

		/* stop on a read error or at the end of the directory */
		if (readdir_r(devdir, &entry, &cur) != 0 || cur == NULL)
			break;

		entname = cur->d_name;
		if (strcmp(entname, ".") == 0 || strcmp(entname, "..") == 0)
			continue;

		if (strncmp(entname, "loop", 4) != 0)
			continue;

		n = snprintf(devpath, MAXPATHLEN, "/dev/%s", entname);
		if (n < 0 || n >= MAXPATHLEN)
			continue;

		process_lock();
		lofd = open(devpath, O_RDWR);
		process_unlock();
		if (lofd < 0)
			continue;

		/* a successful status query means the device is already bound */
		if (ioctl(lofd, LOOP_GET_STATUS64, &loinfo) == 0) {
			process_lock();
			close(lofd);
			process_unlock();
			continue;
		}

		/* ENXIO is the "not bound" answer; anything else is unexpected */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     cur->d_name);
			process_lock();
			close(lofd);
			process_unlock();
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		result = setup_lodev(rootfs, lofd, &loinfo);
		if (result == 0)
			result = mount_unknow_fs(devpath, target, 0);

		process_lock();
		close(lofd);
		process_unlock();

		/* only the first free device is tried */
		break;
	}

	process_lock();
	if (closedir(devdir))
		WARN("failed to close directory");
	process_unlock();

	return result;
}
692
/*
 * mount_rootfs_block
 * Mount the block device 'rootfs' on 'target', guessing the filesystem
 * type and passing no mount flags.
 */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int no_flags = 0;

	return mount_unknow_fs(rootfs, target, no_flags);
}
697
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned (rootfs is NULL or empty, cannot
 * be resolved by realpath(), or is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	/* reject both an output error and a truncated path */
	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	process_lock();
	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	process_unlock();
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the file alive after the name is gone */
	(void)unlink(absrootfspin);
	return fd;
}
742
368bbc02
CS
743static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct cgroup_process_info *cgroup_info)
744{
368bbc02 745 int r;
b06b8511
CS
746 size_t i;
747 static struct {
748 int match_mask;
749 int match_flag;
750 const char *source;
751 const char *destination;
752 const char *fstype;
753 unsigned long flags;
754 const char *options;
755 } default_mounts[] = {
756 /* Read-only bind-mounting... In older kernels, doing that required
757 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
758 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
759 * kernel 2.6.26 onwards. However, this apparently does not work on
760 * kernel 3.8. Unfortunately, on that very same kernel, doing the
761 * same trick as above doesn't seem to work either, there one needs
762 * to ALSO specify MS_BIND for the remount, otherwise the entire
763 * fs is remounted read-only or the mount fails because it's busy...
764 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
765 * 2.6.32...
368bbc02 766 */
b06b8511
CS
767 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
768 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
769 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
770 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
771 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
772 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
773 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
774 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
775 { 0, 0, NULL, NULL, NULL, 0, NULL }
776 };
368bbc02 777
b06b8511
CS
778 for (i = 0; default_mounts[i].match_mask; i++) {
779 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
780 char *source = NULL;
781 char *destination = NULL;
782 int saved_errno;
783
784 if (default_mounts[i].source) {
785 /* will act like strdup if %r is not present */
786 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
787 if (!source) {
788 SYSERROR("memory allocation error");
789 return -1;
790 }
791 }
792 if (default_mounts[i].destination) {
793 /* will act like strdup if %r is not present */
794 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
795 if (!destination) {
796 saved_errno = errno;
797 SYSERROR("memory allocation error");
798 free(source);
799 errno = saved_errno;
800 return -1;
801 }
802 }
803 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
804 saved_errno = errno;
c414be25
DE
805 if (r < 0)
806 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
807 free(source);
808 free(destination);
809 if (r < 0) {
b06b8511
CS
810 errno = saved_errno;
811 return -1;
812 }
368bbc02 813 }
368bbc02
CS
814 }
815
b06b8511 816 if (flags & LXC_AUTO_CGROUP_MASK) {
7997d7da 817 r = lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info, flags & LXC_AUTO_CGROUP_MASK);
368bbc02
CS
818 if (r < 0) {
819 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 820 return -1;
368bbc02
CS
821 }
822 }
823
368bbc02 824 return 0;
368bbc02
CS
825}
826
2656d231 827static int mount_rootfs(const char *rootfs, const char *target)
0ad19a3f 828{
b09ef133 829 char absrootfs[MAXPATHLEN];
78ae2fcc 830 struct stat s;
a6afdde9 831 int i;
78ae2fcc 832
a6afdde9 833 typedef int (*rootfs_cb)(const char *, const char *);
78ae2fcc 834
835 struct rootfs_type {
836 int type;
837 rootfs_cb cb;
838 } rtfs_type[] = {
2656d231
DL
839 { S_IFDIR, mount_rootfs_dir },
840 { S_IFBLK, mount_rootfs_block },
841 { S_IFREG, mount_rootfs_file },
78ae2fcc 842 };
0ad19a3f 843
4c8ab83b 844 if (!realpath(rootfs, absrootfs)) {
36eb9bde 845 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 846 return -1;
847 }
b09ef133 848
b09ef133 849 if (access(absrootfs, F_OK)) {
36eb9bde 850 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 851 return -1;
852 }
853
78ae2fcc 854 if (stat(absrootfs, &s)) {
36eb9bde 855 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 856 return -1;
857 }
858
78ae2fcc 859 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 860
78ae2fcc 861 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
862 continue;
9b0f0477 863
a6afdde9 864 return rtfs_type[i].cb(absrootfs, target);
78ae2fcc 865 }
9b0f0477 866
36eb9bde 867 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 868 return -1;
0ad19a3f 869}
870
/*
 * setup_utsname
 * Set the hostname to utsname->nodename. A NULL 'utsname' is a no-op.
 * Returns 0 on success (or when there is nothing to do), -1 on failure.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;
	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
885
33fcb7a0 886static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 887 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 888{
7c6ef2a2
SH
889 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
890 int i, ret;
b0a33c1e 891
bc9bd0e3
DL
892 if (!rootfs->path)
893 return 0;
894
b0a33c1e 895 for (i = 0; i < tty_info->nbtty; i++) {
896
897 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
898
7c6ef2a2 899 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 900 rootfs->mount, i + 1);
7c6ef2a2
SH
901 if (ret >= sizeof(path)) {
902 ERROR("pathname too long for ttys");
903 return -1;
904 }
905 if (ttydir) {
906 /* create dev/lxc/tty%d" */
9ba8130c 907 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
908 rootfs->mount, ttydir, i + 1);
909 if (ret >= sizeof(lxcpath)) {
910 ERROR("pathname too long for ttys");
911 return -1;
912 }
025ed0f3 913 process_lock();
7c6ef2a2 914 ret = creat(lxcpath, 0660);
025ed0f3 915 process_unlock();
7c6ef2a2
SH
916 if (ret==-1 && errno != EEXIST) {
917 SYSERROR("error creating %s\n", lxcpath);
918 return -1;
919 }
025ed0f3 920 process_lock();
4d44e274
SH
921 if (ret >= 0)
922 close(ret);
025ed0f3 923 process_unlock();
7c6ef2a2
SH
924 ret = unlink(path);
925 if (ret && errno != ENOENT) {
926 SYSERROR("error unlinking %s\n", path);
927 return -1;
928 }
b0a33c1e 929
7c6ef2a2
SH
930 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
931 WARN("failed to mount '%s'->'%s'",
932 pty_info->name, path);
933 continue;
934 }
13954cce 935
9ba8130c
SH
936 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
937 if (ret >= sizeof(lxcpath)) {
938 ERROR("tty pathname too long");
939 return -1;
940 }
7c6ef2a2
SH
941 ret = symlink(lxcpath, path);
942 if (ret) {
943 SYSERROR("failed to create symlink for tty %d\n", i+1);
944 return -1;
945 }
946 } else {
c6883f38
SH
947 /* If we populated /dev, then we need to create /dev/ttyN */
948 if (access(path, F_OK)) {
025ed0f3 949 process_lock();
c6883f38 950 ret = creat(path, 0660);
025ed0f3 951 process_unlock();
c6883f38
SH
952 if (ret==-1) {
953 SYSERROR("error creating %s\n", path);
954 /* this isn't fatal, continue */
025ed0f3
SH
955 } else {
956 process_lock();
c6883f38 957 close(ret);
025ed0f3
SH
958 process_unlock();
959 }
c6883f38 960 }
7c6ef2a2
SH
961 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
962 WARN("failed to mount '%s'->'%s'",
963 pty_info->name, path);
964 continue;
965 }
b0a33c1e 966 }
967 }
968
cd54d859
DL
969 INFO("%d tty(s) has been setup", tty_info->nbtty);
970
b0a33c1e 971 return 0;
972}
973
7a7ff0c6 974static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
975{
976 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 977 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
978 int found;
979 void **cbparm;
980
981 mountentry = buffer;
982 cbparm = (void **)data;
983
984 mountlist = cbparm[0];
985 pivotdir = cbparm[1];
986
987 /* parse entry, first field is mountname, ignore */
2796cf79 988 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
989 if (!mountpoint)
990 return -1;
991
992 /* second field is mountpoint */
2796cf79 993 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
994 if (!mountpoint)
995 return -1;
996
997 /* only consider mountpoints below old root fs */
998 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
999 return 0;
1000
1001 /* filter duplicate mountpoints */
1002 found = 0;
1003 lxc_list_for_each(iterator, mountlist) {
1004 if (!strcmp(iterator->elem, mountpoint)) {
1005 found = 1;
1006 break;
1007 }
1008 }
1009 if (found)
1010 return 0;
1011
1012 /* add entry to list */
1013 listentry = malloc(sizeof(*listentry));
1014 if (!listentry) {
1015 SYSERROR("malloc for mountpoint listentry failed");
1016 return -1;
1017 }
1018
1019 listentry->elem = strdup(mountpoint);
1020 if (!listentry->elem) {
1021 SYSERROR("strdup failed");
00b6be44 1022 free(listentry);
bf601689
MH
1023 return -1;
1024 }
1025 lxc_list_add_tail(mountlist, listentry);
1026
1027 return 0;
1028}
1029
cc6f6dd7 1030static int umount_oldrootfs(const char *oldrootfs)
bf601689 1031{
2382ecff 1032 char path[MAXPATHLEN];
bf601689 1033 void *cbparm[2];
9ebb03ad 1034 struct lxc_list mountlist, *iterator, *next;
bf601689 1035 int ok, still_mounted, last_still_mounted;
9ba8130c 1036 int rc;
bf601689
MH
1037
1038 /* read and parse /proc/mounts in old root fs */
1039 lxc_list_init(&mountlist);
1040
cc6f6dd7 1041 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
1042 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
1043 if (rc >= sizeof(path)) {
1044 ERROR("rootfs name too long");
1045 return -1;
1046 }
bf601689 1047 cbparm[0] = &mountlist;
bf601689 1048
cc6f6dd7 1049 cbparm[1] = strdup(path);
bf601689
MH
1050 if (!cbparm[1]) {
1051 SYSERROR("strdup failed");
1052 return -1;
1053 }
1054
9ba8130c
SH
1055 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
1056 if (rc >= sizeof(path)) {
1057 ERROR("container proc/mounts name too long");
1058 return -1;
1059 }
cc6f6dd7
DL
1060
1061 ok = lxc_file_for_each_line(path,
1062 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
1063 if (ok < 0) {
1064 SYSERROR("failed to read or parse mount list '%s'", path);
1065 return -1;
1066 }
1067
1068 /* umount filesystems until none left or list no longer shrinks */
1069 still_mounted = 0;
1070 do {
1071 last_still_mounted = still_mounted;
1072 still_mounted = 0;
1073
9ebb03ad 1074 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 1075
c08556c6 1076 /* umount normally */
bf601689
MH
1077 if (!umount(iterator->elem)) {
1078 DEBUG("umounted '%s'", (char *)iterator->elem);
1079 lxc_list_del(iterator);
1080 continue;
1081 }
1082
bf601689
MH
1083 still_mounted++;
1084 }
7df119ee 1085
bf601689
MH
1086 } while (still_mounted > 0 && still_mounted != last_still_mounted);
1087
7df119ee 1088
c08556c6
DL
1089 lxc_list_for_each(iterator, &mountlist) {
1090
1091 /* let's try a lazy umount */
1092 if (!umount2(iterator->elem, MNT_DETACH)) {
1093 INFO("lazy unmount of '%s'", (char *)iterator->elem);
1094 continue;
1095 }
1096
1097 /* be more brutal (nfs) */
1098 if (!umount2(iterator->elem, MNT_FORCE)) {
1099 INFO("forced unmount of '%s'", (char *)iterator->elem);
1100 continue;
1101 }
1102
7df119ee 1103 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1104 }
bf601689 1105
cc6f6dd7
DL
1106 return 0;
1107}
1108
/*
 * Make 'rootfs' the new root with pivot_root() and get rid of the old
 * one.
 *
 * rootfs:   directory that becomes "/".
 * pivotdir: directory (relative to rootfs) that receives the old root;
 *           "lxc_putold" is used when NULL.  If it has to be created
 *           here it is removed again once the old root is unmounted.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created_pivotdir = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	pivotdir = pivotdir ? pivotdir : "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		created_pivotdir = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* we switch from absolute path to relative path */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* remove temporary mount point, we don't consider the removing
	 * as fatal */
	if (created_pivotdir && rmdir(pivotdir) != 0)
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
1168
bc6928ff
MW
1169
1170/*
1171 * Note: This is a verbatim copy of what is in monitor.c. We're just
1172 * using it here to generate a safe subdirectory in /dev/ for the
1173 * container's /dev/
1174 */
1175
1176/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
1177 * FNV has good anti collision properties and we're not worried
1178 * about pre-image resistance or one-way-ness, we're just trying to make
1179 * the name unique in the 108 bytes of space we have.
1180 */
#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
/*
 * 64 bit FNV-1a hash of the 'len' bytes starting at 'buf'.
 *
 * hval is the starting value (FNV1A_64_INIT for a fresh hash, or the
 * result of a previous call to continue one).  Returns the updated hash.
 */
static uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t pos;

	for (pos = 0; pos < len; pos++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[pos];

		/* multiply by the 64 bit FNV magic prime mod 2^64 */
		hval *= (uint64_t)0x100000001b3ULL;
	}

	return hval;
}
1200
1201/*
1202 * Check to see if a directory has something mounted on it and,
1203 * if it does, return the fstype.
1204 *
1205 * Code largely based on detect_shared_rootfs below
1206 *
1207 * Returns: # of matching entries in /proc/self/mounts
1208 * if != 0 fstype is filled with the last filesystem value.
1209 * if == 0 no matches found, fstype unchanged.
1210 *
1211 * ToDo: Maybe return the mount options in another parameter...
1212 */
1213
#define LINELEN 4096
#define MAX_FSTYPE_LEN 128
/*
 * Count the entries of /proc/self/mounts whose mount point is exactly
 * 'dir'.
 *
 * dir:    directory to look for; 0 is returned right away when it does
 *         not exist or is not a directory.
 * fstype: may be NULL.  Otherwise it receives the filesystem type of the
 *         last matching entry, truncated to MAX_FSTYPE_LEN - 1
 *         characters; it is left untouched when nothing matches.
 *
 * Returns the number of matching entries (0 also when the mount table
 * cannot be opened).
 */
int mount_check_fs( const char *dir, char *fstype )
{
	char buf[LINELEN], *p;
	struct stat s;
	FILE *f;
	int found_fs = 0;
	char *p2;

	DEBUG("entering mount_check_fs for %s\n", dir);

	if ( 0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode) ) {
		return 0;
	}

	process_lock();
	f = fopen("/proc/self/mounts", "r");
	process_unlock();
	if (!f)
		return 0;
	while (fgets(buf, LINELEN, f)) {
		/* first field: device name, skipped */
		p = index(buf, ' ');
		if( !p )
			continue;
		*p = '\0';
		p2 = p + 1;

		/* second field: mount point */
		p = index(p2, ' ');
		if( !p )
			continue;
		*p = '\0';

		/* Compare the directory in the entry to desired */
		if( strcmp( p2, dir ) ) {
			continue;
		}

		/* third field: filesystem type */
		p2 = p + 1;
		p = index( p2, ' ');
		if( !p )
			continue;
		*p = '\0';

		++found_fs;

		if( fstype ) {
			strncpy( fstype, p2, MAX_FSTYPE_LEN - 1 );
			fstype [ MAX_FSTYPE_LEN - 1 ] = '\0';
		}
	}

	process_lock();
	fclose(f);
	process_unlock();

	/*
	 * fstype is only meaningful when the caller supplied a buffer and
	 * at least one entry was copied into it; never hand a NULL or
	 * unwritten buffer to %s.
	 */
	DEBUG("mount_check_fs returning %d last %s\n", found_fs,
	      (fstype && found_fs) ? fstype : "(none)");

	return found_fs;
}
1274
1275/*
1276 * Locate a devtmpfs mount (should be on /dev) and create a container
1277 * subdirectory on it which we can then bind mount to the container
1278 * /dev instead of mounting a tmpfs there.
1279 * If we fail, return NULL.
1280 * Else return the pointer to the name buffer with the string to
1281 * the devtmpfs subdirectory.
1282 */
1283
1284char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
1285{
1286 int ret;
1287 struct stat s;
1288 char tmp_path[MAXPATHLEN];
1289 char fstype[MAX_FSTYPE_LEN];
1290 char *base_path = "/dev/.lxc";
1291 char *user_path = "/dev/.lxc/user";
1292 uint64_t hash;
1293
1294 if ( 0 != access(base_path, F_OK) || 0 != stat(base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1295 /* This is just making /dev/.lxc it better work or we're done */
1296 ret = mkdir(base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1297 if ( ret ) {
1298 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1299 return NULL;
1300 }
1301 }
1302
1303 /*
1304 * Programmers notes:
1305 * We can not do mounts in this area of code that we want
1306 * to be visible in the host. Consequently, /dev/.lxc must
1307 * be set up earlier if we need a tmpfs mounted there.
1308 * That only affects the rare cases where autodev is enabled
1309 * for a container and devtmpfs is not mounted on /dev in the
1310 * host. In that case, we'll fall back to the old method
1311 * of mounting a tmpfs in the container and have no visibility
1312 * into the container /dev.
1313 */
1314 if( ! mount_check_fs( "/dev", fstype )
1315 || strcmp( "devtmpfs", fstype ) ) {
1316 /* Either /dev was not mounted or was not devtmpfs */
1317
1318 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1319 /*
1320 * /dev/.lxc is not already mounted
1321 * Doing a mount here does no good, since
1322 * it's not visible in the host.
1323 */
1324
1325 ERROR("/dev/.lxc is not setup - taking fallback" );
1326 return NULL;
1327 }
1328 }
1329
1330 if ( 0 != access(user_path, F_OK) || 0 != stat(user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1331 /*
1332 * This is making /dev/.lxc/user path for non-priv users.
1333 * If this doesn't work, we'll have to fall back in the
1334 * case of non-priv users. It's mode 1777 like /tmp.
1335 */
1336 ret = mkdir(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1337 if ( ret ) {
1338 /* Issue an error but don't fail yet! */
1339 ERROR("Unable to create /dev/.lxc/user");
1340 }
1341 /* Umask tends to screw us up here */
1342 chmod(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1343 }
1344
1345 /*
1346 * Since the container name must be unique within a given
1347 * lxcpath, we're going to use a hash of the path
1348 * /lxcpath/name as our hash name in /dev/.lxc/
1349 */
1350
1351 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1352 if (ret < 0 || ret >= MAXPATHLEN)
1353 return NULL;
1354
1355 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1356
1357 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, base_path, name, hash);
1358 if (ret < 0 || ret >= MAXPATHLEN)
1359 return NULL;
1360
1361 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1362 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1363 if ( ret ) {
1364 /* Something must have failed with the base_path...
1365 * Maybe unpriv user. Try user_path now... */
1366 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1367
1368 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, user_path, name, hash);
1369 if (ret < 0 || ret >= MAXPATHLEN)
1370 return NULL;
1371
1372 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1373 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1374 if ( ret ) {
1375 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1376 return NULL;
1377 }
1378 }
1379 }
1380 }
1381
1382 strcpy( path, tmp_path );
1383 return path;
1384}
1385
1386
91c3830e
SH
1387/*
1388 * Do we want to add options for max size of /dev and a file to
1389 * specify which devices to create?
1390 */
/*
 * Put a /dev in place for the container rooted at 'root'.
 *
 * name:    container name.
 * root:    path of the container root; the mount target is "<root>/dev".
 * lxcpath: lxc path of the container; "<lxcpath>/<name>/rootfs.dev" is
 *          the host side name for the container /dev.
 *
 * If mk_devtmpfs() provides a host directory it is symlinked from the
 * host side name and bind mounted on "<root>/dev".  Otherwise either a
 * tmpfs is mounted there, or - if something is already mounted on the
 * host side name - that is bind mounted.  "<root>/dev/pts" is created
 * when missing.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);

	/* ret == MAXPATHLEN already means the name was truncated */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs( name, devtmpfs_path, lxcpath ) ) {
		/*
		 * Get rid of old links and directories
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink( host_path );
		rmdir( host_path );
		ret = symlink(devtmpfs_path, host_path);

		if ( ret < 0 ) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'\n", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0 );
	} else {
		/* Only mount a tmpfs on here if we don't already have a mount */
		if ( ! mount_check_fs( host_path, NULL ) ) {
			/* log the directory the tmpfs is really mounted on */
			DEBUG("Mounting tmpfs to %s", path );
			ret = mount("none", path, "tmpfs", 0, "size=100000");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path );
			ret = mount(host_path , path, NULL, MS_BIND, 0 );
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}
	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) || 0 == S_ISDIR(s.st_mode) ) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
1460
c6883f38
SH
/* One device node to create in the container /dev. */
struct lxc_devs {
	char *name;	/* node name below /dev */
	mode_t mode;	/* file type and permission bits given to mknod() */
	int maj;	/* device major number */
	int min;	/* device minor number */
};

/* Device nodes created by setup_autodev(). */
struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1477
c6883f38
SH
1478static int setup_autodev(char *root)
1479{
1480 int ret;
1481 struct lxc_devs *d;
1482 char path[MAXPATHLEN];
1483 int i;
3a32201c 1484 mode_t cmask;
c6883f38 1485
91c3830e
SH
1486 INFO("Creating initial consoles under %s/dev\n", root);
1487
c6883f38 1488 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1489 if (ret < 0 || ret >= MAXPATHLEN) {
1490 ERROR("Error calculating container /dev location");
c6883f38 1491 return -1;
f7bee6c6 1492 }
91c3830e
SH
1493
1494 INFO("Populating /dev under %s\n", root);
3a32201c 1495 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38
SH
1496 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
1497 d = &lxc_devs[i];
1498 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1499 if (ret < 0 || ret >= MAXPATHLEN)
1500 return -1;
1501 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1502 if (ret && errno != EEXIST) {
c6883f38
SH
1503 SYSERROR("Error creating %s\n", d->name);
1504 return -1;
1505 }
1506 }
3a32201c 1507 umask(cmask);
c6883f38
SH
1508
1509 INFO("Populated /dev under %s\n", root);
1510 return 0;
1511}
1512
cc28d0b0
SH
1513/*
1514 * Detect whether / is mounted MS_SHARED. The only way I know of to
1515 * check that is through /proc/self/mountinfo.
1516 * I'm only checking for /. If the container rootfs or mount location
1517 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1518 * out would be too much work to be worth it.
1519 */
#define LINELEN 4096
/*
 * Look for a /proc/self/mountinfo entry whose mount point is "/" and
 * that carries a "shared:" tag.
 *
 * Returns 1 if such an entry is found, 0 otherwise (also when the file
 * cannot be opened).
 */
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	char *field, *end;
	FILE *f;
	int shared = 0;
	int n;

	process_lock();
	f = fopen("/proc/self/mountinfo", "r");
	process_unlock();
	if (!f)
		return 0;

	while (fgets(line, LINELEN, f)) {
		/* step over four separators to reach the mount point field */
		field = line;
		for (n = 0; field && n < 4; n++)
			field = index(field + 1, ' ');
		if (!field)
			continue;

		end = index(field + 1, ' ');
		if (!end)
			continue;
		*end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		// this is '/'.  is it shared?
		field = index(end + 1, ' ');
		if (field && strstr(field, "shared:")) {
			shared = 1;
			break;
		}
	}

	process_lock();
	fclose(f);
	process_unlock();
	return shared;
}
1558
1559/*
1560 * I'll forgive you for asking whether all of this is needed :) The
1561 * answer is yes.
1562 * pivot_root will fail if the new root, the put_old dir, or the parent
1563 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1564 * or may not be current->fs_root - if we assumed it always was, we could
1565 * just mount --make-rslave /). So,
1566 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1567 * 2. make that MS_SLAVE
1568 * 3. make a 'root' directory under that
1569 * 4. mount --rbind / under the $tinyroot/root.
1570 * 5. make that rslave
1571 * 6. chdir and chroot into $tinyroot/root
1572 * 7. $tinyroot will be unmounted by our parent in start.c
1573 */
1574static int chroot_into_slave(struct lxc_conf *conf)
1575{
1576 char path[MAXPATHLEN];
1577 const char *destpath = conf->rootfs.mount;
1578 int ret;
1579
1580 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1581 SYSERROR("failed to mount %s bind", destpath);
1582 return -1;
1583 }
1584 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1585 SYSERROR("failed to make %s slave", destpath);
1586 return -1;
1587 }
1588 if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
1589 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1590 return -1;
1591 }
1592 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1593 if (ret < 0 || ret >= MAXPATHLEN) {
1594 ERROR("out of memory making root path");
1595 return -1;
1596 }
1597 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1598 SYSERROR("Failed to create /dev/pts in container");
1599 return -1;
1600 }
1601 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1602 SYSERROR("Failed to rbind mount / to %s", path);
1603 return -1;
1604 }
1605 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1606 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1607 return -1;
1608 }
1609 if (chdir(path)) {
1610 SYSERROR("Failed to chdir into tmp-/");
1611 return -1;
1612 }
1613 if (chroot(path)) {
1614 SYSERROR("Failed to chroot into tmp-/");
1615 return -1;
1616 }
1617 INFO("Chrooted into tmp-/ at %s\n", path);
1618 return 0;
1619}
1620
1621static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1622{
cc28d0b0
SH
1623 const struct lxc_rootfs *rootfs = &conf->rootfs;
1624
a0f379bf
DW
1625 if (!rootfs->path) {
1626 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1627 SYSERROR("Failed to make / rslave");
1628 return -1;
1629 }
c69bd12f 1630 return 0;
a0f379bf 1631 }
0ad19a3f 1632
12297168 1633 if (access(rootfs->mount, F_OK)) {
b1789442 1634 SYSERROR("failed to access to '%s', check it is present",
12297168 1635 rootfs->mount);
b1789442
DL
1636 return -1;
1637 }
1638
cc28d0b0
SH
1639 if (detect_shared_rootfs()) {
1640 if (chroot_into_slave(conf)) {
1641 ERROR("Failed to chroot into slave /");
1642 return -1;
1643 }
1644 }
1645
9be53773
SH
1646 // First try mounting rootfs using a bdev
1647 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1648 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1649 bdev_put(bdev);
9be53773
SH
1650 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1651 return 0;
1652 }
59d66af2
SH
1653 if (bdev)
1654 bdev_put(bdev);
2656d231 1655 if (mount_rootfs(rootfs->path, rootfs->mount)) {
a6afdde9 1656 ERROR("failed to mount rootfs");
c3f0a28c 1657 return -1;
1658 }
0ad19a3f 1659
12297168 1660 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1661
ac778708
DL
1662 return 0;
1663}
1664
1665int setup_pivot_root(const struct lxc_rootfs *rootfs)
1666{
ac778708
DL
1667 if (!rootfs->path)
1668 return 0;
1669
12297168 1670 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1671 ERROR("failed to setup pivot root");
25368b52 1672 return -1;
c69bd12f
DL
1673 }
1674
25368b52 1675 return 0;
0ad19a3f 1676}
1677
d852c78c 1678static int setup_pts(int pts)
3c26f34e 1679{
77890c6d
SW
1680 char target[PATH_MAX];
1681
d852c78c
DL
1682 if (!pts)
1683 return 0;
3c26f34e 1684
1685 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1686 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1687 return -1;
1688 }
1689
a6afdde9 1690 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1691 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1692 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1693 return -1;
1694 }
1695
3c26f34e 1696 if (access("/dev/ptmx", F_OK)) {
1697 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1698 goto out;
36eb9bde 1699 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1700 return -1;
1701 }
1702
77890c6d
SW
1703 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1704 goto out;
1705
3c26f34e 1706 /* fallback here, /dev/pts/ptmx exists just mount bind */
1707 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1708 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1709 return -1;
1710 }
cd54d859
DL
1711
1712 INFO("created new pts instance");
d852c78c 1713
3c26f34e 1714out:
1715 return 0;
1716}
1717
cccc74b5
DL
/*
 * Apply the requested execution domain with personality().
 *
 * persona: value handed to personality(); -1 means "leave unchanged".
 *
 * Compiles to a no-op returning 0 when HAVE_SYS_PERSONALITY_H is not
 * set.  Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1734
7c6ef2a2 1735static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1736 const struct lxc_console *console)
6e590161 1737{
63376d7d
DL
1738 char path[MAXPATHLEN];
1739 struct stat s;
7c6ef2a2 1740 int ret;
52e35957 1741
7c6ef2a2
SH
1742 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1743 if (ret >= sizeof(path)) {
1744 ERROR("console path too long\n");
1745 return -1;
1746 }
52e35957 1747
63376d7d 1748 if (access(path, F_OK)) {
466978b0 1749 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1750 return 0;
52e35957
DL
1751 }
1752
b5159817
DE
1753 if (console->master < 0) {
1754 INFO("no console");
f78a1f32
DL
1755 return 0;
1756 }
ed502555 1757
63376d7d
DL
1758 if (stat(path, &s)) {
1759 SYSERROR("failed to stat '%s'", path);
1760 return -1;
1761 }
1762
1763 if (chmod(console->name, s.st_mode)) {
1764 SYSERROR("failed to set mode '0%o' to '%s'",
1765 s.st_mode, console->name);
1766 return -1;
1767 }
13954cce 1768
63376d7d
DL
1769 if (mount(console->name, path, "none", MS_BIND, 0)) {
1770 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1771 return -1;
1772 }
1773
63376d7d 1774 INFO("console has been setup");
7c6ef2a2
SH
1775 return 0;
1776}
1777
1778static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1779 const struct lxc_console *console,
1780 char *ttydir)
1781{
1782 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1783 int ret;
1784
1785 /* create rootfs/dev/<ttydir> directory */
1786 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1787 ttydir);
1788 if (ret >= sizeof(path))
1789 return -1;
1790 ret = mkdir(path, 0755);
1791 if (ret && errno != EEXIST) {
1792 SYSERROR("failed with errno %d to create %s\n", errno, path);
1793 return -1;
1794 }
1795 INFO("created %s\n", path);
1796
1797 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1798 rootfs->mount, ttydir);
1799 if (ret >= sizeof(lxcpath)) {
1800 ERROR("console path too long\n");
1801 return -1;
1802 }
1803
1804 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1805 ret = unlink(path);
1806 if (ret && errno != ENOENT) {
1807 SYSERROR("error unlinking %s\n", path);
1808 return -1;
1809 }
1810
025ed0f3 1811 process_lock();
7c6ef2a2 1812 ret = creat(lxcpath, 0660);
025ed0f3 1813 process_unlock();
7c6ef2a2
SH
1814 if (ret==-1 && errno != EEXIST) {
1815 SYSERROR("error %d creating %s\n", errno, lxcpath);
1816 return -1;
1817 }
025ed0f3 1818 process_lock();
4d44e274
SH
1819 if (ret >= 0)
1820 close(ret);
025ed0f3 1821 process_unlock();
7c6ef2a2 1822
b5159817
DE
1823 if (console->master < 0) {
1824 INFO("no console");
7c6ef2a2
SH
1825 return 0;
1826 }
1827
1828 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1829 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1830 return -1;
1831 }
1832
1833 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1834 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1835 if (ret >= sizeof(lxcpath)) {
1836 ERROR("lxc/console path too long");
1837 return -1;
1838 }
7c6ef2a2
SH
1839 ret = symlink(lxcpath, path);
1840 if (ret) {
1841 SYSERROR("failed to create symlink for console");
1842 return -1;
1843 }
1844
1845 INFO("console has been setup on %s", lxcpath);
cd54d859 1846
6e590161 1847 return 0;
1848}
1849
7c6ef2a2
SH
1850static int setup_console(const struct lxc_rootfs *rootfs,
1851 const struct lxc_console *console,
1852 char *ttydir)
1853{
1854 /* We don't have a rootfs, /dev/console will be shared */
1855 if (!rootfs->path)
1856 return 0;
1857 if (!ttydir)
1858 return setup_dev_console(rootfs, console);
1859
1860 return setup_ttydir_console(rootfs, console, ttydir);
1861}
1862
1bd051a6
SH
1863static int setup_kmsg(const struct lxc_rootfs *rootfs,
1864 const struct lxc_console *console)
1865{
1866 char kpath[MAXPATHLEN];
1867 int ret;
1868
222fea5a
DE
1869 if (!rootfs->path)
1870 return 0;
1bd051a6
SH
1871 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1872 if (ret < 0 || ret >= sizeof(kpath))
1873 return -1;
1874
1875 ret = unlink(kpath);
1876 if (ret && errno != ENOENT) {
1877 SYSERROR("error unlinking %s\n", kpath);
1878 return -1;
1879 }
1880
1881 ret = symlink("console", kpath);
1882 if (ret) {
1883 SYSERROR("failed to create symlink for kmsg");
1884 return -1;
1885 }
1886
1887 return 0;
1888}
1889
998ac676
RT
1890static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1891{
1892 struct mount_opt *mo;
1893
1894 /* If opt is found in mount_opt, set or clear flags.
1895 * Otherwise append it to data. */
1896
1897 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1898 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1899 if (mo->clear)
1900 *flags &= ~mo->flag;
1901 else
1902 *flags |= mo->flag;
1903 return;
1904 }
1905 }
1906
1907 if (strlen(*data))
1908 strcat(*data, ",");
1909 strcat(*data, opt);
1910}
1911
/*
 * Split a comma separated mount option string into mount flags and a
 * data string.
 *
 * mntopts:  option string, may be NULL.
 * mntflags: receives the flags (0 when there are none).
 * mntdata:  receives a malloc'ed string with the options that are not
 *           flags, or NULL when there are none; the caller frees it.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *rest;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, work on a copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the leftover options can never be longer than the input */
	rest = malloc(strlen(copy) + 1);
	if (!rest) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	*rest = 0;

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &rest);
		tok = strtok_r(NULL, ",", &state);
	}

	if (*rest)
		*mntdata = rest;
	else
		free(rest);
	free(copy);

	return 0;
}
1950
911324ef
DL
/*
 * Mount 'fsname' on 'target'.
 *
 * The first mount() is done without MS_REMOUNT.  When the flags contain
 * MS_REMOUNT or MS_BIND a second mount() with MS_REMOUNT added follows.
 *
 * Returns 0 on success, -1 if one of the mount() calls fails.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long initial_flags = mountflags & ~MS_REMOUNT;

	if (mount(fsname, target, fstype, initial_flags, data) != 0) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1977
/*
 * Mount an fstab entry at the directory it names, without any rootfs
 * prefix.
 *
 * A failed mount is ignored when the entry has the "optional" option.
 * Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *data;
	int rc;

	if (parse_mntopts(mntent->mnt_opts, &flags, &data) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, data);

	/* optional entries are allowed to fail */
	if (hasmntopt(mntent, "optional"))
		rc = 0;

	free(data);

	return rc;
}
1999
2000static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
2001 const struct lxc_rootfs *rootfs,
2002 const char *lxc_name)
911324ef 2003{
013bd428 2004 char *aux;
59760f5d 2005 char path[MAXPATHLEN];
911324ef
DL
2006 unsigned long mntflags;
2007 char *mntdata;
80a881b2 2008 int r, ret = 0, offset;
67e571de 2009 const char *lxcpath;
0ad19a3f 2010
911324ef
DL
2011 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
2012 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
2013 return -1;
2014 }
1bc60a65 2015
2a59a681
SH
2016 lxcpath = default_lxc_path();
2017 if (!lxcpath) {
2018 ERROR("Out of memory");
2019 return -1;
2020 }
2021
80a881b2 2022 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
2023 * use $lxcpath/CN/rootfs as the target prefix */
2024 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
2025 if (r < 0 || r >= MAXPATHLEN)
2026 goto skipvarlib;
2027
2028 aux = strstr(mntent->mnt_dir, path);
2029 if (aux) {
2030 offset = strlen(path);
2031 goto skipabs;
2032 }
2033
2034skipvarlib:
013bd428
DL
2035 aux = strstr(mntent->mnt_dir, rootfs->path);
2036 if (!aux) {
2037 WARN("ignoring mount point '%s'", mntent->mnt_dir);
2038 goto out;
2039 }
80a881b2
SH
2040 offset = strlen(rootfs->path);
2041
2042skipabs:
013bd428 2043
9ba8130c 2044 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
2045 aux + offset);
2046 if (r < 0 || r >= MAXPATHLEN) {
2047 WARN("pathnme too long for '%s'", mntent->mnt_dir);
2048 ret = -1;
2049 goto out;
2050 }
2051
d330fe7b 2052
013bd428 2053 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 2054 mntflags, mntdata);
0ad19a3f 2055
68c152ef
SH
2056 if (hasmntopt(mntent, "optional") != NULL)
2057 ret = 0;
2058
013bd428 2059out:
911324ef
DL
2060 free(mntdata);
2061 return ret;
2062}
d330fe7b 2063
911324ef
DL
/*
 * Mount an fstab entry whose mount point is relative; the target is
 * "<rootfs>/<mnt_dir>".
 *
 * A failed mount is ignored when the entry has the "optional" option.
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* mntdata was allocated by parse_mntopts() */
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	/* optional entries are allowed to fail */
	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

	free(mntdata);

	return ret;
}
2094
80a881b2
SH
2095static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2096 const char *lxc_name)
911324ef
DL
2097{
2098 struct mntent *mntent;
2099 int ret = -1;
e76b8764 2100
911324ef 2101 while ((mntent = getmntent(file))) {
e76b8764 2102
911324ef
DL
2103 if (!rootfs->path) {
2104 if (mount_entry_on_systemfs(mntent))
e76b8764 2105 goto out;
911324ef 2106 continue;
e76b8764
CDC
2107 }
2108
911324ef
DL
2109 /* We have a separate root, mounts are relative to it */
2110 if (mntent->mnt_dir[0] != '/') {
2111 if (mount_entry_on_relative_rootfs(mntent,
2112 rootfs->mount))
2113 goto out;
2114 continue;
2115 }
cd54d859 2116
80a881b2 2117 if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
911324ef 2118 goto out;
0ad19a3f 2119 }
cd54d859 2120
0ad19a3f 2121 ret = 0;
cd54d859
DL
2122
2123 INFO("mount points have been setup");
0ad19a3f 2124out:
e7938e9e
MN
2125 return ret;
2126}
2127
80a881b2
SH
/*
 * Mount the entries of the fstab file 'fstab'; nothing is done when it
 * is NULL.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an entry
 * fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *mounts;
	int rc;

	if (fstab == NULL)
		return 0;

	process_lock();
	mounts = setmntent(fstab, "r");
	process_unlock();
	if (mounts == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, mounts, lxc_name);

	process_lock();
	endmntent(mounts);
	process_unlock();

	return rc;
}
2152
80a881b2
SH
2153static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2154 const char *lxc_name)
e7938e9e
MN
2155{
2156 FILE *file;
2157 struct lxc_list *iterator;
2158 char *mount_entry;
2159 int ret;
2160
025ed0f3 2161 process_lock();
e7938e9e 2162 file = tmpfile();
025ed0f3 2163 process_unlock();
e7938e9e
MN
2164 if (!file) {
2165 ERROR("tmpfile error: %m");
2166 return -1;
2167 }
2168
2169 lxc_list_for_each(iterator, mount) {
2170 mount_entry = iterator->elem;
1d6b1976 2171 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2172 }
2173
2174 rewind(file);
2175
80a881b2 2176 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e 2177
025ed0f3 2178 process_lock();
e7938e9e 2179 fclose(file);
025ed0f3 2180 process_unlock();
e7938e9e
MN
2181 return ret;
2182}
2183
81810dd1
DL
2184static int setup_caps(struct lxc_list *caps)
2185{
2186 struct lxc_list *iterator;
2187 char *drop_entry;
d55bc1ad 2188 char *ptr;
81810dd1
DL
2189 int i, capid;
2190
2191 lxc_list_for_each(iterator, caps) {
2192
2193 drop_entry = iterator->elem;
2194
2195 capid = -1;
2196
2197 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2198
2199 if (strcmp(drop_entry, caps_opt[i].name))
2200 continue;
2201
2202 capid = caps_opt[i].value;
2203 break;
2204 }
2205
d55bc1ad
CS
2206 if (capid < 0) {
2207 /* try to see if it's numeric, so the user may specify
2208 * capabilities that the running kernel knows about but
2209 * we don't */
09bbd745 2210 errno = 0;
d55bc1ad 2211 capid = strtol(drop_entry, &ptr, 10);
09bbd745 2212 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
2213 /* not a valid number */
2214 capid = -1;
2215 else if (capid > lxc_caps_last_cap())
2216 /* we have a number but it's not a valid
2217 * capability */
2218 capid = -1;
2219 }
2220
81810dd1 2221 if (capid < 0) {
1e11be34
DL
2222 ERROR("unknown capability %s", drop_entry);
2223 return -1;
81810dd1
DL
2224 }
2225
2226 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2227
2228 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
2229 SYSERROR("failed to remove %s capability", drop_entry);
2230 return -1;
2231 }
2232
2233 }
2234
1fb86a7c
SH
2235 DEBUG("capabilities have been setup");
2236
2237 return 0;
2238}
2239
2240static int dropcaps_except(struct lxc_list *caps)
2241{
2242 struct lxc_list *iterator;
2243 char *keep_entry;
2244 char *ptr;
2245 int i, capid;
2246 int numcaps = lxc_caps_last_cap() + 1;
2247 INFO("found %d capabilities\n", numcaps);
2248
2caf9a97
SH
2249 if (numcaps <= 0 || numcaps > 200)
2250 return -1;
2251
1fb86a7c
SH
2252 // caplist[i] is 1 if we keep capability i
2253 int *caplist = alloca(numcaps * sizeof(int));
2254 memset(caplist, 0, numcaps * sizeof(int));
2255
2256 lxc_list_for_each(iterator, caps) {
2257
2258 keep_entry = iterator->elem;
2259
2260 capid = -1;
2261
2262 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2263
2264 if (strcmp(keep_entry, caps_opt[i].name))
2265 continue;
2266
2267 capid = caps_opt[i].value;
2268 break;
2269 }
2270
2271 if (capid < 0) {
2272 /* try to see if it's numeric, so the user may specify
2273 * capabilities that the running kernel knows about but
2274 * we don't */
2275 capid = strtol(keep_entry, &ptr, 10);
2276 if (!ptr || *ptr != '\0' ||
f371aca9 2277 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2278 /* not a valid number */
2279 capid = -1;
2280 else if (capid > lxc_caps_last_cap())
2281 /* we have a number but it's not a valid
2282 * capability */
2283 capid = -1;
2284 }
2285
2286 if (capid < 0) {
2287 ERROR("unknown capability %s", keep_entry);
2288 return -1;
2289 }
2290
2291 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2292
2293 caplist[capid] = 1;
2294 }
2295 for (i=0; i<numcaps; i++) {
2296 if (caplist[i])
2297 continue;
2298 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
2299 SYSERROR("failed to remove capability %d", i);
2300 return -1;
2301 }
2302 }
2303
2304 DEBUG("capabilities have been setup");
81810dd1
DL
2305
2306 return 0;
2307}
2308
0ad19a3f 2309static int setup_hw_addr(char *hwaddr, const char *ifname)
2310{
2311 struct sockaddr sockaddr;
2312 struct ifreq ifr;
2313 int ret, fd;
2314
3cfc0f3a
MN
2315 ret = lxc_convert_mac(hwaddr, &sockaddr);
2316 if (ret) {
2317 ERROR("mac address '%s' conversion failed : %s",
2318 hwaddr, strerror(-ret));
0ad19a3f 2319 return -1;
2320 }
2321
2322 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2323 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2324 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2325
025ed0f3 2326 process_lock();
0ad19a3f 2327 fd = socket(AF_INET, SOCK_DGRAM, 0);
025ed0f3 2328 process_unlock();
0ad19a3f 2329 if (fd < 0) {
3ab87b66 2330 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2331 return -1;
2332 }
2333
2334 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
025ed0f3 2335 process_lock();
0ad19a3f 2336 close(fd);
025ed0f3 2337 process_unlock();
0ad19a3f 2338 if (ret)
3ab87b66 2339 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2340
5da6aa8c 2341 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2342
0ad19a3f 2343 return ret;
2344}
2345
82d5ae15 2346static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2347{
82d5ae15
DL
2348 struct lxc_list *iterator;
2349 struct lxc_inetdev *inetdev;
3cfc0f3a 2350 int err;
0ad19a3f 2351
82d5ae15
DL
2352 lxc_list_for_each(iterator, ip) {
2353
2354 inetdev = iterator->elem;
2355
0093bb8c
DL
2356 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2357 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2358 if (err) {
2359 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2360 ifindex, strerror(-err));
82d5ae15
DL
2361 return -1;
2362 }
2363 }
2364
2365 return 0;
0ad19a3f 2366}
2367
82d5ae15 2368static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2369{
82d5ae15 2370 struct lxc_list *iterator;
7fa9074f 2371 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2372 int err;
0ad19a3f 2373
82d5ae15
DL
2374 lxc_list_for_each(iterator, ip) {
2375
2376 inet6dev = iterator->elem;
2377
b3df193c 2378 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2379 &inet6dev->mcast, &inet6dev->acast,
2380 inet6dev->prefix);
3cfc0f3a
MN
2381 if (err) {
2382 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2383 ifindex, strerror(-err));
82d5ae15 2384 return -1;
3cfc0f3a 2385 }
82d5ae15
DL
2386 }
2387
2388 return 0;
0ad19a3f 2389}
2390
82d5ae15 2391static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2392{
0ad19a3f 2393 char ifname[IFNAMSIZ];
0ad19a3f 2394 char *current_ifname = ifname;
3cfc0f3a 2395 int err;
0ad19a3f 2396
82d5ae15
DL
2397 /* empty network namespace */
2398 if (!netdev->ifindex) {
b0efbac4 2399 if (netdev->flags & IFF_UP) {
d472214b 2400 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2401 if (err) {
2402 ERROR("failed to set the loopback up : %s",
2403 strerror(-err));
82d5ae15
DL
2404 return -1;
2405 }
82d5ae15 2406 }
7b57e8b6 2407 return 0;
0ad19a3f 2408 }
13954cce 2409
b466dc33
BP
2410 /* get the new ifindex in case of physical netdev */
2411 if (netdev->type == LXC_NET_PHYS)
2412 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2413 ERROR("failed to get ifindex for %s",
2414 netdev->link);
2415 return -1;
2416 }
2417
82d5ae15
DL
2418 /* retrieve the name of the interface */
2419 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2420 ERROR("no interface corresponding to index '%d'",
82d5ae15 2421 netdev->ifindex);
0ad19a3f 2422 return -1;
2423 }
13954cce 2424
018ef520 2425 /* default: let the system to choose one interface name */
9d083402 2426 if (!netdev->name)
fb6d9b2f
DL
2427 netdev->name = netdev->type == LXC_NET_PHYS ?
2428 netdev->link : "eth%d";
018ef520 2429
82d5ae15 2430 /* rename the interface name */
b84f58b9 2431 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3cfc0f3a
MN
2432 if (err) {
2433 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2434 strerror(-err));
018ef520
DL
2435 return -1;
2436 }
2437
2438 /* Re-read the name of the interface because its name has changed
2439 * and would be automatically allocated by the system
2440 */
82d5ae15 2441 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2442 ERROR("no interface corresponding to index '%d'",
82d5ae15 2443 netdev->ifindex);
018ef520 2444 return -1;
0ad19a3f 2445 }
2446
82d5ae15
DL
2447 /* set a mac address */
2448 if (netdev->hwaddr) {
2449 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2450 ERROR("failed to setup hw address for '%s'",
82d5ae15 2451 current_ifname);
0ad19a3f 2452 return -1;
2453 }
2454 }
2455
82d5ae15
DL
2456 /* setup ipv4 addresses on the interface */
2457 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2458 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2459 ifname);
2460 return -1;
2461 }
2462
82d5ae15
DL
2463 /* setup ipv6 addresses on the interface */
2464 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2465 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2466 ifname);
2467 return -1;
2468 }
2469
82d5ae15 2470 /* set the network device up */
b0efbac4 2471 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2472 int err;
2473
d472214b 2474 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2475 if (err) {
2476 ERROR("failed to set '%s' up : %s", current_ifname,
2477 strerror(-err));
0ad19a3f 2478 return -1;
2479 }
2480
2481 /* the network is up, make the loopback up too */
d472214b 2482 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2483 if (err) {
2484 ERROR("failed to set the loopback up : %s",
2485 strerror(-err));
0ad19a3f 2486 return -1;
2487 }
2488 }
2489
f8fee0e2
MK
2490 /* We can only set up the default routes after bringing
2491 * up the interface, sine bringing up the interface adds
2492 * the link-local routes and we can't add a default
2493 * route if the gateway is not reachable. */
2494
2495 /* setup ipv4 gateway on the interface */
2496 if (netdev->ipv4_gateway) {
2497 if (!(netdev->flags & IFF_UP)) {
2498 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2499 return -1;
2500 }
2501
2502 if (lxc_list_empty(&netdev->ipv4)) {
2503 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2504 return -1;
2505 }
2506
2507 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2508 if (err) {
fc739df5
SG
2509 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2510 if (err) {
2511 ERROR("failed to add ipv4 dest for '%s': %s",
2512 ifname, strerror(-err));
2513 }
2514
2515 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2516 if (err) {
2517 ERROR("failed to setup ipv4 gateway for '%s': %s",
2518 ifname, strerror(-err));
2519 if (netdev->ipv4_gateway_auto) {
2520 char buf[INET_ADDRSTRLEN];
2521 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2522 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2523 }
2524 return -1;
19a26f82 2525 }
f8fee0e2
MK
2526 }
2527 }
2528
2529 /* setup ipv6 gateway on the interface */
2530 if (netdev->ipv6_gateway) {
2531 if (!(netdev->flags & IFF_UP)) {
2532 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2533 return -1;
2534 }
2535
2536 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2537 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2538 return -1;
2539 }
2540
2541 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2542 if (err) {
fc739df5
SG
2543 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2544 if (err) {
2545 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2546 ifname, strerror(-err));
19a26f82 2547 }
fc739df5
SG
2548
2549 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2550 if (err) {
2551 ERROR("failed to setup ipv6 gateway for '%s': %s",
2552 ifname, strerror(-err));
2553 if (netdev->ipv6_gateway_auto) {
2554 char buf[INET6_ADDRSTRLEN];
2555 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2556 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2557 }
2558 return -1;
2559 }
f8fee0e2
MK
2560 }
2561 }
2562
cd54d859
DL
2563 DEBUG("'%s' has been setup", current_ifname);
2564
0ad19a3f 2565 return 0;
2566}
2567
5f4535a3 2568static int setup_network(struct lxc_list *network)
0ad19a3f 2569{
82d5ae15 2570 struct lxc_list *iterator;
82d5ae15 2571 struct lxc_netdev *netdev;
0ad19a3f 2572
5f4535a3 2573 lxc_list_for_each(iterator, network) {
cd54d859 2574
5f4535a3 2575 netdev = iterator->elem;
82d5ae15
DL
2576
2577 if (setup_netdev(netdev)) {
2578 ERROR("failed to setup netdev");
2579 return -1;
2580 }
2581 }
cd54d859 2582
5f4535a3
DL
2583 if (!lxc_list_empty(network))
2584 INFO("network has been setup");
cd54d859
DL
2585
2586 return 0;
0ad19a3f 2587}
2588
7b35f3d6
SH
2589void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2590{
2591 int i;
2592
2593 INFO("running to reset %d nic names", conf->num_savednics);
2594 for (i=0; i<conf->num_savednics; i++) {
2595 struct saved_nic *s = &conf->saved_nics[i];
2596 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2597 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2598 free(s->orig_name);
2599 }
2600 conf->num_savednics = 0;
2601 free(conf->saved_nics);
2602}
2603
ae9242c8
SH
2604static char *default_rootfs_mount = LXCROOTFSMOUNT;
2605
7b379ab3 2606struct lxc_conf *lxc_conf_init(void)
089cd8b8 2607{
7b379ab3 2608 struct lxc_conf *new;
26ddeedd 2609 int i;
7b379ab3
MN
2610
2611 new = malloc(sizeof(*new));
2612 if (!new) {
2613 ERROR("lxc_conf_init : %m");
2614 return NULL;
2615 }
2616 memset(new, 0, sizeof(*new));
2617
b40a606e 2618 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2619 new->personality = -1;
bc6928ff 2620 new->autodev = -1;
596a818d
DE
2621 new->console.log_path = NULL;
2622 new->console.log_fd = -1;
28a4b0e5 2623 new->console.path = NULL;
63376d7d 2624 new->console.peer = -1;
b5159817
DE
2625 new->console.peerpty.busy = -1;
2626 new->console.peerpty.master = -1;
2627 new->console.peerpty.slave = -1;
63376d7d
DL
2628 new->console.master = -1;
2629 new->console.slave = -1;
2630 new->console.name[0] = '\0';
d2e30e99 2631 new->maincmd_fd = -1;
54c30e29 2632 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2633 if (!new->rootfs.mount) {
2634 ERROR("lxc_conf_init : %m");
2635 free(new);
2636 return NULL;
2637 }
2f3f41d0 2638 new->kmsg = 1;
7b379ab3
MN
2639 lxc_list_init(&new->cgroup);
2640 lxc_list_init(&new->network);
2641 lxc_list_init(&new->mount_list);
81810dd1 2642 lxc_list_init(&new->caps);
1fb86a7c 2643 lxc_list_init(&new->keepcaps);
f6d3e3e4 2644 lxc_list_init(&new->id_map);
26ddeedd
SH
2645 for (i=0; i<NUM_LXC_HOOKS; i++)
2646 lxc_list_init(&new->hooks[i]);
fe4de9a6
DE
2647 new->lsm_aa_profile = NULL;
2648 new->lsm_se_context = NULL;
e075f5d9 2649 new->lsm_umount_proc = 0;
7b379ab3 2650
9f30a190
MM
2651 for (i = 0; i < LXC_NS_MAX; i++)
2652 new->inherit_ns_fd[i] = -1;
2653
7b379ab3 2654 return new;
089cd8b8
DL
2655}
2656
e3b4c4c4 2657static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2658{
8634bc19 2659 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2660 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2661 int err;
13954cce 2662
e892973e
DL
2663 if (netdev->priv.veth_attr.pair)
2664 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2665 else {
9ba8130c
SH
2666 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2667 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2668 ERROR("veth1 name too long");
2669 return -1;
2670 }
4a0ba80d 2671 veth1 = mkifname(veth1buf);
ad40563e
ÇO
2672 if (!veth1) {
2673 ERROR("failed to allocate a temporary name");
2674 return -1;
2675 }
74a2b586
JK
2676 /* store away for deconf */
2677 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2678 }
82d5ae15 2679
0e391e57 2680 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
4a0ba80d 2681 veth2 = mkifname(veth2buf);
ad40563e 2682 if (!veth2) {
82d5ae15 2683 ERROR("failed to allocate a temporary name");
ad40563e 2684 goto out_delete;
0ad19a3f 2685 }
2686
3cfc0f3a
MN
2687 err = lxc_veth_create(veth1, veth2);
2688 if (err) {
2689 ERROR("failed to create %s-%s : %s", veth1, veth2,
2690 strerror(-err));
ad40563e 2691 goto out_delete;
0ad19a3f 2692 }
13954cce 2693
49684c0b
CS
2694 /* changing the high byte of the mac address to 0xfe, the bridge interface
2695 * will always keep the host's mac address and not take the mac address
2696 * of a container */
2697 err = setup_private_host_hw_addr(veth1);
2698 if (err) {
2699 ERROR("failed to change mac address of host interface '%s' : %s",
2700 veth1, strerror(-err));
2701 goto out_delete;
2702 }
2703
82d5ae15 2704 if (netdev->mtu) {
d472214b 2705 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2706 if (!err)
d472214b 2707 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2708 if (err) {
2709 ERROR("failed to set mtu '%s' for %s-%s : %s",
2710 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2711 goto out_delete;
75d09f83
DL
2712 }
2713 }
2714
3cfc0f3a
MN
2715 if (netdev->link) {
2716 err = lxc_bridge_attach(netdev->link, veth1);
2717 if (err) {
2718 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2719 veth1, netdev->link, strerror(-err));
2720 goto out_delete;
2721 }
eb14c10a
DL
2722 }
2723
82d5ae15
DL
2724 netdev->ifindex = if_nametoindex(veth2);
2725 if (!netdev->ifindex) {
36eb9bde 2726 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2727 goto out_delete;
2728 }
2729
d472214b 2730 err = lxc_netdev_up(veth1);
6e35af2e
DL
2731 if (err) {
2732 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2733 goto out_delete;
0ad19a3f 2734 }
2735
e3b4c4c4 2736 if (netdev->upscript) {
751d9dcd
DL
2737 err = run_script(handler->name, "net", netdev->upscript, "up",
2738 "veth", veth1, (char*) NULL);
2739 if (err)
e3b4c4c4 2740 goto out_delete;
e3b4c4c4
ST
2741 }
2742
82d5ae15
DL
2743 DEBUG("instanciated veth '%s/%s', index is '%d'",
2744 veth1, veth2, netdev->ifindex);
2745
6ab9ab6d 2746 return 0;
eb14c10a
DL
2747
2748out_delete:
b84f58b9 2749 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2750 if (!netdev->priv.veth_attr.pair && veth1)
2751 free(veth1);
2752 if(veth2)
2753 free(veth2);
6ab9ab6d 2754 return -1;
13954cce 2755}
d957ae2d 2756
74a2b586
JK
2757static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2758{
2759 char *veth1;
2760 int err;
2761
2762 if (netdev->priv.veth_attr.pair)
2763 veth1 = netdev->priv.veth_attr.pair;
2764 else
2765 veth1 = netdev->priv.veth_attr.veth1;
2766
2767 if (netdev->downscript) {
2768 err = run_script(handler->name, "net", netdev->downscript,
2769 "down", "veth", veth1, (char*) NULL);
2770 if (err)
2771 return -1;
2772 }
2773 return 0;
2774}
2775
e3b4c4c4 2776static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2777{
0e391e57 2778 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2779 int err;
d957ae2d
MT
2780
2781 if (!netdev->link) {
2782 ERROR("no link specified for macvlan netdev");
2783 return -1;
2784 }
13954cce 2785
9ba8130c
SH
2786 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2787 if (err >= sizeof(peerbuf))
2788 return -1;
82d5ae15 2789
4a0ba80d 2790 peer = mkifname(peerbuf);
ad40563e 2791 if (!peer) {
82d5ae15
DL
2792 ERROR("failed to make a temporary name");
2793 return -1;
0ad19a3f 2794 }
2795
3cfc0f3a
MN
2796 err = lxc_macvlan_create(netdev->link, peer,
2797 netdev->priv.macvlan_attr.mode);
2798 if (err) {
2799 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2800 peer, netdev->link, strerror(-err));
ad40563e 2801 goto out;
0ad19a3f 2802 }
2803
82d5ae15
DL
2804 netdev->ifindex = if_nametoindex(peer);
2805 if (!netdev->ifindex) {
36eb9bde 2806 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2807 goto out;
22ebac19 2808 }
2809
e3b4c4c4 2810 if (netdev->upscript) {
751d9dcd
DL
2811 err = run_script(handler->name, "net", netdev->upscript, "up",
2812 "macvlan", netdev->link, (char*) NULL);
2813 if (err)
ad40563e 2814 goto out;
e3b4c4c4
ST
2815 }
2816
e892973e
DL
2817 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2818 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2819
d957ae2d 2820 return 0;
ad40563e
ÇO
2821out:
2822 lxc_netdev_delete_by_name(peer);
2823 free(peer);
2824 return -1;
0ad19a3f 2825}
2826
74a2b586
JK
2827static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2828{
2829 int err;
2830
2831 if (netdev->downscript) {
2832 err = run_script(handler->name, "net", netdev->downscript,
2833 "down", "macvlan", netdev->link,
2834 (char*) NULL);
2835 if (err)
2836 return -1;
2837 }
2838 return 0;
2839}
2840
26c39028 2841/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2842static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2843{
2844 char peer[IFNAMSIZ];
3cfc0f3a 2845 int err;
26c39028
JHS
2846
2847 if (!netdev->link) {
2848 ERROR("no link specified for vlan netdev");
2849 return -1;
2850 }
2851
9ba8130c
SH
2852 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2853 if (err >= sizeof(peer)) {
2854 ERROR("peer name too long");
2855 return -1;
2856 }
26c39028 2857
3cfc0f3a
MN
2858 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2859 if (err) {
2860 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2861 peer, netdev->link, strerror(-err));
26c39028
JHS
2862 return -1;
2863 }
2864
2865 netdev->ifindex = if_nametoindex(peer);
2866 if (!netdev->ifindex) {
2867 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2868 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2869 return -1;
2870 }
2871
e892973e
DL
2872 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2873 netdev->ifindex);
2874
26c39028
JHS
2875 return 0;
2876}
2877
74a2b586
JK
/*
 * Shutdown hook for vlan devices: nothing to do, always returns 0.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* both arguments are intentionally unused */
	(void)handler;
	(void)netdev;

	return 0;
}
2882
e3b4c4c4 2883static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2884{
6168e99f
DL
2885 if (!netdev->link) {
2886 ERROR("no link specified for the physical interface");
2887 return -1;
2888 }
2889
9d083402 2890 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2891 if (!netdev->ifindex) {
9d083402 2892 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2893 return -1;
2894 }
2895
e3b4c4c4
ST
2896 if (netdev->upscript) {
2897 int err;
751d9dcd
DL
2898 err = run_script(handler->name, "net", netdev->upscript,
2899 "up", "phys", netdev->link, (char*) NULL);
2900 if (err)
e3b4c4c4 2901 return -1;
e3b4c4c4
ST
2902 }
2903
82d5ae15 2904 return 0;
0ad19a3f 2905}
2906
74a2b586
JK
2907static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2908{
2909 int err;
2910
2911 if (netdev->downscript) {
2912 err = run_script(handler->name, "net", netdev->downscript,
2913 "down", "phys", netdev->link, (char*) NULL);
2914 if (err)
2915 return -1;
2916 }
2917 return 0;
2918}
2919
e3b4c4c4 2920static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2921{
82d5ae15 2922 netdev->ifindex = 0;
e3b4c4c4
ST
2923 if (netdev->upscript) {
2924 int err;
751d9dcd
DL
2925 err = run_script(handler->name, "net", netdev->upscript,
2926 "up", "empty", (char*) NULL);
2927 if (err)
e3b4c4c4 2928 return -1;
e3b4c4c4 2929 }
82d5ae15 2930 return 0;
0ad19a3f 2931}
2932
74a2b586
JK
2933static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2934{
2935 int err;
2936
2937 if (netdev->downscript) {
2938 err = run_script(handler->name, "net", netdev->downscript,
2939 "down", "empty", (char*) NULL);
2940 if (err)
2941 return -1;
2942 }
2943 return 0;
2944}
2945
e3b4c4c4 2946int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2947{
e3b4c4c4 2948 struct lxc_list *network = &handler->conf->network;
82d5ae15 2949 struct lxc_list *iterator;
82d5ae15 2950 struct lxc_netdev *netdev;
cbef6c52
SH
2951 int am_root = (getuid() == 0);
2952
2953 if (!am_root)
2954 return 0;
0ad19a3f 2955
5f4535a3 2956 lxc_list_for_each(iterator, network) {
0ad19a3f 2957
5f4535a3 2958 netdev = iterator->elem;
13954cce 2959
24654103 2960 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2961 ERROR("invalid network configuration type '%d'",
5f4535a3 2962 netdev->type);
82d5ae15
DL
2963 return -1;
2964 }
0ad19a3f 2965
e3b4c4c4 2966 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2967 ERROR("failed to create netdev");
2968 return -1;
2969 }
e3b4c4c4 2970
0ad19a3f 2971 }
2972
2973 return 0;
2974}
2975
74a2b586 2976void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2977{
74a2b586 2978 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2979 struct lxc_list *iterator;
2980 struct lxc_netdev *netdev;
2981
2982 lxc_list_for_each(iterator, network) {
2983 netdev = iterator->elem;
d472214b 2984
74a2b586 2985 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2986 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2987 WARN("failed to rename to the initial name the " \
2988 "netdev '%s'", netdev->link);
d472214b 2989 continue;
d8f8e352 2990 }
d472214b 2991
74a2b586
JK
2992 if (netdev_deconf[netdev->type](handler, netdev)) {
2993 WARN("failed to destroy netdev");
2994 }
2995
d8f8e352
DL
2996 /* Recent kernel remove the virtual interfaces when the network
2997 * namespace is destroyed but in case we did not moved the
2998 * interface to the network namespace, we have to destroy it
2999 */
74a2b586
JK
3000 if (netdev->ifindex != 0 &&
3001 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3002 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3003 }
3004}
3005
cbef6c52
SH
3006int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
3007{
3008 pid_t child;
3009
3010 if (netdev->type != LXC_NET_VETH) {
3011 ERROR("nic type %d not support for unprivileged use",
3012 netdev->type);
3013 return -1;
3014 }
3015
3016 if ((child = fork()) < 0) {
3017 SYSERROR("fork");
3018 return -1;
3019 }
3020
3021 if (child > 0)
3022 return wait_for_pid(child);
3023
3024 // Call lxc-user-nic pid type bridge
3025 char pidstr[20];
4119204e 3026 char *args[] = { "lxc-user-nic", pidstr, "veth", netdev->link, netdev->name, NULL };
cbef6c52
SH
3027 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3028 pidstr[19] = '\0';
3029 execvp("lxc-user-nic", args);
3030 SYSERROR("execvp lxc-user-nic");
3031 exit(1);
3032}
3033
5f4535a3 3034int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3035{
82d5ae15 3036 struct lxc_list *iterator;
82d5ae15 3037 struct lxc_netdev *netdev;
cbef6c52 3038 int am_root = (getuid() == 0);
3cfc0f3a 3039 int err;
0ad19a3f 3040
5f4535a3 3041 lxc_list_for_each(iterator, network) {
82d5ae15 3042
5f4535a3 3043 netdev = iterator->elem;
82d5ae15 3044
cbef6c52
SH
3045 if (!am_root) {
3046 if (unpriv_assign_nic(netdev, pid))
3047 return -1;
3048 // TODO fill in netdev->ifindex and name
3049 continue;
3050 }
236087a6
DL
3051 /* empty network namespace, nothing to move */
3052 if (!netdev->ifindex)
3053 continue;
3054
d472214b 3055 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
3056 if (err) {
3057 ERROR("failed to move '%s' to the container : %s",
3058 netdev->link, strerror(-err));
82d5ae15
DL
3059 return -1;
3060 }
3061
c1c75c04 3062 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3063 }
3064
3065 return 0;
3066}
3067
251d0d2a
DE
3068static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3069 size_t buf_size)
f6d3e3e4
SH
3070{
3071 char path[PATH_MAX];
e4ccd113 3072 int ret, closeret;
f6d3e3e4
SH
3073 FILE *f;
3074
3075 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3076 if (ret < 0 || ret >= PATH_MAX) {
3077 fprintf(stderr, "%s: path name too long", __func__);
3078 return -E2BIG;
3079 }
025ed0f3 3080 process_lock();
f6d3e3e4 3081 f = fopen(path, "w");
025ed0f3 3082 process_unlock();
f6d3e3e4
SH
3083 if (!f) {
3084 perror("open");
3085 return -EINVAL;
3086 }
251d0d2a 3087 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3088 if (ret < 0)
e4ccd113 3089 SYSERROR("writing id mapping");
025ed0f3 3090 process_lock();
e4ccd113 3091 closeret = fclose(f);
025ed0f3 3092 process_unlock();
e4ccd113
SH
3093 if (closeret)
3094 SYSERROR("writing id mapping");
3095 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3096}
3097
3098int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3099{
3100 struct lxc_list *iterator;
3101 struct id_map *map;
3102 int ret = 0;
251d0d2a 3103 enum idtype type;
4f7521b4 3104 char *buf = NULL, *pos;
cf3ef16d 3105 int am_root = (getuid() == 0);
251d0d2a
DE
3106
3107 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3108 int left, fill;
cf3ef16d
SH
3109 int had_entry = 0;
3110 if (!buf) {
3111 buf = pos = malloc(4096);
4f7521b4
SH
3112 if (!buf)
3113 return -ENOMEM;
cf3ef16d
SH
3114 }
3115 pos = buf;
3116 if (!am_root)
3117 pos += sprintf(buf, "new%cidmap %d ",
3118 type == ID_TYPE_UID ? 'u' : 'g',
3119 pid);
4f7521b4 3120
cf3ef16d
SH
3121 lxc_list_for_each(iterator, idmap) {
3122 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3123 map = iterator->elem;
cf3ef16d
SH
3124 if (map->idtype != type)
3125 continue;
3126
3127 had_entry = 1;
3128 left = 4096 - (pos - buf);
3129 fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
3130 map->hostid, map->range);
3131 if (fill <= 0 || fill >= left)
3132 SYSERROR("snprintf failed, too many mappings");
3133 pos += fill;
251d0d2a 3134 }
cf3ef16d 3135 if (!had_entry)
4f7521b4 3136 continue;
cf3ef16d
SH
3137 left = 4096 - (pos - buf);
3138 fill = snprintf(pos, left, "\n");
3139 if (fill <= 0 || fill >= left)
3140 SYSERROR("snprintf failed, too many mappings");
3141 pos += fill;
3142
3143 if (am_root)
3144 ret = write_id_mapping(type, pid, buf, pos-buf);
3145 else
3146 ret = system(buf);
3147
f6d3e3e4
SH
3148 if (ret)
3149 break;
3150 }
251d0d2a 3151
4f7521b4
SH
3152 if (buf)
3153 free(buf);
f6d3e3e4
SH
3154 return ret;
3155}
3156
cf3ef16d
SH
3157/*
3158 * return the host uid to which the container root is mapped, or -1 on
3159 * error
3160 */
c4d10a05 3161uid_t get_mapped_rootid(struct lxc_conf *conf)
cf3ef16d
SH
3162{
3163 struct lxc_list *it;
3164 struct id_map *map;
3165
3166 lxc_list_for_each(it, &conf->id_map) {
3167 map = it->elem;
3168 if (map->idtype != ID_TYPE_UID)
3169 continue;
3170 if (map->nsid != 0)
3171 continue;
c4d10a05 3172 return (uid_t) map->hostid;
cf3ef16d 3173 }
c4d10a05 3174 return (uid_t)-1;
cf3ef16d
SH
3175}
3176
57d116ab 3177int mapped_hostid(int id, struct lxc_conf *conf)
cf3ef16d
SH
3178{
3179 struct lxc_list *it;
3180 struct id_map *map;
3181 lxc_list_for_each(it, &conf->id_map) {
3182 map = it->elem;
3183 if (map->idtype != ID_TYPE_UID)
3184 continue;
3185 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3186 return (id - map->hostid) + map->nsid;
cf3ef16d 3187 }
57d116ab 3188 return -1;
cf3ef16d
SH
3189}
3190
3191int find_unmapped_nsuid(struct lxc_conf *conf)
3192{
3193 struct lxc_list *it;
3194 struct id_map *map;
3195 uid_t freeid = 0;
3196again:
3197 lxc_list_for_each(it, &conf->id_map) {
3198 map = it->elem;
3199 if (map->idtype != ID_TYPE_UID)
3200 continue;
3201 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3202 freeid = map->nsid + map->range;
3203 goto again;
3204 }
3205 }
3206 return freeid;
3207}
3208
19a26f82
MK
3209int lxc_find_gateway_addresses(struct lxc_handler *handler)
3210{
3211 struct lxc_list *network = &handler->conf->network;
3212 struct lxc_list *iterator;
3213 struct lxc_netdev *netdev;
3214 int link_index;
3215
3216 lxc_list_for_each(iterator, network) {
3217 netdev = iterator->elem;
3218
3219 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3220 continue;
3221
3222 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3223 ERROR("gateway = auto only supported for "
3224 "veth and macvlan");
3225 return -1;
3226 }
3227
3228 if (!netdev->link) {
3229 ERROR("gateway = auto needs a link interface");
3230 return -1;
3231 }
3232
3233 link_index = if_nametoindex(netdev->link);
3234 if (!link_index)
3235 return -EINVAL;
3236
3237 if (netdev->ipv4_gateway_auto) {
3238 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3239 ERROR("failed to automatically find ipv4 gateway "
3240 "address from link interface '%s'", netdev->link);
3241 return -1;
3242 }
3243 }
3244
3245 if (netdev->ipv6_gateway_auto) {
3246 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3247 ERROR("failed to automatically find ipv6 gateway "
3248 "address from link interface '%s'", netdev->link);
3249 return -1;
3250 }
3251 }
3252 }
3253
3254 return 0;
3255}
3256
5e4a62bf 3257int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3258{
5e4a62bf 3259 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3260 int i, ret;
b0a33c1e 3261
5e4a62bf
DL
3262 /* no tty in the configuration */
3263 if (!conf->tty)
b0a33c1e 3264 return 0;
3265
13954cce 3266 tty_info->pty_info =
e4e7d59d 3267 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3268 if (!tty_info->pty_info) {
36eb9bde 3269 SYSERROR("failed to allocate pty_info");
985d15b1 3270 return -1;
b0a33c1e 3271 }
3272
985d15b1 3273 for (i = 0; i < conf->tty; i++) {
13954cce 3274
b0a33c1e 3275 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3276
025ed0f3
SH
3277 process_lock();
3278 ret = openpty(&pty_info->master, &pty_info->slave,
3279 pty_info->name, NULL, NULL);
3280 process_unlock();
3281 if (ret) {
36eb9bde 3282 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3283 tty_info->nbtty = i;
3284 lxc_delete_tty(tty_info);
3285 return -1;
b0a33c1e 3286 }
3287
5332bb84
DL
3288 DEBUG("allocated pty '%s' (%d/%d)",
3289 pty_info->name, pty_info->master, pty_info->slave);
3290
b035ad62
MS
3291 /* Prevent leaking the file descriptors to the container */
3292 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3293 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3294
b0a33c1e 3295 pty_info->busy = 0;
3296 }
3297
985d15b1 3298 tty_info->nbtty = conf->tty;
1ac470c0
DL
3299
3300 INFO("tty's configured");
3301
985d15b1 3302 return 0;
b0a33c1e 3303}
3304
3305void lxc_delete_tty(struct lxc_tty_info *tty_info)
3306{
3307 int i;
3308
3309 for (i = 0; i < tty_info->nbtty; i++) {
3310 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3311
025ed0f3 3312 process_lock();
b0a33c1e 3313 close(pty_info->master);
3314 close(pty_info->slave);
025ed0f3 3315 process_unlock();
b0a33c1e 3316 }
3317
3318 free(tty_info->pty_info);
3319 tty_info->nbtty = 0;
3320}
3321
/*
 * chown_mapped_root: give ownership of path to the host uid that the
 * container's root (nsid 0) is mapped to.
 *
 * When running as root the chown is done directly. Otherwise an
 * unprivileged user cannot chown to a subuid, so the chown is run through
 * "lxc-usernsexec" with three maps:
 *   u:0:<rootid>:1         - uid 0 in the new namespace is the mapped root
 *   u:<hostuid>:<hostuid>:1 - the calling user's uid is mapped onto itself
 *   g:0:<gid>:1            - gid 0 in the new namespace is the caller's gid
 * and the command executed inside is "chown 0 <path>".
 *
 * Returns 0 on success, -1 when no root mapping exists, the direct chown
 * fails or fork fails; otherwise the result of wait_for_pid() on the
 * helper process.
 */
int chown_mapped_root(char *path, struct lxc_conf *conf)
{
	uid_t rootid;
	pid_t pid;

	/*
	 * rootid is unsigned, so a "<= 0" test can only ever detect 0.
	 * NOTE(review): get_mapped_rootid() is presumed to report "no
	 * mapping" as -1 (as mapped_hostid() does); once stored in a uid_t
	 * that is (uid_t)-1, which must be rejected explicitly - passing it
	 * on would make chown() silently leave the owner unchanged.
	 */
	rootid = get_mapped_rootid(conf);
	if (rootid == 0 || rootid == (uid_t)-1) {
		ERROR("No mapping for container root");
		return -1;
	}

	if (geteuid() == 0) {
		if (chown(path, rootid, -1) < 0) {
			ERROR("Error chowning %s", path);
			return -1;
		}
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		SYSERROR("Failed forking");
		return -1;
	}

	if (!pid) {
		/*
		 * Child: every failure below must terminate the process.
		 * Returning would let the child continue executing the
		 * caller's code as a second copy of the parent.
		 */
		uid_t hostuid = geteuid();
		int ret;
		char map1[100], map2[100], map3[100];
		char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "-m",
				map3, "--", "chown", "0", path, NULL};

		// "u:0:rootid:1"
		ret = snprintf(map1, sizeof(map1), "u:0:%u:1",
			       (unsigned int)rootid);
		if (ret < 0 || (size_t)ret >= sizeof(map1)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		// "u:hostuid:hostuid:1"
		ret = snprintf(map2, sizeof(map2), "u:%u:%u:1",
			       (unsigned int)hostuid, (unsigned int)hostuid);
		if (ret < 0 || (size_t)ret >= sizeof(map2)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		// "g:0:hostgid:1"
		ret = snprintf(map3, sizeof(map3), "g:0:%u:1",
			       (unsigned int)getgid());
		if (ret < 0 || (size_t)ret >= sizeof(map3)) {
			ERROR("Error gid printing map string");
			exit(1);
		}

		/* execvp only returns on failure */
		execvp("lxc-usernsexec", args);
		SYSERROR("Failed executing usernsexec");
		exit(1);
	}

	return wait_for_pid(pid);
}
3383
c4d10a05 3384int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3385{
c4d10a05 3386 int i;
f6d3e3e4 3387
c4d10a05 3388 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3389 return 0;
c4d10a05
SH
3390
3391 for (i = 0; i < c->tty_info.nbtty; i++) {
3392 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3393
3394 if (chown_mapped_root(pty_info->name, c) < 0) {
3395 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3396 return -1;
3397 }
3398 }
3399
c4d10a05
SH
3400 if (chown_mapped_root(c->console.name, c) < 0) {
3401 ERROR("Failed to chown %s", c->console.name);
3402 return -1;
3403 }
3404
f6d3e3e4
SH
3405 return 0;
3406}
3407
bc6928ff
MW
/*
 * This routine (check_autodev() below) is called when the configuration does
 * not already specify a value for autodev (mounting a file system on /dev and
 * populating it in a container). If a hard override value has not been
 * specified, then we try to apply some heuristics to determine if we should
 * switch to autodev mode.
 *
 * For instance, if the container has an /etc/systemd/system directory then it
 * is probably running systemd as the init process and it needs the autodev
 * mount to prevent it from mounting devtmpfs on /dev on its own, causing
 * conflicts in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev, as this then enables us to use subdirectories under /dev for the
 * container /dev directories and we can fake udev devices.
 */
/*
 * Argument block that check_autodev() receives through its void *data
 * parameter; argv[0], when set, is the command whose path is inspected.
 */
struct start_args {
	char *const *argv;
};

/* Upper bound on the symlink dereferences check_autodev() performs. */
#define MAX_SYMLINK_DEPTH 32
3428
3429int check_autodev( const char *rootfs, void *data )
3430{
3431 struct start_args *arg = data;
3432 int ret;
3433 int loop_count = 0;
3434 struct stat s;
3435 char absrootfs[MAXPATHLEN];
3436 char path[MAXPATHLEN];
3437 char abs_path[MAXPATHLEN];
3438 char *command = "/sbin/init";
3439
3440 if (rootfs == NULL || strlen(rootfs) == 0)
3441 return -2;
3442
3443 if (!realpath(rootfs, absrootfs))
3444 return -2;
3445
3446 if( arg && arg->argv[0] ) {
3447 command = arg->argv[0];
3448 DEBUG("Set exec command to %s\n", command );
3449 }
3450
3451 strncpy( path, command, MAXPATHLEN-1 );
3452
3453 if ( 0 != access(path, F_OK) || 0 != stat(path, &s) )
3454 return -2;
3455
3456 /* Dereference down the symlink merry path testing as we go. */
3457 /* If anything references systemd in the path - set autodev! */
3458 /* Renormalize to the rootfs before each dereference */
3459 /* Relative symlinks should fall out in the wash even with .. */
3460 while( 1 ) {
3461 if ( strstr( path, "systemd" ) ) {
3462 INFO("Container with systemd init detected - enabling autodev!");
3463 return 1;
3464 }
3465
3466 ret = snprintf(abs_path, MAXPATHLEN-1, "%s/%s", absrootfs, path);
3467 if (ret < 0 || ret > MAXPATHLEN)
3468 return -2;
3469
3470 ret = readlink( abs_path, path, MAXPATHLEN-1 );
3471
3472 if ( ( ret <= 0 ) || ( ++loop_count > MAX_SYMLINK_DEPTH ) ) {
3473 break; /* Break out for other tests */
3474 }
3475 path[ret] = '\0';
3476 }
3477
3478 /*
3479 * Add future checks here.
3480 * Return positive if we should go autodev
3481 * Return 0 if we should NOT go autodev
3482 * Return negative if we encounter an error or can not determine...
3483 */
3484
3485 /* All else fails, we don't need autodev */
3486 INFO("Autodev not required.");
3487 return 0;
3488}
3489
3490int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath, struct cgroup_process_info *cgroup_info, void *data)
0ad19a3f 3491{
6c544cb3
MM
3492 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3493 if (setup_utsname(lxc_conf->utsname)) {
3494 ERROR("failed to setup the utsname for '%s'", name);
3495 return -1;
3496 }
0ad19a3f 3497 }
3498
5f4535a3 3499 if (setup_network(&lxc_conf->network)) {
36eb9bde 3500 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3501 return -1;
0ad19a3f 3502 }
3503
283678ed 3504 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3505 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3506 return -1;
3507 }
5ea6163a 3508
cc28d0b0 3509 if (setup_rootfs(lxc_conf)) {
ac778708 3510 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3511 return -1;
0ad19a3f 3512 }
3513
bc6928ff
MW
3514 if (lxc_conf->autodev < 0) {
3515 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
3516 }
3517
3518 if (lxc_conf->autodev > 0) {
3519 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3520 ERROR("failed to mount /dev in the container");
c6883f38
SH
3521 return -1;
3522 }
3523 }
3524
368bbc02
CS
3525 /* do automatic mounts (mainly /proc and /sys), but exclude
3526 * those that need to wait until other stuff has finished
3527 */
b06b8511 3528 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3529 ERROR("failed to setup the automatic mounts for '%s'", name);
3530 return -1;
3531 }
3532
80a881b2 3533 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3534 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3535 return -1;
576f946d 3536 }
3537
c1dc38c2 3538 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3539 ERROR("failed to setup the mount entries for '%s'", name);
3540 return -1;
3541 }
3542
368bbc02
CS
3543 /* now mount only cgroup, if wanted;
3544 * before, /sys could not have been mounted
3545 * (is either mounted automatically or via fstab entries)
3546 */
b06b8511 3547 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3548 ERROR("failed to setup the automatic mounts for '%s'", name);
3549 return -1;
3550 }
3551
283678ed 3552 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3553 ERROR("failed to run mount hooks for container '%s'.", name);
3554 return -1;
3555 }
3556
bc6928ff 3557 if (lxc_conf->autodev > 0) {
283678ed 3558 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3559 ERROR("failed to run autodev hooks for container '%s'.", name);
3560 return -1;
3561 }
91c3830e
SH
3562 if (setup_autodev(lxc_conf->rootfs.mount)) {
3563 ERROR("failed to populate /dev in the container");
3564 return -1;
3565 }
3566 }
368bbc02 3567
37903589 3568 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3569 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3570 return -1;
6e590161 3571 }
3572
7e0e1d94
AV
3573 if (lxc_conf->kmsg) {
3574 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3575 ERROR("failed to setup kmsg for '%s'", name);
3576 }
1bd051a6 3577
37903589 3578 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3579 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3580 return -1;
b0a33c1e 3581 }
3582
fe4de9a6
DE
3583 /* mount /proc if needed for LSM transition */
3584 if (lsm_proc_mount(lxc_conf) < 0) {
3585 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3586 return -1;
e075f5d9 3587 }
e075f5d9 3588
ac778708 3589 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3590 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3591 return -1;
ed502555 3592 }
3593
571e6ec8 3594 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3595 ERROR("failed to setup the new pts instance");
95b5ffaf 3596 return -1;
3c26f34e 3597 }
3598
cccc74b5
DL
3599 if (setup_personality(lxc_conf->personality)) {
3600 ERROR("failed to setup personality");
3601 return -1;
3602 }
3603
f6d3e3e4 3604 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3605 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3606 if (!lxc_list_empty(&lxc_conf->caps)) {
3607 ERROR("Simultaneously requested dropping and keeping caps");
3608 return -1;
3609 }
3610 if (dropcaps_except(&lxc_conf->keepcaps)) {
3611 ERROR("failed to keep requested caps\n");
3612 return -1;
3613 }
3614 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3615 ERROR("failed to drop capabilities");
3616 return -1;
3617 }
81810dd1
DL
3618 }
3619
cd54d859
DL
3620 NOTICE("'%s' is setup.", name);
3621
0ad19a3f 3622 return 0;
3623}
26ddeedd 3624
283678ed
SH
3625int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3626 const char *lxcpath, char *argv[])
26ddeedd
SH
3627{
3628 int which = -1;
3629 struct lxc_list *it;
3630
3631 if (strcmp(hook, "pre-start") == 0)
3632 which = LXCHOOK_PRESTART;
5ea6163a
SH
3633 else if (strcmp(hook, "pre-mount") == 0)
3634 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3635 else if (strcmp(hook, "mount") == 0)
3636 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3637 else if (strcmp(hook, "autodev") == 0)
3638 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3639 else if (strcmp(hook, "start") == 0)
3640 which = LXCHOOK_START;
3641 else if (strcmp(hook, "post-stop") == 0)
3642 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3643 else if (strcmp(hook, "clone") == 0)
3644 which = LXCHOOK_CLONE;
26ddeedd
SH
3645 else
3646 return -1;
3647 lxc_list_for_each(it, &conf->hooks[which]) {
3648 int ret;
3649 char *hookname = it->elem;
283678ed 3650 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3651 if (ret)
3652 return ret;
3653 }
3654 return 0;
3655}
72d0e1cb 3656
427b3a21 3657static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3658{
3659 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3660 struct lxc_list *it2,*next;
72d0e1cb
SG
3661
3662 lxc_list_del(it);
3663
3664 if (netdev->link)
3665 free(netdev->link);
3666 if (netdev->name)
3667 free(netdev->name);
c9bb9a85
DE
3668 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3669 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3670 if (netdev->upscript)
3671 free(netdev->upscript);
3672 if (netdev->hwaddr)
3673 free(netdev->hwaddr);
3674 if (netdev->mtu)
3675 free(netdev->mtu);
3676 if (netdev->ipv4_gateway)
3677 free(netdev->ipv4_gateway);
3678 if (netdev->ipv6_gateway)
3679 free(netdev->ipv6_gateway);
9ebb03ad 3680 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3681 lxc_list_del(it2);
3682 free(it2->elem);
3683 free(it2);
3684 }
9ebb03ad 3685 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3686 lxc_list_del(it2);
3687 free(it2->elem);
3688 free(it2);
3689 }
d95db067 3690 free(netdev);
72d0e1cb
SG
3691 free(it);
3692}
3693
3694/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3695int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3696{
3697 char *p1;
3698 int ret, idx, i;
3699 struct lxc_list *it;
3700 struct lxc_netdev *netdev;
3701
3702 p1 = index(key, '.');
3703 if (!p1 || *(p1+1) == '\0')
3704 p1 = NULL;
3705
3706 ret = sscanf(key, "%d", &idx);
3707 if (ret != 1) return -1;
3708 if (idx < 0)
3709 return -1;
3710
3711 i = 0;
3712 lxc_list_for_each(it, &c->network) {
3713 if (i == idx)
3714 break;
3715 i++;
3716 }
3717 if (i < idx) // we don't have that many nics defined
3718 return -1;
3719
3720 if (!it || !it->elem)
3721 return -1;
3722
3723 netdev = it->elem;
3724
3725 if (!p1) {
3726 lxc_remove_nic(it);
3727 } else if (strcmp(p1, "ipv4") == 0) {
9ebb03ad
DE
3728 struct lxc_list *it2,*next;
3729 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3730 lxc_list_del(it2);
3731 free(it2->elem);
3732 free(it2);
3733 }
3734 } else if (strcmp(p1, "ipv6") == 0) {
9ebb03ad
DE
3735 struct lxc_list *it2,*next;
3736 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3737 lxc_list_del(it2);
3738 free(it2->elem);
3739 free(it2);
3740 }
3741 } else if (strcmp(p1, "link") == 0) {
3742 if (netdev->link) {
3743 free(netdev->link);
3744 netdev->link = NULL;
3745 }
3746 } else if (strcmp(p1, "name") == 0) {
3747 if (netdev->name) {
3748 free(netdev->name);
3749 netdev->name = NULL;
3750 }
3751 } else if (strcmp(p1, "script.up") == 0) {
3752 if (netdev->upscript) {
3753 free(netdev->upscript);
3754 netdev->upscript = NULL;
3755 }
3756 } else if (strcmp(p1, "hwaddr") == 0) {
3757 if (netdev->hwaddr) {
3758 free(netdev->hwaddr);
3759 netdev->hwaddr = NULL;
3760 }
3761 } else if (strcmp(p1, "mtu") == 0) {
3762 if (netdev->mtu) {
3763 free(netdev->mtu);
3764 netdev->mtu = NULL;
3765 }
3766 } else if (strcmp(p1, "ipv4_gateway") == 0) {
3767 if (netdev->ipv4_gateway) {
3768 free(netdev->ipv4_gateway);
3769 netdev->ipv4_gateway = NULL;
3770 }
3771 } else if (strcmp(p1, "ipv6_gateway") == 0) {
3772 if (netdev->ipv6_gateway) {
3773 free(netdev->ipv6_gateway);
3774 netdev->ipv6_gateway = NULL;
3775 }
3776 }
3777 else return -1;
3778
3779 return 0;
3780}
3781
3782int lxc_clear_config_network(struct lxc_conf *c)
3783{
9ebb03ad
DE
3784 struct lxc_list *it,*next;
3785 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3786 lxc_remove_nic(it);
3787 }
3788 return 0;
3789}
3790
3791int lxc_clear_config_caps(struct lxc_conf *c)
3792{
9ebb03ad 3793 struct lxc_list *it,*next;
72d0e1cb 3794
9ebb03ad 3795 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3796 lxc_list_del(it);
3797 free(it->elem);
3798 free(it);
3799 }
3800 return 0;
3801}
3802
4355ab5f 3803int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
3804 struct lxc_list *it, *next;
3805
4355ab5f 3806 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
3807 lxc_list_del(it);
3808 free(it->elem);
3809 free(it);
3810 }
3811 return 0;
3812}
3813
4355ab5f
SH
3814int lxc_clear_idmaps(struct lxc_conf *c)
3815{
3816 return lxc_free_idmap(&c->id_map);
3817}
3818
1fb86a7c
SH
3819int lxc_clear_config_keepcaps(struct lxc_conf *c)
3820{
3821 struct lxc_list *it,*next;
3822
3823 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3824 lxc_list_del(it);
3825 free(it->elem);
3826 free(it);
3827 }
3828 return 0;
3829}
3830
12a50cc6 3831int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 3832{
9ebb03ad 3833 struct lxc_list *it,*next;
72d0e1cb 3834 bool all = false;
12a50cc6 3835 const char *k = key + 11;
72d0e1cb
SG
3836
3837 if (strcmp(key, "lxc.cgroup") == 0)
3838 all = true;
3839
9ebb03ad 3840 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
3841 struct lxc_cgroup *cg = it->elem;
3842 if (!all && strcmp(cg->subsystem, k) != 0)
3843 continue;
3844 lxc_list_del(it);
3845 free(cg->subsystem);
3846 free(cg->value);
3847 free(cg);
3848 free(it);
3849 }
3850 return 0;
3851}
3852
3853int lxc_clear_mount_entries(struct lxc_conf *c)
3854{
9ebb03ad 3855 struct lxc_list *it,*next;
72d0e1cb 3856
9ebb03ad 3857 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
3858 lxc_list_del(it);
3859 free(it->elem);
3860 free(it);
3861 }
3862 return 0;
3863}
3864
12a50cc6 3865int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3866{
9ebb03ad 3867 struct lxc_list *it,*next;
17ed13a3 3868 bool all = false, done = false;
12a50cc6 3869 const char *k = key + 9;
72d0e1cb
SG
3870 int i;
3871
17ed13a3
SH
3872 if (strcmp(key, "lxc.hook") == 0)
3873 all = true;
3874
72d0e1cb 3875 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 3876 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 3877 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
3878 lxc_list_del(it);
3879 free(it->elem);
3880 free(it);
3881 }
3882 done = true;
72d0e1cb
SG
3883 }
3884 }
17ed13a3
SH
3885
3886 if (!done) {
3887 ERROR("Invalid hook key: %s", key);
3888 return -1;
3889 }
72d0e1cb
SG
3890 return 0;
3891}
8eb5694b 3892
7b35f3d6
SH
3893void lxc_clear_saved_nics(struct lxc_conf *conf)
3894{
3895 int i;
3896
3897 if (!conf->num_savednics)
3898 return;
3899 for (i=0; i < conf->num_savednics; i++)
3900 free(conf->saved_nics[i].orig_name);
3901 conf->saved_nics = 0;
3902 free(conf->saved_nics);
3903}
3904
8eb5694b
SH
3905void lxc_conf_free(struct lxc_conf *conf)
3906{
3907 if (!conf)
3908 return;
3909 if (conf->console.path)
3910 free(conf->console.path);
54c30e29 3911 if (conf->rootfs.mount)
8eb5694b 3912 free(conf->rootfs.mount);
d95db067
DE
3913 if (conf->rootfs.path)
3914 free(conf->rootfs.path);
3915 if (conf->utsname)
3916 free(conf->utsname);
3917 if (conf->ttydir)
3918 free(conf->ttydir);
3919 if (conf->fstab)
3920 free(conf->fstab);
fc7e8864
WM
3921 if (conf->rcfile)
3922 free(conf->rcfile);
8eb5694b 3923 lxc_clear_config_network(conf);
fe4de9a6
DE
3924 if (conf->lsm_aa_profile)
3925 free(conf->lsm_aa_profile);
3926 if (conf->lsm_se_context)
3927 free(conf->lsm_se_context);
769872f9 3928 lxc_seccomp_free(conf);
8eb5694b 3929 lxc_clear_config_caps(conf);
1fb86a7c 3930 lxc_clear_config_keepcaps(conf);
8eb5694b 3931 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 3932 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3933 lxc_clear_mount_entries(conf);
7b35f3d6 3934 lxc_clear_saved_nics(conf);
27c27d73 3935 lxc_clear_idmaps(conf);
8eb5694b
SH
3936 free(conf);
3937}
4355ab5f
SH
3938
/*
 * Context handed to run_userns_fn() in the cloned child:
 * the callback to run, its argument, and the pipe the child blocks on
 * (it reads one byte from p[0]) until the parent lets it proceed.
 */
struct userns_fn_data {
	int (*fn)(void *);	/* callback executed by the child */
	void *arg;		/* argument passed to fn */
	int p[2];		/* p[0]: child's read end, p[1]: write end */
};
3944
3945static int run_userns_fn(void *data)
3946{
3947 struct userns_fn_data *d = data;
3948 char c;
3949 // we're not sharing with the parent any more, if it was a thread
3950
3951 close(d->p[1]);
3952 if (read(d->p[0], &c, 1) != 1)
3953 return -1;
3954 close(d->p[0]);
3955 return d->fn(d->arg);
3956}
3957
3958/*
3959 * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
3960 * alread there.
3961 * We may want to generalize this to do gids as well as uids, but right now
3962 * it's not necessary.
3963 */
3964static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
3965{
3966 int hostid_mapped = mapped_hostid(uid, conf);
3967 struct lxc_list *new = NULL, *tmp, *it, *next;
3968 struct id_map *entry;
3969
3970 if (hostid_mapped < 0) {
3971 hostid_mapped = find_unmapped_nsuid(conf);
3972 if (hostid_mapped < 0) {
3973 ERROR("Could not find free uid to map");
3974 return NULL;
3975 }
3976 new = malloc(sizeof(*new));
3977 if (!new) {
3978 ERROR("Out of memory building id map");
3979 return NULL;
3980 }
3981 entry = malloc(sizeof(*entry));
3982 if (!entry) {
3983 free(new);
3984 ERROR("Out of memory building idmap entry");
3985 return NULL;
3986 }
3987 new->elem = entry;
3988 entry->idtype = ID_TYPE_UID;
3989 entry->nsid = hostid_mapped;
3990 entry->hostid = (unsigned long)uid;
3991 entry->range = 1;
3992 lxc_list_init(new);
3993 }
3994 lxc_list_for_each_safe(it, &conf->id_map, next) {
3995 tmp = malloc(sizeof(*tmp));
3996 if (!tmp)
3997 goto err;
3998 entry = malloc(sizeof(*entry));
3999 if (!entry) {
4000 free(tmp);
4001 goto err;
4002 }
4003 memset(entry, 0, sizeof(*entry));
4004 memcpy(entry, it->elem, sizeof(*entry));
4005 tmp->elem = entry;
4006 if (!new) {
4007 new = tmp;
4008 lxc_list_init(new);
4009 } else
4010 lxc_list_add_tail(new, tmp);
4011 }
4012
4013 return new;
4014
4015err:
4016 ERROR("Out of memory building a new uid map");
4017 lxc_free_idmap(new);
4018 return NULL;
4019}
4020
4021/*
4022 * Run a function in a new user namespace.
4023 * The caller's euid will be mapped in if it is not already.
4024 */
4025int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4026{
4027 int ret, pid;
4028 struct userns_fn_data d;
4029 char c = '1';
4030 int p[2];
4031 struct lxc_list *idmap;
4032
4033 process_lock();
4034 ret = pipe(p);
4035 process_unlock();
4036 if (ret < 0) {
4037 SYSERROR("opening pipe");
4038 return -1;
4039 }
4040 d.fn = fn;
4041 d.arg = data;
4042 d.p[0] = p[0];
4043 d.p[1] = p[1];
4044 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4045 if (pid < 0)
4046 goto err;
4047 process_lock();
4048 close(p[0]);
4049 process_unlock();
4050 p[0] = -1;
4051
4052 if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
4053 ERROR("Error adding self to container uid map");
4054 goto err;
4055 }
4056
4057 ret = lxc_map_ids(idmap, pid);
4058 lxc_free_idmap(idmap);
4059 if (ret < 0) {
4060 ERROR("Error setting up child mappings");
4061 goto err;
4062 }
4063
4064 // kick the child
4065 if (write(p[1], &c, 1) != 1) {
4066 SYSERROR("writing to pipe to child");
4067 goto err;
4068 }
4069
4070 if ((ret = wait_for_pid(pid)) < 0) {
4071 ERROR("Child returned an error: %d\n", ret);
4072 goto err;
4073 }
4074err:
4075 process_lock();
4076 if (p[0] != -1)
4077 close(p[0]);
4078 close(p[1]);
4079 process_unlock();
4080 return -1;
4081}