]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
conffile.c: Also clear text entries with no value
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
4a0ba80d 35#include <time.h>
e827ff7e
SG
36
37#if HAVE_PTY_H
b0a33c1e 38#include <pty.h>
e827ff7e
SG
39#else
40#include <../include/openpty.h>
41#endif
0ad19a3f 42
b3ecde1e
DL
43#include <linux/loop.h>
44
0ad19a3f 45#include <sys/types.h>
46#include <sys/utsname.h>
47#include <sys/param.h>
48#include <sys/stat.h>
49#include <sys/socket.h>
50#include <sys/mount.h>
51#include <sys/mman.h>
81810dd1 52#include <sys/prctl.h>
0ad19a3f 53
54#include <arpa/inet.h>
55#include <fcntl.h>
56#include <netinet/in.h>
57#include <net/if.h>
6f4a3756 58#include <libgen.h>
0ad19a3f 59
e5bda9ee 60#include "network.h"
61#include "error.h"
b2718c72 62#include "parse.h"
881450bb 63#include "config.h"
1b09f2c0
DL
64#include "utils.h"
65#include "conf.h"
66#include "log.h"
67#include "lxc.h" /* for lxc_cgroup_set() */
d55bc1ad 68#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 69#include "bdev.h"
368bbc02 70#include "cgroup.h"
025ed0f3 71#include "lxclock.h"
4355ab5f 72#include "namespace.h"
fe4de9a6 73#include "lsm/lsm.h"
d0a36f2c 74
495d2046
SG
75#if HAVE_SYS_CAPABILITY_H
76#include <sys/capability.h>
77#endif
78
6ff05e18
SG
79#if HAVE_SYS_PERSONALITY_H
80#include <sys/personality.h>
81#endif
82
edaf8b1b
SG
83#if IS_BIONIC
84#include <../include/lxcmntent.h>
85#else
86#include <mntent.h>
87#endif
88
769872f9
SH
89#include "lxcseccomp.h"
90
36eb9bde 91lxc_log_define(lxc_conf, lxc);
e5bda9ee 92
0ad19a3f 93#define MAXHWLEN 18
94#define MAXINDEXLEN 20
442cbbe6 95#define MAXMTULEN 16
0ad19a3f 96#define MAXLINELEN 128
97
495d2046 98#if HAVE_SYS_CAPABILITY_H
b09094da
MN
99#ifndef CAP_SETFCAP
100#define CAP_SETFCAP 31
101#endif
102
103#ifndef CAP_MAC_OVERRIDE
104#define CAP_MAC_OVERRIDE 32
105#endif
106
107#ifndef CAP_MAC_ADMIN
108#define CAP_MAC_ADMIN 33
109#endif
495d2046 110#endif
b09094da
MN
111
112#ifndef PR_CAPBSET_DROP
113#define PR_CAPBSET_DROP 24
114#endif
115
9818cae4
SG
116#ifndef LO_FLAGS_AUTOCLEAR
117#define LO_FLAGS_AUTOCLEAR 4
118#endif
119
2d76d1d7
SG
120/* Define pivot_root() if missing from the C library */
121#ifndef HAVE_PIVOT_ROOT
/*
 * Fallback wrapper used when the C library provides no pivot_root().
 * Issues the raw syscall when its number is known at build time;
 * otherwise fails with errno set to ENOSYS.
 */
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
131#else
132extern int pivot_root(const char * new_root, const char * put_old);
133#endif
134
135/* Define sethostname() if missing from the C library */
136#ifndef HAVE_SETHOSTNAME
/*
 * Fallback wrapper used when the C library provides no sethostname().
 * Issues the raw syscall when its number is known at build time;
 * otherwise fails with errno set to ENOSYS.
 */
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
146#endif
147
72f919c4
SG
148/* Define __S_ISTYPE if missing from the C library */
149#ifndef __S_ISTYPE
150#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
151#endif
152
72d0e1cb 153char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 154 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 155
e3b4c4c4 156typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 157
998ac676
RT
/* One entry of the mount option name -> MS_* flag table. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro"; NULL in the table terminator */
	int clear;	/* 1 for the negating keywords in the table ("rw", "suid", ...);
			 * presumably means "clear `flag`" -- confirm against the parser */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
163
81810dd1
DL
/* One entry of the capability name -> CAP_* number table. */
struct caps_opt {
	char *name;	/* lower-case capability name without the "CAP_" prefix */
	int value;	/* corresponding CAP_* constant */
};
168
e3b4c4c4
ST
169static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
170static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
171static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
172static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
173static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 174
24654103
DL
175static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
176 [LXC_NET_VETH] = instanciate_veth,
177 [LXC_NET_MACVLAN] = instanciate_macvlan,
178 [LXC_NET_VLAN] = instanciate_vlan,
179 [LXC_NET_PHYS] = instanciate_phys,
180 [LXC_NET_EMPTY] = instanciate_empty,
0ad19a3f 181};
182
74a2b586
JK
183static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
184static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
185static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
186static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
187static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
188
189static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
190 [LXC_NET_VETH] = shutdown_veth,
191 [LXC_NET_MACVLAN] = shutdown_macvlan,
192 [LXC_NET_VLAN] = shutdown_vlan,
193 [LXC_NET_PHYS] = shutdown_phys,
194 [LXC_NET_EMPTY] = shutdown_empty,
195};
196
998ac676 197static struct mount_opt mount_opt[] = {
88d413d5
SW
198 { "defaults", 0, 0 },
199 { "ro", 0, MS_RDONLY },
200 { "rw", 1, MS_RDONLY },
201 { "suid", 1, MS_NOSUID },
202 { "nosuid", 0, MS_NOSUID },
203 { "dev", 1, MS_NODEV },
204 { "nodev", 0, MS_NODEV },
205 { "exec", 1, MS_NOEXEC },
206 { "noexec", 0, MS_NOEXEC },
207 { "sync", 0, MS_SYNCHRONOUS },
208 { "async", 1, MS_SYNCHRONOUS },
209 { "dirsync", 0, MS_DIRSYNC },
210 { "remount", 0, MS_REMOUNT },
211 { "mand", 0, MS_MANDLOCK },
212 { "nomand", 1, MS_MANDLOCK },
213 { "atime", 1, MS_NOATIME },
214 { "noatime", 0, MS_NOATIME },
215 { "diratime", 1, MS_NODIRATIME },
216 { "nodiratime", 0, MS_NODIRATIME },
217 { "bind", 0, MS_BIND },
218 { "rbind", 0, MS_BIND|MS_REC },
219 { "relatime", 0, MS_RELATIME },
220 { "norelatime", 1, MS_RELATIME },
221 { "strictatime", 0, MS_STRICTATIME },
222 { "nostrictatime", 1, MS_STRICTATIME },
223 { NULL, 0, 0 },
998ac676
RT
224};
225
495d2046 226#if HAVE_SYS_CAPABILITY_H
81810dd1 227static struct caps_opt caps_opt[] = {
a6afdde9 228 { "chown", CAP_CHOWN },
1e11be34
DL
229 { "dac_override", CAP_DAC_OVERRIDE },
230 { "dac_read_search", CAP_DAC_READ_SEARCH },
231 { "fowner", CAP_FOWNER },
232 { "fsetid", CAP_FSETID },
81810dd1
DL
233 { "kill", CAP_KILL },
234 { "setgid", CAP_SETGID },
235 { "setuid", CAP_SETUID },
236 { "setpcap", CAP_SETPCAP },
237 { "linux_immutable", CAP_LINUX_IMMUTABLE },
238 { "net_bind_service", CAP_NET_BIND_SERVICE },
239 { "net_broadcast", CAP_NET_BROADCAST },
240 { "net_admin", CAP_NET_ADMIN },
241 { "net_raw", CAP_NET_RAW },
242 { "ipc_lock", CAP_IPC_LOCK },
243 { "ipc_owner", CAP_IPC_OWNER },
244 { "sys_module", CAP_SYS_MODULE },
245 { "sys_rawio", CAP_SYS_RAWIO },
246 { "sys_chroot", CAP_SYS_CHROOT },
247 { "sys_ptrace", CAP_SYS_PTRACE },
248 { "sys_pacct", CAP_SYS_PACCT },
249 { "sys_admin", CAP_SYS_ADMIN },
250 { "sys_boot", CAP_SYS_BOOT },
251 { "sys_nice", CAP_SYS_NICE },
252 { "sys_resource", CAP_SYS_RESOURCE },
253 { "sys_time", CAP_SYS_TIME },
254 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
255 { "mknod", CAP_MKNOD },
256 { "lease", CAP_LEASE },
9527e566 257#ifdef CAP_AUDIT_WRITE
81810dd1 258 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
259#endif
260#ifdef CAP_AUDIT_CONTROL
81810dd1 261 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 262#endif
81810dd1
DL
263 { "setfcap", CAP_SETFCAP },
264 { "mac_override", CAP_MAC_OVERRIDE },
265 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
266#ifdef CAP_SYSLOG
267 { "syslog", CAP_SYSLOG },
268#endif
269#ifdef CAP_WAKE_ALARM
270 { "wake_alarm", CAP_WAKE_ALARM },
271#endif
81810dd1 272};
495d2046
SG
273#else
274static struct caps_opt caps_opt[] = {};
275#endif
81810dd1 276
91c3830e
SH
277static int run_buffer(char *buffer)
278{
279 FILE *f;
280 char *output;
8e7da691 281 int ret;
91c3830e 282
025ed0f3 283 process_lock();
91c3830e 284 f = popen(buffer, "r");
025ed0f3 285 process_unlock();
91c3830e
SH
286 if (!f) {
287 SYSERROR("popen failed");
288 return -1;
289 }
290
291 output = malloc(LXC_LOG_BUFFER_SIZE);
292 if (!output) {
293 ERROR("failed to allocate memory for script output");
025ed0f3 294 process_lock();
00b6be44 295 pclose(f);
025ed0f3 296 process_unlock();
91c3830e
SH
297 return -1;
298 }
299
300 while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
301 DEBUG("script output: %s", output);
302
303 free(output);
304
025ed0f3 305 process_lock();
8e7da691 306 ret = pclose(f);
025ed0f3 307 process_unlock();
8e7da691 308 if (ret == -1) {
91c3830e
SH
309 SYSERROR("Script exited on error");
310 return -1;
8e7da691
DE
311 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
312 ERROR("Script exited with status %d", WEXITSTATUS(ret));
313 return -1;
314 } else if (WIFSIGNALED(ret)) {
315 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
316 strsignal(WTERMSIG(ret)));
317 return -1;
91c3830e
SH
318 }
319
320 return 0;
321}
322
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" in a
 * stack buffer and execute it with run_buffer().
 *
 * `argsin` may be NULL; otherwise it is a NULL-terminated vector of extra
 * arguments appended in order. `lxcpath` is accepted but not used here.
 *
 * Returns run_buffer()'s result, or -1 if the command line does not fit.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	size_t need = 0;
	char *cmd;
	int used, idx;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument costs its length plus one separating blank */
	for (idx = 0; argsin && argsin[idx]; idx++)
		need += strlen(argsin[idx]) + 1;

	/* hook plus one byte; script, name, section plus three bytes
	 * (separators and the terminating NUL) */
	need += strlen(hook) + 1;
	need += strlen(script) + strlen(name) + strlen(section) + 3;

	if (need > INT_MAX)
		return -1;

	cmd = alloca(need);
	if (cmd == NULL) {
		ERROR("failed to allocate memory");
		return -1;
	}

	used = snprintf(cmd, need, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= need) {
		ERROR("Script name too long");
		return -1;
	}

	for (idx = 0; argsin && argsin[idx]; idx++) {
		int room = need - used;
		int n = snprintf(cmd + used, room, " %s", argsin[idx]);

		if (n < 0 || n >= room) {
			ERROR("Script args too long");
			return -1;
		}
		used += n;
	}

	return run_buffer(cmd);
}
372
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" in a stack
 * buffer and execute it with run_buffer().
 *
 * The variadic arguments are `char *` strings and MUST be terminated by a
 * NULL pointer; they are appended in order, separated by blanks.
 *
 * Returns run_buffer()'s result, or -1 if the command line does not fit.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: length of every extra argument plus one blank each */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two separators and the terminating NUL */

	if (size > INT_MAX)
		return -1;

	buffer = alloca(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		return -1;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			/* every va_start() must be paired with va_end()
			 * before the function returns */
			va_end(ap);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	return run_buffer(buffer);
}
424
/*
 * Per-line callback for the filesystem list files: treat the line as a
 * filesystem type name and try to mount cbarg->rootfs on cbarg->target
 * with it.
 *
 * `data` points to the cbarg structure filled in by mount_unknow_fs().
 * Lines containing "nodev" are skipped. The type name is trimmed in place
 * with lxc_char_left_gc()/lxc_char_right_gc().
 *
 * Returns 1 when the mount succeeded, 0 when this line did not lead to a
 * mount (the caller treats a positive result as success).
 */
static int find_fstype_cb(char *buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *arg = data;
	char *type;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	type = buffer + lxc_char_left_gc(buffer, strlen(buffer));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      arg->rootfs, arg->target, type);

	if (mount(arg->rootfs, arg->target, type, arg->mntopt, NULL) != 0) {
		DEBUG("mount failed with error: %s", strerror(errno));
		return 0;
	}

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     arg->rootfs, arg->target, type);

	return 1;
}
456
2656d231 457static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
78ae2fcc 458{
a6afdde9 459 int i;
78ae2fcc 460
461 struct cbarg {
462 const char *rootfs;
a6afdde9 463 const char *target;
78ae2fcc 464 int mntopt;
465 } cbarg = {
466 .rootfs = rootfs,
a6afdde9 467 .target = target,
78ae2fcc 468 .mntopt = mntopt,
469 };
470
a6afdde9
DL
471 /*
472 * find the filesystem type with brute force:
473 * first we check with /etc/filesystems, in case the modules
78ae2fcc 474 * are auto-loaded and fall back to the supported kernel fs
475 */
476 char *fsfile[] = {
477 "/etc/filesystems",
478 "/proc/filesystems",
479 };
480
a6afdde9
DL
481 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
482
483 int ret;
484
485 if (access(fsfile[i], F_OK))
486 continue;
487
488 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
489 if (ret < 0) {
490 ERROR("failed to parse '%s'", fsfile[i]);
491 return -1;
492 }
493
494 if (ret)
495 return 0;
78ae2fcc 496 }
497
a6afdde9
DL
498 ERROR("failed to determine fs type for '%s'", rootfs);
499 return -1;
500}
501
/* Directory rootfs: recursively bind-mount `rootfs` onto `target`. */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bind_flags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bind_flags, NULL);
}
506
/*
 * Attach the file `rootfs` to the loop device open on `fd`.
 *
 * `loinfo` is zeroed and then used to request LO_FLAGS_AUTOCLEAR on the
 * loop device. The descriptor opened on `rootfs` is always closed before
 * returning.
 *
 * Returns 0 on success, -1 on failure. If the file was attached but the
 * status could not be set, the file is detached again so the loop device
 * is not left bound without the autoclear flag.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	process_lock();
	rfd = open(rootfs, O_RDWR);
	process_unlock();
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD: without autoclear nobody else would */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	process_lock();
	close(rfd);
	process_unlock();

	return ret;
}
542
/*
 * Regular-file rootfs: look through /dev for a free loop device, attach
 * `rootfs` to it with setup_lodev() and mount it on `target` with
 * mount_unknow_fs().
 *
 * Only the first free loop device found is tried. Returns 0 on success,
 * -1 if /dev cannot be opened, no free loop device was found, or the
 * setup/mount on the chosen device failed.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent entry, *cur;
	struct loop_info64 loinfo;
	char path[MAXPATHLEN];
	int ret = -1, fd = -1, rc;
	DIR *dir;

	process_lock();
	dir = opendir("/dev");
	process_unlock();
	if (dir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while (readdir_r(dir, &entry, &cur) == 0 && cur != NULL) {

		if (!strcmp(cur->d_name, ".") || !strcmp(cur->d_name, ".."))
			continue;

		if (strncmp(cur->d_name, "loop", 4) != 0)
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", cur->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		process_lock();
		fd = open(path, O_RDWR);
		process_unlock();
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			process_lock();
			close(fd);
			process_unlock();
			continue;
		}

		/* only ENXIO is accepted as "this device is free" */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     cur->d_name);
			process_lock();
			close(fd);
			process_unlock();
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (ret == 0)
			ret = mount_unknow_fs(path, target, 0);
		process_lock();
		close(fd);
		process_unlock();

		break;
	}

	process_lock();
	if (closedir(dir))
		WARN("failed to close directory");
	process_unlock();

	return ret;
}
618
/* Block-device rootfs: mount it on `target`, probing the filesystem type. */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int no_mount_flags = 0;

	return mount_unknow_fs(rootfs, target, no_mount_flags);
}
623
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned (rootfs is NULL or empty, cannot
 *           be resolved by realpath(), or is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || rootfs[0] == '\0')
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	process_lock();
	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	process_unlock();
	if (fd < 0)
		return fd;

	/* the open descriptor keeps the file alive after the name is gone */
	(void)unlink(absrootfspin);
	return fd;
}
668
368bbc02
CS
669static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct cgroup_process_info *cgroup_info)
670{
368bbc02 671 int r;
b06b8511
CS
672 size_t i;
673 static struct {
674 int match_mask;
675 int match_flag;
676 const char *source;
677 const char *destination;
678 const char *fstype;
679 unsigned long flags;
680 const char *options;
681 } default_mounts[] = {
682 /* Read-only bind-mounting... In older kernels, doing that required
683 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
684 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
685 * kernel 2.6.26 onwards. However, this apparently does not work on
686 * kernel 3.8. Unfortunately, on that very same kernel, doing the
687 * same trick as above doesn't seem to work either, there one needs
688 * to ALSO specify MS_BIND for the remount, otherwise the entire
689 * fs is remounted read-only or the mount fails because it's busy...
690 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
691 * 2.6.32...
368bbc02 692 */
b06b8511
CS
693 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
694 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
695 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
696 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
697 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
698 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
699 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
700 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
701 { 0, 0, NULL, NULL, NULL, 0, NULL }
702 };
368bbc02 703
b06b8511
CS
704 for (i = 0; default_mounts[i].match_mask; i++) {
705 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
706 char *source = NULL;
707 char *destination = NULL;
708 int saved_errno;
709
710 if (default_mounts[i].source) {
711 /* will act like strdup if %r is not present */
712 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
713 if (!source) {
714 SYSERROR("memory allocation error");
715 return -1;
716 }
717 }
718 if (default_mounts[i].destination) {
719 /* will act like strdup if %r is not present */
720 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
721 if (!destination) {
722 saved_errno = errno;
723 SYSERROR("memory allocation error");
724 free(source);
725 errno = saved_errno;
726 return -1;
727 }
728 }
729 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
730 saved_errno = errno;
c414be25
DE
731 if (r < 0)
732 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
733 free(source);
734 free(destination);
735 if (r < 0) {
b06b8511
CS
736 errno = saved_errno;
737 return -1;
738 }
368bbc02 739 }
368bbc02
CS
740 }
741
b06b8511 742 if (flags & LXC_AUTO_CGROUP_MASK) {
7997d7da 743 r = lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info, flags & LXC_AUTO_CGROUP_MASK);
368bbc02
CS
744 if (r < 0) {
745 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 746 return -1;
368bbc02
CS
747 }
748 }
749
368bbc02 750 return 0;
368bbc02
CS
751}
752
2656d231 753static int mount_rootfs(const char *rootfs, const char *target)
0ad19a3f 754{
b09ef133 755 char absrootfs[MAXPATHLEN];
78ae2fcc 756 struct stat s;
a6afdde9 757 int i;
78ae2fcc 758
a6afdde9 759 typedef int (*rootfs_cb)(const char *, const char *);
78ae2fcc 760
761 struct rootfs_type {
762 int type;
763 rootfs_cb cb;
764 } rtfs_type[] = {
2656d231
DL
765 { S_IFDIR, mount_rootfs_dir },
766 { S_IFBLK, mount_rootfs_block },
767 { S_IFREG, mount_rootfs_file },
78ae2fcc 768 };
0ad19a3f 769
4c8ab83b 770 if (!realpath(rootfs, absrootfs)) {
36eb9bde 771 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 772 return -1;
773 }
b09ef133 774
b09ef133 775 if (access(absrootfs, F_OK)) {
36eb9bde 776 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 777 return -1;
778 }
779
78ae2fcc 780 if (stat(absrootfs, &s)) {
36eb9bde 781 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 782 return -1;
783 }
784
78ae2fcc 785 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 786
78ae2fcc 787 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
788 continue;
9b0f0477 789
a6afdde9 790 return rtfs_type[i].cb(absrootfs, target);
78ae2fcc 791 }
9b0f0477 792
36eb9bde 793 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 794 return -1;
0ad19a3f 795}
796
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL `utsname` means there is nothing to configure and counts as
 * success. Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;

	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", host);
		return -1;
	}

	INFO("'%s' hostname has been setup", host);

	return 0;
}
811
33fcb7a0 812static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 813 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 814{
7c6ef2a2
SH
815 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
816 int i, ret;
b0a33c1e 817
bc9bd0e3
DL
818 if (!rootfs->path)
819 return 0;
820
b0a33c1e 821 for (i = 0; i < tty_info->nbtty; i++) {
822
823 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
824
7c6ef2a2 825 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 826 rootfs->mount, i + 1);
7c6ef2a2
SH
827 if (ret >= sizeof(path)) {
828 ERROR("pathname too long for ttys");
829 return -1;
830 }
831 if (ttydir) {
832 /* create dev/lxc/tty%d" */
9ba8130c 833 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
834 rootfs->mount, ttydir, i + 1);
835 if (ret >= sizeof(lxcpath)) {
836 ERROR("pathname too long for ttys");
837 return -1;
838 }
025ed0f3 839 process_lock();
7c6ef2a2 840 ret = creat(lxcpath, 0660);
025ed0f3 841 process_unlock();
7c6ef2a2
SH
842 if (ret==-1 && errno != EEXIST) {
843 SYSERROR("error creating %s\n", lxcpath);
844 return -1;
845 }
025ed0f3 846 process_lock();
4d44e274
SH
847 if (ret >= 0)
848 close(ret);
025ed0f3 849 process_unlock();
7c6ef2a2
SH
850 ret = unlink(path);
851 if (ret && errno != ENOENT) {
852 SYSERROR("error unlinking %s\n", path);
853 return -1;
854 }
b0a33c1e 855
7c6ef2a2
SH
856 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
857 WARN("failed to mount '%s'->'%s'",
858 pty_info->name, path);
859 continue;
860 }
13954cce 861
9ba8130c
SH
862 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
863 if (ret >= sizeof(lxcpath)) {
864 ERROR("tty pathname too long");
865 return -1;
866 }
7c6ef2a2
SH
867 ret = symlink(lxcpath, path);
868 if (ret) {
869 SYSERROR("failed to create symlink for tty %d\n", i+1);
870 return -1;
871 }
872 } else {
c6883f38
SH
873 /* If we populated /dev, then we need to create /dev/ttyN */
874 if (access(path, F_OK)) {
025ed0f3 875 process_lock();
c6883f38 876 ret = creat(path, 0660);
025ed0f3 877 process_unlock();
c6883f38
SH
878 if (ret==-1) {
879 SYSERROR("error creating %s\n", path);
880 /* this isn't fatal, continue */
025ed0f3
SH
881 } else {
882 process_lock();
c6883f38 883 close(ret);
025ed0f3
SH
884 process_unlock();
885 }
c6883f38 886 }
7c6ef2a2
SH
887 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
888 WARN("failed to mount '%s'->'%s'",
889 pty_info->name, path);
890 continue;
891 }
b0a33c1e 892 }
893 }
894
cd54d859
DL
895 INFO("%d tty(s) has been setup", tty_info->nbtty);
896
b0a33c1e 897 return 0;
898}
899
7a7ff0c6 900static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
901{
902 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 903 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
904 int found;
905 void **cbparm;
906
907 mountentry = buffer;
908 cbparm = (void **)data;
909
910 mountlist = cbparm[0];
911 pivotdir = cbparm[1];
912
913 /* parse entry, first field is mountname, ignore */
2796cf79 914 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
915 if (!mountpoint)
916 return -1;
917
918 /* second field is mountpoint */
2796cf79 919 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
920 if (!mountpoint)
921 return -1;
922
923 /* only consider mountpoints below old root fs */
924 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
925 return 0;
926
927 /* filter duplicate mountpoints */
928 found = 0;
929 lxc_list_for_each(iterator, mountlist) {
930 if (!strcmp(iterator->elem, mountpoint)) {
931 found = 1;
932 break;
933 }
934 }
935 if (found)
936 return 0;
937
938 /* add entry to list */
939 listentry = malloc(sizeof(*listentry));
940 if (!listentry) {
941 SYSERROR("malloc for mountpoint listentry failed");
942 return -1;
943 }
944
945 listentry->elem = strdup(mountpoint);
946 if (!listentry->elem) {
947 SYSERROR("strdup failed");
00b6be44 948 free(listentry);
bf601689
MH
949 return -1;
950 }
951 lxc_list_add_tail(mountlist, listentry);
952
953 return 0;
954}
955
cc6f6dd7 956static int umount_oldrootfs(const char *oldrootfs)
bf601689 957{
2382ecff 958 char path[MAXPATHLEN];
bf601689 959 void *cbparm[2];
9ebb03ad 960 struct lxc_list mountlist, *iterator, *next;
bf601689 961 int ok, still_mounted, last_still_mounted;
9ba8130c 962 int rc;
bf601689
MH
963
964 /* read and parse /proc/mounts in old root fs */
965 lxc_list_init(&mountlist);
966
cc6f6dd7 967 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
968 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
969 if (rc >= sizeof(path)) {
970 ERROR("rootfs name too long");
971 return -1;
972 }
bf601689 973 cbparm[0] = &mountlist;
bf601689 974
cc6f6dd7 975 cbparm[1] = strdup(path);
bf601689
MH
976 if (!cbparm[1]) {
977 SYSERROR("strdup failed");
978 return -1;
979 }
980
9ba8130c
SH
981 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
982 if (rc >= sizeof(path)) {
983 ERROR("container proc/mounts name too long");
984 return -1;
985 }
cc6f6dd7
DL
986
987 ok = lxc_file_for_each_line(path,
988 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
989 if (ok < 0) {
990 SYSERROR("failed to read or parse mount list '%s'", path);
991 return -1;
992 }
993
994 /* umount filesystems until none left or list no longer shrinks */
995 still_mounted = 0;
996 do {
997 last_still_mounted = still_mounted;
998 still_mounted = 0;
999
9ebb03ad 1000 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 1001
c08556c6 1002 /* umount normally */
bf601689
MH
1003 if (!umount(iterator->elem)) {
1004 DEBUG("umounted '%s'", (char *)iterator->elem);
1005 lxc_list_del(iterator);
1006 continue;
1007 }
1008
bf601689
MH
1009 still_mounted++;
1010 }
7df119ee 1011
bf601689
MH
1012 } while (still_mounted > 0 && still_mounted != last_still_mounted);
1013
7df119ee 1014
c08556c6
DL
1015 lxc_list_for_each(iterator, &mountlist) {
1016
1017 /* let's try a lazy umount */
1018 if (!umount2(iterator->elem, MNT_DETACH)) {
1019 INFO("lazy unmount of '%s'", (char *)iterator->elem);
1020 continue;
1021 }
1022
1023 /* be more brutal (nfs) */
1024 if (!umount2(iterator->elem, MNT_FORCE)) {
1025 INFO("forced unmount of '%s'", (char *)iterator->elem);
1026 continue;
1027 }
1028
7df119ee 1029 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1030 }
bf601689 1031
cc6f6dd7
DL
1032 return 0;
1033}
1034
/*
 * Make `rootfs` the new root with pivot_root(), parking the old root on
 * <rootfs>/<pivotdir>, then unmount the old root's mounts.
 *
 * `pivotdir` defaults to "lxc_putold" when NULL. If the directory had to
 * be created here it is removed again afterwards; failing to remove it is
 * only warned about.
 *
 * Returns 0 on success, -1 on any failure before or during the pivot or
 * if umount_oldrootfs() fails.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created = 0;
	int len;

	/* change into new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* compute the full path to pivotdir under rootfs */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {

		if (mkdir_p(putold, 0755)) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		/* remember to clean up what we created */
		created = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* from here on the old root is addressed by its relative name */
	if (umount_oldrootfs(pivotdir))
		return -1;

	/* remove temporary mount point, we don't consider the removing
	 * as fatal */
	if (created && rmdir(pivotdir))
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
1094
bc6928ff
MW
1095
1096/*
1097 * Note: This is a verbatim copy of what is in monitor.c. We're just
1098 * using it here to generate a safe subdirectory in /dev/ for the
1099 * container's /dev/
1100 */
1101
1102/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
1103 * FNV has good anti collision properties and we're not worried
1104 * about pre-image resistance or one-way-ness, we're just trying to make
1105 * the name unique in the 108 bytes of space we have.
1106 */
1107#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
/*
 * Fold 'len' bytes starting at 'buf' into the running 64-bit FNV-1a hash
 * 'hval' and return the updated hash value.
 */
static uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *cur = buf;
	const unsigned char *end = cur + len;

	while (cur < end) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)*cur++;

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (2^40 + 2^8 + 0xb3), same value as the shift-add form */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
1126
1127/*
1128 * Check to see if a directory has something mounted on it and,
1129 * if it does, return the fstype.
1130 *
1131 * Code largely based on detect_shared_rootfs below
1132 *
1133 * Returns: # of matching entries in /proc/self/mounts
1134 * if != 0 fstype is filled with the last filesystem value.
1135 * if == 0 no matches found, fstype unchanged.
1136 *
1137 * ToDo: Maybe return the mount options in another parameter...
1138 */
1139
1140#define LINELEN 4096
1141#define MAX_FSTYPE_LEN 128
1142int mount_check_fs( const char *dir, char *fstype )
1143{
1144 char buf[LINELEN], *p;
1145 struct stat s;
1146 FILE *f;
1147 int found_fs = 0;
1148 char *p2;
1149
1150 DEBUG("entering mount_check_fs for %s\n", dir);
1151
1152 if ( 0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode) ) {
1153 return 0;
1154 }
1155
1156 process_lock();
1157 f = fopen("/proc/self/mounts", "r");
1158 process_unlock();
1159 if (!f)
1160 return 0;
1161 while ((p = fgets(buf, LINELEN, f))) {
1162 p = index(buf, ' ');
1163 if( !p )
1164 continue;
1165 *p = '\0';
1166 p2 = p + 1;
1167
1168 p = index(p2, ' ');
1169 if( !p )
1170 continue;
1171 *p = '\0';
1172
1173 /* Compare the directory in the entry to desired */
1174 if( strcmp( p2, dir ) ) {
1175 continue;
1176 }
1177
1178 p2 = p + 1;
1179 p = index( p2, ' ');
1180 if( !p )
1181 continue;
1182 *p = '\0';
1183
1184 ++found_fs;
1185
1186 if( fstype ) {
1187 strncpy( fstype, p2, MAX_FSTYPE_LEN - 1 );
1188 fstype [ MAX_FSTYPE_LEN - 1 ] = '\0';
1189 }
1190 }
1191
1192 process_lock();
1193 fclose(f);
1194 process_unlock();
1195
1196 DEBUG("mount_check_fs returning %d last %s\n", found_fs, fstype);
1197
1198 return found_fs;
1199}
1200
1201/*
1202 * Locate a devtmpfs mount (should be on /dev) and create a container
1203 * subdirectory on it which we can then bind mount to the container
1204 * /dev instead of mounting a tmpfs there.
1205 * If we fail, return NULL.
1206 * Else return the pointer to the name buffer with the string to
1207 * the devtmpfs subdirectory.
1208 */
1209
1210char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
1211{
1212 int ret;
1213 struct stat s;
1214 char tmp_path[MAXPATHLEN];
1215 char fstype[MAX_FSTYPE_LEN];
1216 char *base_path = "/dev/.lxc";
1217 char *user_path = "/dev/.lxc/user";
1218 uint64_t hash;
1219
1220 if ( 0 != access(base_path, F_OK) || 0 != stat(base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1221 /* This is just making /dev/.lxc it better work or we're done */
1222 ret = mkdir(base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1223 if ( ret ) {
1224 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1225 return NULL;
1226 }
1227 }
1228
1229 /*
1230 * Programmers notes:
1231 * We can not do mounts in this area of code that we want
1232 * to be visible in the host. Consequently, /dev/.lxc must
1233 * be set up earlier if we need a tmpfs mounted there.
1234 * That only affects the rare cases where autodev is enabled
1235 * for a container and devtmpfs is not mounted on /dev in the
1236 * host. In that case, we'll fall back to the old method
1237 * of mounting a tmpfs in the container and have no visibility
1238 * into the container /dev.
1239 */
1240 if( ! mount_check_fs( "/dev", fstype )
1241 || strcmp( "devtmpfs", fstype ) ) {
1242 /* Either /dev was not mounted or was not devtmpfs */
1243
1244 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1245 /*
1246 * /dev/.lxc is not already mounted
1247 * Doing a mount here does no good, since
1248 * it's not visible in the host.
1249 */
1250
1251 ERROR("/dev/.lxc is not setup - taking fallback" );
1252 return NULL;
1253 }
1254 }
1255
1256 if ( 0 != access(user_path, F_OK) || 0 != stat(user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1257 /*
1258 * This is making /dev/.lxc/user path for non-priv users.
1259 * If this doesn't work, we'll have to fall back in the
1260 * case of non-priv users. It's mode 1777 like /tmp.
1261 */
1262 ret = mkdir(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1263 if ( ret ) {
1264 /* Issue an error but don't fail yet! */
1265 ERROR("Unable to create /dev/.lxc/user");
1266 }
1267 /* Umask tends to screw us up here */
1268 chmod(user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
1269 }
1270
1271 /*
1272 * Since the container name must be unique within a given
1273 * lxcpath, we're going to use a hash of the path
1274 * /lxcpath/name as our hash name in /dev/.lxc/
1275 */
1276
1277 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1278 if (ret < 0 || ret >= MAXPATHLEN)
1279 return NULL;
1280
1281 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1282
1283 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, base_path, name, hash);
1284 if (ret < 0 || ret >= MAXPATHLEN)
1285 return NULL;
1286
1287 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1288 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1289 if ( ret ) {
1290 /* Something must have failed with the base_path...
1291 * Maybe unpriv user. Try user_path now... */
1292 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1293
1294 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, user_path, name, hash);
1295 if (ret < 0 || ret >= MAXPATHLEN)
1296 return NULL;
1297
1298 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1299 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1300 if ( ret ) {
1301 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1302 return NULL;
1303 }
1304 }
1305 }
1306 }
1307
1308 strcpy( path, tmp_path );
1309 return path;
1310}
1311
1312
91c3830e
SH
1313/*
1314 * Do we want to add options for max size of /dev and a file to
1315 * specify which devices to create?
1316 */
/*
 * Mount a /dev for the container under 'root' and make sure it contains
 * a 'pts' directory.
 *
 * name:    container name.
 * root:    directory the container rootfs is mounted on.
 * lxcpath: lxcpath of the container.
 *
 * If mk_devtmpfs() provides a host directory, it is symlinked from
 * <lxcpath>/<name>/rootfs.dev and bind mounted on <root>/dev. Otherwise
 * either an already mounted rootfs.dev is bind mounted or a fresh tmpfs
 * is mounted there.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);

	/* snprintf() returning exactly MAXPATHLEN already means the result
	 * was truncated, hence '>=' and not '>' */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs( name, devtmpfs_path, lxcpath ) ) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink( host_path );
		rmdir( host_path );
		ret = symlink(devtmpfs_path, host_path);

		if ( ret < 0 ) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'\n", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0 );
	} else {
		/* Only mount a tmpfs here if there is not already a mount */
		if ( ! mount_check_fs( host_path, NULL ) ) {
			/* the tmpfs goes on the container /dev, not on host_path */
			DEBUG("Mounting tmpfs to %s", path );
			ret = mount("none", path, "tmpfs", 0, "size=100000");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path );
			ret = mount(host_path , path, NULL, MS_BIND, 0 );
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) || 0 == S_ISDIR(s.st_mode) ) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
1386
c6883f38
SH
/* Description of one device node created in the container /dev. */
struct lxc_devs {
	char *name;	/* node name below <root>/dev */
	mode_t mode;	/* file type and permission bits given to mknod() */
	int maj;	/* device major number */
	int min;	/* device minor number */
};

/* Device nodes created by setup_autodev(). */
struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1403
c6883f38
SH
1404static int setup_autodev(char *root)
1405{
1406 int ret;
1407 struct lxc_devs *d;
1408 char path[MAXPATHLEN];
1409 int i;
3a32201c 1410 mode_t cmask;
c6883f38 1411
91c3830e
SH
1412 INFO("Creating initial consoles under %s/dev\n", root);
1413
c6883f38 1414 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1415 if (ret < 0 || ret >= MAXPATHLEN) {
1416 ERROR("Error calculating container /dev location");
c6883f38 1417 return -1;
f7bee6c6 1418 }
91c3830e
SH
1419
1420 INFO("Populating /dev under %s\n", root);
3a32201c 1421 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38
SH
1422 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
1423 d = &lxc_devs[i];
1424 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1425 if (ret < 0 || ret >= MAXPATHLEN)
1426 return -1;
1427 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1428 if (ret && errno != EEXIST) {
c6883f38
SH
1429 SYSERROR("Error creating %s\n", d->name);
1430 return -1;
1431 }
1432 }
3a32201c 1433 umask(cmask);
c6883f38
SH
1434
1435 INFO("Populated /dev under %s\n", root);
1436 return 0;
1437}
1438
cc28d0b0
SH
1439/*
1440 * Detect whether / is mounted MS_SHARED. The only way I know of to
1441 * check that is through /proc/self/mountinfo.
1442 * I'm only checking for /. If the container rootfs or mount location
1443 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1444 * out would be too much work to be worth it.
1445 */
1446#define LINELEN 4096
1447int detect_shared_rootfs(void)
1448{
1449 char buf[LINELEN], *p;
1450 FILE *f;
1451 int i;
1452 char *p2;
1453
025ed0f3 1454 process_lock();
cc28d0b0 1455 f = fopen("/proc/self/mountinfo", "r");
025ed0f3 1456 process_unlock();
cc28d0b0
SH
1457 if (!f)
1458 return 0;
1459 while ((p = fgets(buf, LINELEN, f))) {
cc28d0b0
SH
1460 for (p = buf, i=0; p && i < 4; i++)
1461 p = index(p+1, ' ');
1462 if (!p)
1463 continue;
1464 p2 = index(p+1, ' ');
1465 if (!p2)
1466 continue;
1467 *p2 = '\0';
cc28d0b0
SH
1468 if (strcmp(p+1, "/") == 0) {
1469 // this is '/'. is it shared?
1470 p = index(p2+1, ' ');
ab81cef0 1471 if (p && strstr(p, "shared:")) {
025ed0f3 1472 process_lock();
00b6be44 1473 fclose(f);
025ed0f3 1474 process_unlock();
cc28d0b0 1475 return 1;
00b6be44 1476 }
cc28d0b0
SH
1477 }
1478 }
025ed0f3 1479 process_lock();
cc28d0b0 1480 fclose(f);
025ed0f3 1481 process_unlock();
cc28d0b0
SH
1482 return 0;
1483}
1484
1485/*
1486 * I'll forgive you for asking whether all of this is needed :) The
1487 * answer is yes.
1488 * pivot_root will fail if the new root, the put_old dir, or the parent
1489 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1490 * or may not be current->fs_root - if we assumed it always was, we could
1491 * just mount --make-rslave /). So,
1492 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1493 * 2. make that MS_SLAVE
1494 * 3. make a 'root' directory under that
1495 * 4. mount --rbind / under the $tinyroot/root.
1496 * 5. make that rslave
1497 * 6. chdir and chroot into $tinyroot/root
1498 * 7. $tinyroot will be unmounted by our parent in start.c
1499 */
1500static int chroot_into_slave(struct lxc_conf *conf)
1501{
1502 char path[MAXPATHLEN];
1503 const char *destpath = conf->rootfs.mount;
1504 int ret;
1505
1506 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1507 SYSERROR("failed to mount %s bind", destpath);
1508 return -1;
1509 }
1510 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1511 SYSERROR("failed to make %s slave", destpath);
1512 return -1;
1513 }
1514 if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
1515 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1516 return -1;
1517 }
1518 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1519 if (ret < 0 || ret >= MAXPATHLEN) {
1520 ERROR("out of memory making root path");
1521 return -1;
1522 }
1523 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1524 SYSERROR("Failed to create /dev/pts in container");
1525 return -1;
1526 }
1527 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1528 SYSERROR("Failed to rbind mount / to %s", path);
1529 return -1;
1530 }
1531 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1532 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1533 return -1;
1534 }
1535 if (chdir(path)) {
1536 SYSERROR("Failed to chdir into tmp-/");
1537 return -1;
1538 }
1539 if (chroot(path)) {
1540 SYSERROR("Failed to chroot into tmp-/");
1541 return -1;
1542 }
1543 INFO("Chrooted into tmp-/ at %s\n", path);
1544 return 0;
1545}
1546
1547static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1548{
cc28d0b0
SH
1549 const struct lxc_rootfs *rootfs = &conf->rootfs;
1550
a0f379bf
DW
1551 if (!rootfs->path) {
1552 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1553 SYSERROR("Failed to make / rslave");
1554 return -1;
1555 }
c69bd12f 1556 return 0;
a0f379bf 1557 }
0ad19a3f 1558
12297168 1559 if (access(rootfs->mount, F_OK)) {
b1789442 1560 SYSERROR("failed to access to '%s', check it is present",
12297168 1561 rootfs->mount);
b1789442
DL
1562 return -1;
1563 }
1564
cc28d0b0
SH
1565 if (detect_shared_rootfs()) {
1566 if (chroot_into_slave(conf)) {
1567 ERROR("Failed to chroot into slave /");
1568 return -1;
1569 }
1570 }
1571
9be53773
SH
1572 // First try mounting rootfs using a bdev
1573 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1574 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1575 bdev_put(bdev);
9be53773
SH
1576 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1577 return 0;
1578 }
59d66af2
SH
1579 if (bdev)
1580 bdev_put(bdev);
2656d231 1581 if (mount_rootfs(rootfs->path, rootfs->mount)) {
a6afdde9 1582 ERROR("failed to mount rootfs");
c3f0a28c 1583 return -1;
1584 }
0ad19a3f 1585
12297168 1586 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1587
ac778708
DL
1588 return 0;
1589}
1590
1591int setup_pivot_root(const struct lxc_rootfs *rootfs)
1592{
ac778708
DL
1593 if (!rootfs->path)
1594 return 0;
1595
12297168 1596 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1597 ERROR("failed to setup pivot root");
25368b52 1598 return -1;
c69bd12f
DL
1599 }
1600
25368b52 1601 return 0;
0ad19a3f 1602}
1603
d852c78c 1604static int setup_pts(int pts)
3c26f34e 1605{
77890c6d
SW
1606 char target[PATH_MAX];
1607
d852c78c
DL
1608 if (!pts)
1609 return 0;
3c26f34e 1610
1611 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1612 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1613 return -1;
1614 }
1615
a6afdde9 1616 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1617 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1618 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1619 return -1;
1620 }
1621
3c26f34e 1622 if (access("/dev/ptmx", F_OK)) {
1623 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1624 goto out;
36eb9bde 1625 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1626 return -1;
1627 }
1628
77890c6d
SW
1629 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1630 goto out;
1631
3c26f34e 1632 /* fallback here, /dev/pts/ptmx exists just mount bind */
1633 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1634 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1635 return -1;
1636 }
cd54d859
DL
1637
1638 INFO("created new pts instance");
d852c78c 1639
3c26f34e 1640out:
1641 return 0;
1642}
1643
cccc74b5
DL
/*
 * Apply the execution domain 'persona' with personality(), when the
 * build has <sys/personality.h>. A value of -1 means "leave unchanged".
 *
 * Returns 0 on success (or when there is nothing to do), -1 on failure.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1660
7c6ef2a2 1661static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1662 const struct lxc_console *console)
6e590161 1663{
63376d7d
DL
1664 char path[MAXPATHLEN];
1665 struct stat s;
7c6ef2a2 1666 int ret;
52e35957 1667
7c6ef2a2
SH
1668 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1669 if (ret >= sizeof(path)) {
1670 ERROR("console path too long\n");
1671 return -1;
1672 }
52e35957 1673
63376d7d 1674 if (access(path, F_OK)) {
466978b0 1675 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1676 return 0;
52e35957
DL
1677 }
1678
b5159817
DE
1679 if (console->master < 0) {
1680 INFO("no console");
f78a1f32
DL
1681 return 0;
1682 }
ed502555 1683
63376d7d
DL
1684 if (stat(path, &s)) {
1685 SYSERROR("failed to stat '%s'", path);
1686 return -1;
1687 }
1688
1689 if (chmod(console->name, s.st_mode)) {
1690 SYSERROR("failed to set mode '0%o' to '%s'",
1691 s.st_mode, console->name);
1692 return -1;
1693 }
13954cce 1694
63376d7d
DL
1695 if (mount(console->name, path, "none", MS_BIND, 0)) {
1696 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1697 return -1;
1698 }
1699
63376d7d 1700 INFO("console has been setup");
7c6ef2a2
SH
1701 return 0;
1702}
1703
1704static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1705 const struct lxc_console *console,
1706 char *ttydir)
1707{
1708 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1709 int ret;
1710
1711 /* create rootfs/dev/<ttydir> directory */
1712 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1713 ttydir);
1714 if (ret >= sizeof(path))
1715 return -1;
1716 ret = mkdir(path, 0755);
1717 if (ret && errno != EEXIST) {
1718 SYSERROR("failed with errno %d to create %s\n", errno, path);
1719 return -1;
1720 }
1721 INFO("created %s\n", path);
1722
1723 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1724 rootfs->mount, ttydir);
1725 if (ret >= sizeof(lxcpath)) {
1726 ERROR("console path too long\n");
1727 return -1;
1728 }
1729
1730 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1731 ret = unlink(path);
1732 if (ret && errno != ENOENT) {
1733 SYSERROR("error unlinking %s\n", path);
1734 return -1;
1735 }
1736
025ed0f3 1737 process_lock();
7c6ef2a2 1738 ret = creat(lxcpath, 0660);
025ed0f3 1739 process_unlock();
7c6ef2a2
SH
1740 if (ret==-1 && errno != EEXIST) {
1741 SYSERROR("error %d creating %s\n", errno, lxcpath);
1742 return -1;
1743 }
025ed0f3 1744 process_lock();
4d44e274
SH
1745 if (ret >= 0)
1746 close(ret);
025ed0f3 1747 process_unlock();
7c6ef2a2 1748
b5159817
DE
1749 if (console->master < 0) {
1750 INFO("no console");
7c6ef2a2
SH
1751 return 0;
1752 }
1753
1754 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1755 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1756 return -1;
1757 }
1758
1759 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1760 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1761 if (ret >= sizeof(lxcpath)) {
1762 ERROR("lxc/console path too long");
1763 return -1;
1764 }
7c6ef2a2
SH
1765 ret = symlink(lxcpath, path);
1766 if (ret) {
1767 SYSERROR("failed to create symlink for console");
1768 return -1;
1769 }
1770
1771 INFO("console has been setup on %s", lxcpath);
cd54d859 1772
6e590161 1773 return 0;
1774}
1775
7c6ef2a2
SH
1776static int setup_console(const struct lxc_rootfs *rootfs,
1777 const struct lxc_console *console,
1778 char *ttydir)
1779{
1780 /* We don't have a rootfs, /dev/console will be shared */
1781 if (!rootfs->path)
1782 return 0;
1783 if (!ttydir)
1784 return setup_dev_console(rootfs, console);
1785
1786 return setup_ttydir_console(rootfs, console, ttydir);
1787}
1788
1bd051a6
SH
1789static int setup_kmsg(const struct lxc_rootfs *rootfs,
1790 const struct lxc_console *console)
1791{
1792 char kpath[MAXPATHLEN];
1793 int ret;
1794
222fea5a
DE
1795 if (!rootfs->path)
1796 return 0;
1bd051a6
SH
1797 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1798 if (ret < 0 || ret >= sizeof(kpath))
1799 return -1;
1800
1801 ret = unlink(kpath);
1802 if (ret && errno != ENOENT) {
1803 SYSERROR("error unlinking %s\n", kpath);
1804 return -1;
1805 }
1806
1807 ret = symlink("console", kpath);
1808 if (ret) {
1809 SYSERROR("failed to create symlink for kmsg");
1810 return -1;
1811 }
1812
1813 return 0;
1814}
1815
998ac676
RT
1816static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1817{
1818 struct mount_opt *mo;
1819
1820 /* If opt is found in mount_opt, set or clear flags.
1821 * Otherwise append it to data. */
1822
1823 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1824 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1825 if (mo->clear)
1826 *flags &= ~mo->flag;
1827 else
1828 *flags |= mo->flag;
1829 return;
1830 }
1831 }
1832
1833 if (strlen(*data))
1834 strcat(*data, ",");
1835 strcat(*data, opt);
1836}
1837
/*
 * Split the comma separated option string 'mntopts' into mount flags
 * and a data string.
 *
 * mntflags: receives the accumulated flags (0 when there are none).
 * mntdata:  receives a malloc()ed string with the options that are not
 *           flags, or NULL when there are none; the caller frees it.
 *
 * Returns 0 on success, -1 when memory cannot be allocated.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *data;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (mntopts == NULL)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (copy == NULL) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never get longer than the input */
	data = malloc(strlen(copy) + 1);
	if (data == NULL) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	*data = 0;

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &data);
		tok = strtok_r(NULL, ",", &state);
	}

	if (*data != '\0')
		*mntdata = data;
	else
		free(data);
	free(copy);

	return 0;
}
1876
911324ef
DL
/*
 * Mount 'fsname' on 'target'.
 *
 * The first mount() call is made without MS_REMOUNT. When the flags
 * contain MS_REMOUNT or MS_BIND, a second call with MS_REMOUNT added
 * follows.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long first_flags = mountflags & ~MS_REMOUNT;
	const int needs_remount = (mountflags & (MS_REMOUNT | MS_BIND)) != 0;

	if (mount(fsname, target, fstype, first_flags, data) != 0) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (needs_remount) {
		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1903
/*
 * Mount 'mntent' at the location given in the entry itself (used when the
 * container has no rootfs of its own).
 *
 * A failing mount is ignored when the entry carries the "optional" option.
 *
 * Returns 0 on success, -1 on failure.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *data;
	int rc;

	if (parse_mntopts(mntent->mnt_opts, &flags, &data) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, data);

	/* "optional" entries never make the setup fail */
	if (hasmntopt(mntent, "optional"))
		rc = 0;

	free(data);

	return rc;
}
1925
1926static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1927 const struct lxc_rootfs *rootfs,
1928 const char *lxc_name)
911324ef 1929{
013bd428 1930 char *aux;
59760f5d 1931 char path[MAXPATHLEN];
911324ef
DL
1932 unsigned long mntflags;
1933 char *mntdata;
80a881b2 1934 int r, ret = 0, offset;
67e571de 1935 const char *lxcpath;
0ad19a3f 1936
911324ef
DL
1937 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1938 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1939 return -1;
1940 }
1bc60a65 1941
2a59a681
SH
1942 lxcpath = default_lxc_path();
1943 if (!lxcpath) {
1944 ERROR("Out of memory");
1945 return -1;
1946 }
1947
80a881b2 1948 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1949 * use $lxcpath/CN/rootfs as the target prefix */
1950 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1951 if (r < 0 || r >= MAXPATHLEN)
1952 goto skipvarlib;
1953
1954 aux = strstr(mntent->mnt_dir, path);
1955 if (aux) {
1956 offset = strlen(path);
1957 goto skipabs;
1958 }
1959
1960skipvarlib:
013bd428
DL
1961 aux = strstr(mntent->mnt_dir, rootfs->path);
1962 if (!aux) {
1963 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1964 goto out;
1965 }
80a881b2
SH
1966 offset = strlen(rootfs->path);
1967
1968skipabs:
013bd428 1969
9ba8130c 1970 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1971 aux + offset);
1972 if (r < 0 || r >= MAXPATHLEN) {
1973 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1974 ret = -1;
1975 goto out;
1976 }
1977
d330fe7b 1978
013bd428 1979 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 1980 mntflags, mntdata);
0ad19a3f 1981
68c152ef
SH
1982 if (hasmntopt(mntent, "optional") != NULL)
1983 ret = 0;
1984
013bd428 1985out:
911324ef
DL
1986 free(mntdata);
1987 return ret;
1988}
d330fe7b 1989
911324ef
DL
/*
 * Mount an entry whose mount point is relative to the container root:
 * the target is "<rootfs>/<mnt_dir>".
 *
 * A failing mount is ignored when the entry carries the "optional" option.
 * The option data allocated by parse_mntopts() is released on every path
 * out of the function.
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* go through 'out' so that mntdata is not leaked */
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	/* "optional" entries never make the setup fail */
	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

out:
	free(mntdata);

	return ret;
}
2020
80a881b2
SH
2021static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2022 const char *lxc_name)
911324ef
DL
2023{
2024 struct mntent *mntent;
2025 int ret = -1;
e76b8764 2026
911324ef 2027 while ((mntent = getmntent(file))) {
e76b8764 2028
911324ef
DL
2029 if (!rootfs->path) {
2030 if (mount_entry_on_systemfs(mntent))
e76b8764 2031 goto out;
911324ef 2032 continue;
e76b8764
CDC
2033 }
2034
911324ef
DL
2035 /* We have a separate root, mounts are relative to it */
2036 if (mntent->mnt_dir[0] != '/') {
2037 if (mount_entry_on_relative_rootfs(mntent,
2038 rootfs->mount))
2039 goto out;
2040 continue;
2041 }
cd54d859 2042
80a881b2 2043 if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
911324ef 2044 goto out;
0ad19a3f 2045 }
cd54d859 2046
0ad19a3f 2047 ret = 0;
cd54d859
DL
2048
2049 INFO("mount points have been setup");
0ad19a3f 2050out:
e7938e9e
MN
2051 return ret;
2052}
2053
80a881b2
SH
/*
 * Mount all entries of the fstab file 'fstab'.
 * Nothing is done when 'fstab' is NULL.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *fp;
	int rc;

	if (fstab == NULL)
		return 0;

	process_lock();
	fp = setmntent(fstab, "r");
	process_unlock();
	if (fp == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fp, lxc_name);

	process_lock();
	endmntent(fp);
	process_unlock();

	return rc;
}
2078
80a881b2
SH
2079static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2080 const char *lxc_name)
e7938e9e
MN
2081{
2082 FILE *file;
2083 struct lxc_list *iterator;
2084 char *mount_entry;
2085 int ret;
2086
025ed0f3 2087 process_lock();
e7938e9e 2088 file = tmpfile();
025ed0f3 2089 process_unlock();
e7938e9e
MN
2090 if (!file) {
2091 ERROR("tmpfile error: %m");
2092 return -1;
2093 }
2094
2095 lxc_list_for_each(iterator, mount) {
2096 mount_entry = iterator->elem;
1d6b1976 2097 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2098 }
2099
2100 rewind(file);
2101
80a881b2 2102 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e 2103
025ed0f3 2104 process_lock();
e7938e9e 2105 fclose(file);
025ed0f3 2106 process_unlock();
e7938e9e
MN
2107 return ret;
2108}
2109
81810dd1
DL
2110static int setup_caps(struct lxc_list *caps)
2111{
2112 struct lxc_list *iterator;
2113 char *drop_entry;
d55bc1ad 2114 char *ptr;
81810dd1
DL
2115 int i, capid;
2116
2117 lxc_list_for_each(iterator, caps) {
2118
2119 drop_entry = iterator->elem;
2120
2121 capid = -1;
2122
2123 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2124
2125 if (strcmp(drop_entry, caps_opt[i].name))
2126 continue;
2127
2128 capid = caps_opt[i].value;
2129 break;
2130 }
2131
d55bc1ad
CS
2132 if (capid < 0) {
2133 /* try to see if it's numeric, so the user may specify
2134 * capabilities that the running kernel knows about but
2135 * we don't */
09bbd745 2136 errno = 0;
d55bc1ad 2137 capid = strtol(drop_entry, &ptr, 10);
09bbd745 2138 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
2139 /* not a valid number */
2140 capid = -1;
2141 else if (capid > lxc_caps_last_cap())
2142 /* we have a number but it's not a valid
2143 * capability */
2144 capid = -1;
2145 }
2146
81810dd1 2147 if (capid < 0) {
1e11be34
DL
2148 ERROR("unknown capability %s", drop_entry);
2149 return -1;
81810dd1
DL
2150 }
2151
2152 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2153
2154 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
2155 SYSERROR("failed to remove %s capability", drop_entry);
2156 return -1;
2157 }
2158
2159 }
2160
1fb86a7c
SH
2161 DEBUG("capabilities have been setup");
2162
2163 return 0;
2164}
2165
2166static int dropcaps_except(struct lxc_list *caps)
2167{
2168 struct lxc_list *iterator;
2169 char *keep_entry;
2170 char *ptr;
2171 int i, capid;
2172 int numcaps = lxc_caps_last_cap() + 1;
2173 INFO("found %d capabilities\n", numcaps);
2174
2caf9a97
SH
2175 if (numcaps <= 0 || numcaps > 200)
2176 return -1;
2177
1fb86a7c
SH
2178 // caplist[i] is 1 if we keep capability i
2179 int *caplist = alloca(numcaps * sizeof(int));
2180 memset(caplist, 0, numcaps * sizeof(int));
2181
2182 lxc_list_for_each(iterator, caps) {
2183
2184 keep_entry = iterator->elem;
2185
2186 capid = -1;
2187
2188 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2189
2190 if (strcmp(keep_entry, caps_opt[i].name))
2191 continue;
2192
2193 capid = caps_opt[i].value;
2194 break;
2195 }
2196
2197 if (capid < 0) {
2198 /* try to see if it's numeric, so the user may specify
2199 * capabilities that the running kernel knows about but
2200 * we don't */
2201 capid = strtol(keep_entry, &ptr, 10);
2202 if (!ptr || *ptr != '\0' ||
f371aca9 2203 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2204 /* not a valid number */
2205 capid = -1;
2206 else if (capid > lxc_caps_last_cap())
2207 /* we have a number but it's not a valid
2208 * capability */
2209 capid = -1;
2210 }
2211
2212 if (capid < 0) {
2213 ERROR("unknown capability %s", keep_entry);
2214 return -1;
2215 }
2216
2217 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2218
2219 caplist[capid] = 1;
2220 }
2221 for (i=0; i<numcaps; i++) {
2222 if (caplist[i])
2223 continue;
2224 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
2225 SYSERROR("failed to remove capability %d", i);
2226 return -1;
2227 }
2228 }
2229
2230 DEBUG("capabilities have been setup");
81810dd1
DL
2231
2232 return 0;
2233}
2234
0ad19a3f 2235static int setup_hw_addr(char *hwaddr, const char *ifname)
2236{
2237 struct sockaddr sockaddr;
2238 struct ifreq ifr;
2239 int ret, fd;
2240
3cfc0f3a
MN
2241 ret = lxc_convert_mac(hwaddr, &sockaddr);
2242 if (ret) {
2243 ERROR("mac address '%s' conversion failed : %s",
2244 hwaddr, strerror(-ret));
0ad19a3f 2245 return -1;
2246 }
2247
2248 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2249 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2250 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2251
025ed0f3 2252 process_lock();
0ad19a3f 2253 fd = socket(AF_INET, SOCK_DGRAM, 0);
025ed0f3 2254 process_unlock();
0ad19a3f 2255 if (fd < 0) {
3ab87b66 2256 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2257 return -1;
2258 }
2259
2260 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
025ed0f3 2261 process_lock();
0ad19a3f 2262 close(fd);
025ed0f3 2263 process_unlock();
0ad19a3f 2264 if (ret)
3ab87b66 2265 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2266
5da6aa8c 2267 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2268
0ad19a3f 2269 return ret;
2270}
2271
82d5ae15 2272static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2273{
82d5ae15
DL
2274 struct lxc_list *iterator;
2275 struct lxc_inetdev *inetdev;
3cfc0f3a 2276 int err;
0ad19a3f 2277
82d5ae15
DL
2278 lxc_list_for_each(iterator, ip) {
2279
2280 inetdev = iterator->elem;
2281
0093bb8c
DL
2282 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2283 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2284 if (err) {
2285 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2286 ifindex, strerror(-err));
82d5ae15
DL
2287 return -1;
2288 }
2289 }
2290
2291 return 0;
0ad19a3f 2292}
2293
82d5ae15 2294static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2295{
82d5ae15 2296 struct lxc_list *iterator;
7fa9074f 2297 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2298 int err;
0ad19a3f 2299
82d5ae15
DL
2300 lxc_list_for_each(iterator, ip) {
2301
2302 inet6dev = iterator->elem;
2303
b3df193c 2304 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2305 &inet6dev->mcast, &inet6dev->acast,
2306 inet6dev->prefix);
3cfc0f3a
MN
2307 if (err) {
2308 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2309 ifindex, strerror(-err));
82d5ae15 2310 return -1;
3cfc0f3a 2311 }
82d5ae15
DL
2312 }
2313
2314 return 0;
0ad19a3f 2315}
2316
82d5ae15 2317static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2318{
0ad19a3f 2319 char ifname[IFNAMSIZ];
0ad19a3f 2320 char *current_ifname = ifname;
3cfc0f3a 2321 int err;
0ad19a3f 2322
82d5ae15
DL
2323 /* empty network namespace */
2324 if (!netdev->ifindex) {
b0efbac4 2325 if (netdev->flags & IFF_UP) {
d472214b 2326 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2327 if (err) {
2328 ERROR("failed to set the loopback up : %s",
2329 strerror(-err));
82d5ae15
DL
2330 return -1;
2331 }
82d5ae15 2332 }
7b57e8b6 2333 return 0;
0ad19a3f 2334 }
13954cce 2335
b466dc33
BP
2336 /* get the new ifindex in case of physical netdev */
2337 if (netdev->type == LXC_NET_PHYS)
2338 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2339 ERROR("failed to get ifindex for %s",
2340 netdev->link);
2341 return -1;
2342 }
2343
82d5ae15
DL
2344 /* retrieve the name of the interface */
2345 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2346 ERROR("no interface corresponding to index '%d'",
82d5ae15 2347 netdev->ifindex);
0ad19a3f 2348 return -1;
2349 }
13954cce 2350
018ef520 2351 /* default: let the system to choose one interface name */
9d083402 2352 if (!netdev->name)
fb6d9b2f
DL
2353 netdev->name = netdev->type == LXC_NET_PHYS ?
2354 netdev->link : "eth%d";
018ef520 2355
82d5ae15 2356 /* rename the interface name */
b84f58b9 2357 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3cfc0f3a
MN
2358 if (err) {
2359 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2360 strerror(-err));
018ef520
DL
2361 return -1;
2362 }
2363
2364 /* Re-read the name of the interface because its name has changed
2365 * and would be automatically allocated by the system
2366 */
82d5ae15 2367 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2368 ERROR("no interface corresponding to index '%d'",
82d5ae15 2369 netdev->ifindex);
018ef520 2370 return -1;
0ad19a3f 2371 }
2372
82d5ae15
DL
2373 /* set a mac address */
2374 if (netdev->hwaddr) {
2375 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2376 ERROR("failed to setup hw address for '%s'",
82d5ae15 2377 current_ifname);
0ad19a3f 2378 return -1;
2379 }
2380 }
2381
82d5ae15
DL
2382 /* setup ipv4 addresses on the interface */
2383 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2384 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2385 ifname);
2386 return -1;
2387 }
2388
82d5ae15
DL
2389 /* setup ipv6 addresses on the interface */
2390 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2391 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2392 ifname);
2393 return -1;
2394 }
2395
82d5ae15 2396 /* set the network device up */
b0efbac4 2397 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2398 int err;
2399
d472214b 2400 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2401 if (err) {
2402 ERROR("failed to set '%s' up : %s", current_ifname,
2403 strerror(-err));
0ad19a3f 2404 return -1;
2405 }
2406
2407 /* the network is up, make the loopback up too */
d472214b 2408 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2409 if (err) {
2410 ERROR("failed to set the loopback up : %s",
2411 strerror(-err));
0ad19a3f 2412 return -1;
2413 }
2414 }
2415
f8fee0e2
MK
2416 /* We can only set up the default routes after bringing
2417 * up the interface, sine bringing up the interface adds
2418 * the link-local routes and we can't add a default
2419 * route if the gateway is not reachable. */
2420
2421 /* setup ipv4 gateway on the interface */
2422 if (netdev->ipv4_gateway) {
2423 if (!(netdev->flags & IFF_UP)) {
2424 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2425 return -1;
2426 }
2427
2428 if (lxc_list_empty(&netdev->ipv4)) {
2429 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2430 return -1;
2431 }
2432
2433 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2434 if (err) {
fc739df5
SG
2435 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2436 if (err) {
2437 ERROR("failed to add ipv4 dest for '%s': %s",
2438 ifname, strerror(-err));
2439 }
2440
2441 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2442 if (err) {
2443 ERROR("failed to setup ipv4 gateway for '%s': %s",
2444 ifname, strerror(-err));
2445 if (netdev->ipv4_gateway_auto) {
2446 char buf[INET_ADDRSTRLEN];
2447 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2448 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2449 }
2450 return -1;
19a26f82 2451 }
f8fee0e2
MK
2452 }
2453 }
2454
2455 /* setup ipv6 gateway on the interface */
2456 if (netdev->ipv6_gateway) {
2457 if (!(netdev->flags & IFF_UP)) {
2458 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2459 return -1;
2460 }
2461
2462 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2463 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2464 return -1;
2465 }
2466
2467 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2468 if (err) {
fc739df5
SG
2469 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2470 if (err) {
2471 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2472 ifname, strerror(-err));
19a26f82 2473 }
fc739df5
SG
2474
2475 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2476 if (err) {
2477 ERROR("failed to setup ipv6 gateway for '%s': %s",
2478 ifname, strerror(-err));
2479 if (netdev->ipv6_gateway_auto) {
2480 char buf[INET6_ADDRSTRLEN];
2481 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2482 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2483 }
2484 return -1;
2485 }
f8fee0e2
MK
2486 }
2487 }
2488
cd54d859
DL
2489 DEBUG("'%s' has been setup", current_ifname);
2490
0ad19a3f 2491 return 0;
2492}
2493
5f4535a3 2494static int setup_network(struct lxc_list *network)
0ad19a3f 2495{
82d5ae15 2496 struct lxc_list *iterator;
82d5ae15 2497 struct lxc_netdev *netdev;
0ad19a3f 2498
5f4535a3 2499 lxc_list_for_each(iterator, network) {
cd54d859 2500
5f4535a3 2501 netdev = iterator->elem;
82d5ae15
DL
2502
2503 if (setup_netdev(netdev)) {
2504 ERROR("failed to setup netdev");
2505 return -1;
2506 }
2507 }
cd54d859 2508
5f4535a3
DL
2509 if (!lxc_list_empty(network))
2510 INFO("network has been setup");
cd54d859
DL
2511
2512 return 0;
0ad19a3f 2513}
2514
7b35f3d6
SH
2515void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2516{
2517 int i;
2518
2519 INFO("running to reset %d nic names", conf->num_savednics);
2520 for (i=0; i<conf->num_savednics; i++) {
2521 struct saved_nic *s = &conf->saved_nics[i];
2522 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2523 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2524 free(s->orig_name);
2525 }
2526 conf->num_savednics = 0;
2527 free(conf->saved_nics);
2528}
2529
ae9242c8
SH
2530static char *default_rootfs_mount = LXCROOTFSMOUNT;
2531
7b379ab3 2532struct lxc_conf *lxc_conf_init(void)
089cd8b8 2533{
7b379ab3 2534 struct lxc_conf *new;
26ddeedd 2535 int i;
7b379ab3
MN
2536
2537 new = malloc(sizeof(*new));
2538 if (!new) {
2539 ERROR("lxc_conf_init : %m");
2540 return NULL;
2541 }
2542 memset(new, 0, sizeof(*new));
2543
b40a606e 2544 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2545 new->personality = -1;
bc6928ff 2546 new->autodev = -1;
596a818d
DE
2547 new->console.log_path = NULL;
2548 new->console.log_fd = -1;
28a4b0e5 2549 new->console.path = NULL;
63376d7d 2550 new->console.peer = -1;
b5159817
DE
2551 new->console.peerpty.busy = -1;
2552 new->console.peerpty.master = -1;
2553 new->console.peerpty.slave = -1;
63376d7d
DL
2554 new->console.master = -1;
2555 new->console.slave = -1;
2556 new->console.name[0] = '\0';
d2e30e99 2557 new->maincmd_fd = -1;
54c30e29 2558 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2559 if (!new->rootfs.mount) {
2560 ERROR("lxc_conf_init : %m");
2561 free(new);
2562 return NULL;
2563 }
2f3f41d0 2564 new->kmsg = 1;
7b379ab3
MN
2565 lxc_list_init(&new->cgroup);
2566 lxc_list_init(&new->network);
2567 lxc_list_init(&new->mount_list);
81810dd1 2568 lxc_list_init(&new->caps);
1fb86a7c 2569 lxc_list_init(&new->keepcaps);
f6d3e3e4 2570 lxc_list_init(&new->id_map);
26ddeedd
SH
2571 for (i=0; i<NUM_LXC_HOOKS; i++)
2572 lxc_list_init(&new->hooks[i]);
fe4de9a6
DE
2573 new->lsm_aa_profile = NULL;
2574 new->lsm_se_context = NULL;
e075f5d9 2575 new->lsm_umount_proc = 0;
7b379ab3 2576
9f30a190
MM
2577 for (i = 0; i < LXC_NS_MAX; i++)
2578 new->inherit_ns_fd[i] = -1;
2579
7b379ab3 2580 return new;
089cd8b8
DL
2581}
2582
e3b4c4c4 2583static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2584{
8634bc19 2585 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2586 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2587 int err;
13954cce 2588
e892973e
DL
2589 if (netdev->priv.veth_attr.pair)
2590 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2591 else {
9ba8130c
SH
2592 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2593 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2594 ERROR("veth1 name too long");
2595 return -1;
2596 }
a0265685 2597 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2598 if (!veth1) {
2599 ERROR("failed to allocate a temporary name");
2600 return -1;
2601 }
74a2b586
JK
2602 /* store away for deconf */
2603 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2604 }
82d5ae15 2605
0e391e57 2606 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2607 veth2 = lxc_mkifname(veth2buf);
ad40563e 2608 if (!veth2) {
82d5ae15 2609 ERROR("failed to allocate a temporary name");
ad40563e 2610 goto out_delete;
0ad19a3f 2611 }
2612
3cfc0f3a
MN
2613 err = lxc_veth_create(veth1, veth2);
2614 if (err) {
2615 ERROR("failed to create %s-%s : %s", veth1, veth2,
2616 strerror(-err));
ad40563e 2617 goto out_delete;
0ad19a3f 2618 }
13954cce 2619
49684c0b
CS
2620 /* changing the high byte of the mac address to 0xfe, the bridge interface
2621 * will always keep the host's mac address and not take the mac address
2622 * of a container */
2623 err = setup_private_host_hw_addr(veth1);
2624 if (err) {
2625 ERROR("failed to change mac address of host interface '%s' : %s",
2626 veth1, strerror(-err));
2627 goto out_delete;
2628 }
2629
82d5ae15 2630 if (netdev->mtu) {
d472214b 2631 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2632 if (!err)
d472214b 2633 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2634 if (err) {
2635 ERROR("failed to set mtu '%s' for %s-%s : %s",
2636 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2637 goto out_delete;
75d09f83
DL
2638 }
2639 }
2640
3cfc0f3a
MN
2641 if (netdev->link) {
2642 err = lxc_bridge_attach(netdev->link, veth1);
2643 if (err) {
2644 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2645 veth1, netdev->link, strerror(-err));
2646 goto out_delete;
2647 }
eb14c10a
DL
2648 }
2649
82d5ae15
DL
2650 netdev->ifindex = if_nametoindex(veth2);
2651 if (!netdev->ifindex) {
36eb9bde 2652 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2653 goto out_delete;
2654 }
2655
d472214b 2656 err = lxc_netdev_up(veth1);
6e35af2e
DL
2657 if (err) {
2658 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2659 goto out_delete;
0ad19a3f 2660 }
2661
e3b4c4c4 2662 if (netdev->upscript) {
751d9dcd
DL
2663 err = run_script(handler->name, "net", netdev->upscript, "up",
2664 "veth", veth1, (char*) NULL);
2665 if (err)
e3b4c4c4 2666 goto out_delete;
e3b4c4c4
ST
2667 }
2668
82d5ae15
DL
2669 DEBUG("instanciated veth '%s/%s', index is '%d'",
2670 veth1, veth2, netdev->ifindex);
2671
6ab9ab6d 2672 return 0;
eb14c10a
DL
2673
2674out_delete:
b84f58b9 2675 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2676 if (!netdev->priv.veth_attr.pair && veth1)
2677 free(veth1);
2678 if(veth2)
2679 free(veth2);
6ab9ab6d 2680 return -1;
13954cce 2681}
d957ae2d 2682
74a2b586
JK
2683static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2684{
2685 char *veth1;
2686 int err;
2687
2688 if (netdev->priv.veth_attr.pair)
2689 veth1 = netdev->priv.veth_attr.pair;
2690 else
2691 veth1 = netdev->priv.veth_attr.veth1;
2692
2693 if (netdev->downscript) {
2694 err = run_script(handler->name, "net", netdev->downscript,
2695 "down", "veth", veth1, (char*) NULL);
2696 if (err)
2697 return -1;
2698 }
2699 return 0;
2700}
2701
e3b4c4c4 2702static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2703{
0e391e57 2704 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2705 int err;
d957ae2d
MT
2706
2707 if (!netdev->link) {
2708 ERROR("no link specified for macvlan netdev");
2709 return -1;
2710 }
13954cce 2711
9ba8130c
SH
2712 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2713 if (err >= sizeof(peerbuf))
2714 return -1;
82d5ae15 2715
a0265685 2716 peer = lxc_mkifname(peerbuf);
ad40563e 2717 if (!peer) {
82d5ae15
DL
2718 ERROR("failed to make a temporary name");
2719 return -1;
0ad19a3f 2720 }
2721
3cfc0f3a
MN
2722 err = lxc_macvlan_create(netdev->link, peer,
2723 netdev->priv.macvlan_attr.mode);
2724 if (err) {
2725 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2726 peer, netdev->link, strerror(-err));
ad40563e 2727 goto out;
0ad19a3f 2728 }
2729
82d5ae15
DL
2730 netdev->ifindex = if_nametoindex(peer);
2731 if (!netdev->ifindex) {
36eb9bde 2732 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2733 goto out;
22ebac19 2734 }
2735
e3b4c4c4 2736 if (netdev->upscript) {
751d9dcd
DL
2737 err = run_script(handler->name, "net", netdev->upscript, "up",
2738 "macvlan", netdev->link, (char*) NULL);
2739 if (err)
ad40563e 2740 goto out;
e3b4c4c4
ST
2741 }
2742
e892973e
DL
2743 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2744 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2745
d957ae2d 2746 return 0;
ad40563e
ÇO
2747out:
2748 lxc_netdev_delete_by_name(peer);
2749 free(peer);
2750 return -1;
0ad19a3f 2751}
2752
74a2b586
JK
2753static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2754{
2755 int err;
2756
2757 if (netdev->downscript) {
2758 err = run_script(handler->name, "net", netdev->downscript,
2759 "down", "macvlan", netdev->link,
2760 (char*) NULL);
2761 if (err)
2762 return -1;
2763 }
2764 return 0;
2765}
2766
26c39028 2767/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2768static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2769{
2770 char peer[IFNAMSIZ];
3cfc0f3a 2771 int err;
26c39028
JHS
2772
2773 if (!netdev->link) {
2774 ERROR("no link specified for vlan netdev");
2775 return -1;
2776 }
2777
9ba8130c
SH
2778 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2779 if (err >= sizeof(peer)) {
2780 ERROR("peer name too long");
2781 return -1;
2782 }
26c39028 2783
3cfc0f3a
MN
2784 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2785 if (err) {
2786 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2787 peer, netdev->link, strerror(-err));
26c39028
JHS
2788 return -1;
2789 }
2790
2791 netdev->ifindex = if_nametoindex(peer);
2792 if (!netdev->ifindex) {
2793 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2794 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2795 return -1;
2796 }
2797
e892973e
DL
2798 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2799 netdev->ifindex);
2800
26c39028
JHS
2801 return 0;
2802}
2803
74a2b586
JK
/* Nothing to undo for a vlan device; always succeeds. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void) handler;
	(void) netdev;

	return 0;
}
2808
e3b4c4c4 2809static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2810{
6168e99f
DL
2811 if (!netdev->link) {
2812 ERROR("no link specified for the physical interface");
2813 return -1;
2814 }
2815
9d083402 2816 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2817 if (!netdev->ifindex) {
9d083402 2818 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2819 return -1;
2820 }
2821
e3b4c4c4
ST
2822 if (netdev->upscript) {
2823 int err;
751d9dcd
DL
2824 err = run_script(handler->name, "net", netdev->upscript,
2825 "up", "phys", netdev->link, (char*) NULL);
2826 if (err)
e3b4c4c4 2827 return -1;
e3b4c4c4
ST
2828 }
2829
82d5ae15 2830 return 0;
0ad19a3f 2831}
2832
74a2b586
JK
2833static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2834{
2835 int err;
2836
2837 if (netdev->downscript) {
2838 err = run_script(handler->name, "net", netdev->downscript,
2839 "down", "phys", netdev->link, (char*) NULL);
2840 if (err)
2841 return -1;
2842 }
2843 return 0;
2844}
2845
e3b4c4c4 2846static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2847{
82d5ae15 2848 netdev->ifindex = 0;
e3b4c4c4
ST
2849 if (netdev->upscript) {
2850 int err;
751d9dcd
DL
2851 err = run_script(handler->name, "net", netdev->upscript,
2852 "up", "empty", (char*) NULL);
2853 if (err)
e3b4c4c4 2854 return -1;
e3b4c4c4 2855 }
82d5ae15 2856 return 0;
0ad19a3f 2857}
2858
74a2b586
JK
2859static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2860{
2861 int err;
2862
2863 if (netdev->downscript) {
2864 err = run_script(handler->name, "net", netdev->downscript,
2865 "down", "empty", (char*) NULL);
2866 if (err)
2867 return -1;
2868 }
2869 return 0;
2870}
2871
e3b4c4c4 2872int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2873{
e3b4c4c4 2874 struct lxc_list *network = &handler->conf->network;
82d5ae15 2875 struct lxc_list *iterator;
82d5ae15 2876 struct lxc_netdev *netdev;
cbef6c52
SH
2877 int am_root = (getuid() == 0);
2878
2879 if (!am_root)
2880 return 0;
0ad19a3f 2881
5f4535a3 2882 lxc_list_for_each(iterator, network) {
0ad19a3f 2883
5f4535a3 2884 netdev = iterator->elem;
13954cce 2885
24654103 2886 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2887 ERROR("invalid network configuration type '%d'",
5f4535a3 2888 netdev->type);
82d5ae15
DL
2889 return -1;
2890 }
0ad19a3f 2891
e3b4c4c4 2892 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2893 ERROR("failed to create netdev");
2894 return -1;
2895 }
e3b4c4c4 2896
0ad19a3f 2897 }
2898
2899 return 0;
2900}
2901
74a2b586 2902void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2903{
74a2b586 2904 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2905 struct lxc_list *iterator;
2906 struct lxc_netdev *netdev;
2907
2908 lxc_list_for_each(iterator, network) {
2909 netdev = iterator->elem;
d472214b 2910
74a2b586 2911 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2912 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2913 WARN("failed to rename to the initial name the " \
2914 "netdev '%s'", netdev->link);
d472214b 2915 continue;
d8f8e352 2916 }
d472214b 2917
74a2b586
JK
2918 if (netdev_deconf[netdev->type](handler, netdev)) {
2919 WARN("failed to destroy netdev");
2920 }
2921
d8f8e352
DL
2922 /* Recent kernel remove the virtual interfaces when the network
2923 * namespace is destroyed but in case we did not moved the
2924 * interface to the network namespace, we have to destroy it
2925 */
74a2b586
JK
2926 if (netdev->ifindex != 0 &&
2927 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2928 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2929 }
2930}
2931
cbef6c52
SH
2932int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
2933{
2934 pid_t child;
2935
2936 if (netdev->type != LXC_NET_VETH) {
2937 ERROR("nic type %d not support for unprivileged use",
2938 netdev->type);
2939 return -1;
2940 }
2941
2942 if ((child = fork()) < 0) {
2943 SYSERROR("fork");
2944 return -1;
2945 }
2946
2947 if (child > 0)
2948 return wait_for_pid(child);
2949
2950 // Call lxc-user-nic pid type bridge
2951 char pidstr[20];
4119204e 2952 char *args[] = { "lxc-user-nic", pidstr, "veth", netdev->link, netdev->name, NULL };
cbef6c52
SH
2953 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
2954 pidstr[19] = '\0';
2955 execvp("lxc-user-nic", args);
2956 SYSERROR("execvp lxc-user-nic");
2957 exit(1);
2958}
2959
5f4535a3 2960int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 2961{
82d5ae15 2962 struct lxc_list *iterator;
82d5ae15 2963 struct lxc_netdev *netdev;
cbef6c52 2964 int am_root = (getuid() == 0);
3cfc0f3a 2965 int err;
0ad19a3f 2966
5f4535a3 2967 lxc_list_for_each(iterator, network) {
82d5ae15 2968
5f4535a3 2969 netdev = iterator->elem;
82d5ae15 2970
cbef6c52
SH
2971 if (!am_root) {
2972 if (unpriv_assign_nic(netdev, pid))
2973 return -1;
2974 // TODO fill in netdev->ifindex and name
2975 continue;
2976 }
236087a6
DL
2977 /* empty network namespace, nothing to move */
2978 if (!netdev->ifindex)
2979 continue;
2980
d472214b 2981 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
2982 if (err) {
2983 ERROR("failed to move '%s' to the container : %s",
2984 netdev->link, strerror(-err));
82d5ae15
DL
2985 return -1;
2986 }
2987
c1c75c04 2988 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 2989 }
2990
2991 return 0;
2992}
2993
251d0d2a
DE
2994static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
2995 size_t buf_size)
f6d3e3e4
SH
2996{
2997 char path[PATH_MAX];
e4ccd113 2998 int ret, closeret;
f6d3e3e4
SH
2999 FILE *f;
3000
3001 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3002 if (ret < 0 || ret >= PATH_MAX) {
3003 fprintf(stderr, "%s: path name too long", __func__);
3004 return -E2BIG;
3005 }
025ed0f3 3006 process_lock();
f6d3e3e4 3007 f = fopen(path, "w");
025ed0f3 3008 process_unlock();
f6d3e3e4
SH
3009 if (!f) {
3010 perror("open");
3011 return -EINVAL;
3012 }
251d0d2a 3013 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3014 if (ret < 0)
e4ccd113 3015 SYSERROR("writing id mapping");
025ed0f3 3016 process_lock();
e4ccd113 3017 closeret = fclose(f);
025ed0f3 3018 process_unlock();
e4ccd113
SH
3019 if (closeret)
3020 SYSERROR("writing id mapping");
3021 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3022}
3023
3024int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3025{
3026 struct lxc_list *iterator;
3027 struct id_map *map;
3028 int ret = 0;
251d0d2a 3029 enum idtype type;
4f7521b4 3030 char *buf = NULL, *pos;
cf3ef16d 3031 int am_root = (getuid() == 0);
251d0d2a
DE
3032
3033 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3034 int left, fill;
cf3ef16d
SH
3035 int had_entry = 0;
3036 if (!buf) {
3037 buf = pos = malloc(4096);
4f7521b4
SH
3038 if (!buf)
3039 return -ENOMEM;
cf3ef16d
SH
3040 }
3041 pos = buf;
3042 if (!am_root)
3043 pos += sprintf(buf, "new%cidmap %d ",
3044 type == ID_TYPE_UID ? 'u' : 'g',
3045 pid);
4f7521b4 3046
cf3ef16d
SH
3047 lxc_list_for_each(iterator, idmap) {
3048 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3049 map = iterator->elem;
cf3ef16d
SH
3050 if (map->idtype != type)
3051 continue;
3052
3053 had_entry = 1;
3054 left = 4096 - (pos - buf);
3055 fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
3056 map->hostid, map->range);
3057 if (fill <= 0 || fill >= left)
3058 SYSERROR("snprintf failed, too many mappings");
3059 pos += fill;
251d0d2a 3060 }
cf3ef16d 3061 if (!had_entry)
4f7521b4 3062 continue;
cf3ef16d
SH
3063 left = 4096 - (pos - buf);
3064 fill = snprintf(pos, left, "\n");
3065 if (fill <= 0 || fill >= left)
3066 SYSERROR("snprintf failed, too many mappings");
3067 pos += fill;
3068
3069 if (am_root)
3070 ret = write_id_mapping(type, pid, buf, pos-buf);
3071 else
3072 ret = system(buf);
3073
f6d3e3e4
SH
3074 if (ret)
3075 break;
3076 }
251d0d2a 3077
4f7521b4
SH
3078 if (buf)
3079 free(buf);
f6d3e3e4
SH
3080 return ret;
3081}
3082
cf3ef16d
SH
3083/*
3084 * return the host uid to which the container root is mapped, or -1 on
3085 * error
3086 */
c4d10a05 3087uid_t get_mapped_rootid(struct lxc_conf *conf)
cf3ef16d
SH
3088{
3089 struct lxc_list *it;
3090 struct id_map *map;
3091
3092 lxc_list_for_each(it, &conf->id_map) {
3093 map = it->elem;
3094 if (map->idtype != ID_TYPE_UID)
3095 continue;
3096 if (map->nsid != 0)
3097 continue;
c4d10a05 3098 return (uid_t) map->hostid;
cf3ef16d 3099 }
c4d10a05 3100 return (uid_t)-1;
cf3ef16d
SH
3101}
3102
57d116ab 3103int mapped_hostid(int id, struct lxc_conf *conf)
cf3ef16d
SH
3104{
3105 struct lxc_list *it;
3106 struct id_map *map;
3107 lxc_list_for_each(it, &conf->id_map) {
3108 map = it->elem;
3109 if (map->idtype != ID_TYPE_UID)
3110 continue;
3111 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3112 return (id - map->hostid) + map->nsid;
cf3ef16d 3113 }
57d116ab 3114 return -1;
cf3ef16d
SH
3115}
3116
3117int find_unmapped_nsuid(struct lxc_conf *conf)
3118{
3119 struct lxc_list *it;
3120 struct id_map *map;
3121 uid_t freeid = 0;
3122again:
3123 lxc_list_for_each(it, &conf->id_map) {
3124 map = it->elem;
3125 if (map->idtype != ID_TYPE_UID)
3126 continue;
3127 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3128 freeid = map->nsid + map->range;
3129 goto again;
3130 }
3131 }
3132 return freeid;
3133}
3134
19a26f82
MK
3135int lxc_find_gateway_addresses(struct lxc_handler *handler)
3136{
3137 struct lxc_list *network = &handler->conf->network;
3138 struct lxc_list *iterator;
3139 struct lxc_netdev *netdev;
3140 int link_index;
3141
3142 lxc_list_for_each(iterator, network) {
3143 netdev = iterator->elem;
3144
3145 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3146 continue;
3147
3148 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3149 ERROR("gateway = auto only supported for "
3150 "veth and macvlan");
3151 return -1;
3152 }
3153
3154 if (!netdev->link) {
3155 ERROR("gateway = auto needs a link interface");
3156 return -1;
3157 }
3158
3159 link_index = if_nametoindex(netdev->link);
3160 if (!link_index)
3161 return -EINVAL;
3162
3163 if (netdev->ipv4_gateway_auto) {
3164 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3165 ERROR("failed to automatically find ipv4 gateway "
3166 "address from link interface '%s'", netdev->link);
3167 return -1;
3168 }
3169 }
3170
3171 if (netdev->ipv6_gateway_auto) {
3172 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3173 ERROR("failed to automatically find ipv6 gateway "
3174 "address from link interface '%s'", netdev->link);
3175 return -1;
3176 }
3177 }
3178 }
3179
3180 return 0;
3181}
3182
5e4a62bf 3183int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3184{
5e4a62bf 3185 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3186 int i, ret;
b0a33c1e 3187
5e4a62bf
DL
3188 /* no tty in the configuration */
3189 if (!conf->tty)
b0a33c1e 3190 return 0;
3191
13954cce 3192 tty_info->pty_info =
e4e7d59d 3193 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3194 if (!tty_info->pty_info) {
36eb9bde 3195 SYSERROR("failed to allocate pty_info");
985d15b1 3196 return -1;
b0a33c1e 3197 }
3198
985d15b1 3199 for (i = 0; i < conf->tty; i++) {
13954cce 3200
b0a33c1e 3201 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3202
025ed0f3
SH
3203 process_lock();
3204 ret = openpty(&pty_info->master, &pty_info->slave,
3205 pty_info->name, NULL, NULL);
3206 process_unlock();
3207 if (ret) {
36eb9bde 3208 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3209 tty_info->nbtty = i;
3210 lxc_delete_tty(tty_info);
3211 return -1;
b0a33c1e 3212 }
3213
5332bb84
DL
3214 DEBUG("allocated pty '%s' (%d/%d)",
3215 pty_info->name, pty_info->master, pty_info->slave);
3216
b035ad62
MS
3217 /* Prevent leaking the file descriptors to the container */
3218 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3219 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3220
b0a33c1e 3221 pty_info->busy = 0;
3222 }
3223
985d15b1 3224 tty_info->nbtty = conf->tty;
1ac470c0
DL
3225
3226 INFO("tty's configured");
3227
985d15b1 3228 return 0;
b0a33c1e 3229}
3230
3231void lxc_delete_tty(struct lxc_tty_info *tty_info)
3232{
3233 int i;
3234
3235 for (i = 0; i < tty_info->nbtty; i++) {
3236 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3237
025ed0f3 3238 process_lock();
b0a33c1e 3239 close(pty_info->master);
3240 close(pty_info->slave);
025ed0f3 3241 process_unlock();
b0a33c1e 3242 }
3243
3244 free(tty_info->pty_info);
3245 tty_info->nbtty = 0;
3246}
3247
/*
 * chown_mapped_root: for an unprivileged user with uid X to chown a dir
 * to subuid Y, he needs to run chown as root in a userns where
 * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
 * X.  That way, the container root is privileged with respect to
 * hostuid X, allowing him to do the chown.
 *
 * When already running with euid 0 the chown is done directly. Otherwise
 * a child is forked which executes
 *   lxc-usernsexec -m <map1> -m <map2> -m <map3> -- chown 0 <path>
 *
 * Returns 0 on success of the direct chown, -1 on local errors, and
 * otherwise whatever wait_for_pid() reports for the child.
 */
int chown_mapped_root(char *path, struct lxc_conf *conf)
{
	uid_t rootid;
	pid_t pid;

	rootid = get_mapped_rootid(conf);
	/*
	 * uid_t is unsigned, so a "<= 0" test can only ever catch 0. A
	 * negative failure value (which the original "<= 0" test suggests
	 * the helper may return - TODO confirm) arrives here as (uid_t)-1,
	 * which is never a valid uid anyway.
	 */
	if (rootid == 0 || rootid == (uid_t)-1) {
		ERROR("No mapping for container root");
		return -1;
	}

	if (geteuid() == 0) {
		if (chown(path, rootid, -1) < 0) {
			ERROR("Error chowning %s", path);
			return -1;
		}
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		SYSERROR("Failed forking");
		return -1;
	}

	if (!pid) {
		int hostuid = geteuid(), ret;
		char map1[100], map2[100], map3[100];
		char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "-m",
				map3, "--", "chown", "0", path, NULL};

		/*
		 * From here on we are the child: every failure must end the
		 * process. Returning would let the child continue in the
		 * caller's code as a second copy of the parent.
		 */

		// "u:0:rootid:1"
		ret = snprintf(map1, sizeof(map1), "u:0:%u:1",
			       (unsigned int)rootid);
		if (ret < 0 || (size_t)ret >= sizeof(map1)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		// "u:hostuid:hostuid:1"
		ret = snprintf(map2, sizeof(map2), "u:%d:%d:1", hostuid, hostuid);
		if (ret < 0 || (size_t)ret >= sizeof(map2)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		// "g:0:hostgid:1"
		ret = snprintf(map3, sizeof(map3), "g:0:%u:1",
			       (unsigned int)getgid());
		if (ret < 0 || (size_t)ret >= sizeof(map3)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		/* execvp only returns on failure */
		execvp("lxc-usernsexec", args);
		SYSERROR("Failed executing usernsexec");
		exit(1);
	}

	return wait_for_pid(pid);
}
3309
c4d10a05 3310int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3311{
c4d10a05 3312 int i;
f6d3e3e4 3313
c4d10a05 3314 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3315 return 0;
c4d10a05
SH
3316
3317 for (i = 0; i < c->tty_info.nbtty; i++) {
3318 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3319
3320 if (chown_mapped_root(pty_info->name, c) < 0) {
3321 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3322 return -1;
3323 }
3324 }
3325
c4d10a05
SH
3326 if (chown_mapped_root(c->console.name, c) < 0) {
3327 ERROR("Failed to chown %s", c->console.name);
3328 return -1;
3329 }
3330
f6d3e3e4
SH
3331 return 0;
3332}
3333
bc6928ff
MW
/*
 * This routine is called when the configuration does not already specify a value
 * for autodev (mounting a file system on /dev and populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container has an /etc/systemd/system directory then it
 * is probably running systemd as the init process and it needs the autodev
 * mount to prevent it from mounting devtmpfs on /dev on its own, causing conflicts
 * in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev, as this then enables us to use subdirectories under /dev for the container
 * /dev directories and we can fake udev devices.
 */
/*
 * Carries the argv of the command to run; check_autodev() only looks at
 * argv[0].
 */
struct start_args {
	char *const *argv;
};

/* Upper bound on the number of symlinks check_autodev() will follow. */
#define MAX_SYMLINK_DEPTH 32

/*
 * check_autodev: heuristically decide whether autodev should be enabled.
 *
 * rootfs: path of the container root file system.
 * data:   optional struct start_args; argv[0] replaces the default
 *         command "/sbin/init" when present.
 *
 * The command path is followed through its symlinks (resolved relative to
 * the rootfs); if any path along the way contains "systemd" autodev is
 * requested.
 *
 * Returns 1 if autodev should be used, 0 if it is not required and -2 if
 * this could not be determined.
 */
int check_autodev( const char *rootfs, void *data )
{
	struct start_args *arg = data;
	int ret;
	int loop_count = 0;
	struct stat s;
	char absrootfs[MAXPATHLEN];
	char path[MAXPATHLEN];
	char abs_path[MAXPATHLEN];
	char *command = "/sbin/init";

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (arg && arg->argv && arg->argv[0]) {
		command = arg->argv[0];
		DEBUG("Set exec command to %s\n", command );
	}

	/*
	 * strncpy() would leave path unterminated for an over-long command;
	 * snprintf() always terminates and lets us reject truncation.
	 */
	ret = snprintf(path, sizeof(path), "%s", command);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return -2;

	/*
	 * NOTE(review): this tests the path as seen by the caller, not the
	 * path below absrootfs - confirm that this is intended.
	 */
	if (0 != access(path, F_OK) || 0 != stat(path, &s))
		return -2;

	/* Dereference down the symlink merry path testing as we go. */
	/* If anything references systemd in the path - set autodev! */
	/* Renormalize to the rootfs before each dereference */
	/* Relative symlinks should fall out in the wash even with .. */
	while (1) {
		if (strstr(path, "systemd")) {
			INFO("Container with systemd init detected - enabling autodev!");
			return 1;
		}

		ret = snprintf(abs_path, sizeof(abs_path), "%s/%s", absrootfs, path);
		/* snprintf reports truncation as ret >= buffer size */
		if (ret < 0 || (size_t)ret >= sizeof(abs_path))
			return -2;

		/* leave room for the terminator readlink() does not write */
		ret = readlink(abs_path, path, sizeof(path) - 1);

		if ((ret <= 0) || (++loop_count > MAX_SYMLINK_DEPTH))
			break; /* Break out for other tests */

		path[ret] = '\0';
	}

	/*
	 * Add future checks here.
	 * Return positive if we should go autodev
	 * Return 0 if we should NOT go autodev
	 * Return negative if we encounter an error or can not determine...
	 */

	/* All else fails, we don't need autodev */
	INFO("Autodev not required.");
	return 0;
}
3415
3416int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath, struct cgroup_process_info *cgroup_info, void *data)
0ad19a3f 3417{
6c544cb3
MM
3418 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3419 if (setup_utsname(lxc_conf->utsname)) {
3420 ERROR("failed to setup the utsname for '%s'", name);
3421 return -1;
3422 }
0ad19a3f 3423 }
3424
5f4535a3 3425 if (setup_network(&lxc_conf->network)) {
36eb9bde 3426 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3427 return -1;
0ad19a3f 3428 }
3429
283678ed 3430 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3431 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3432 return -1;
3433 }
5ea6163a 3434
cc28d0b0 3435 if (setup_rootfs(lxc_conf)) {
ac778708 3436 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3437 return -1;
0ad19a3f 3438 }
3439
bc6928ff
MW
3440 if (lxc_conf->autodev < 0) {
3441 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
3442 }
3443
3444 if (lxc_conf->autodev > 0) {
3445 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 3446 ERROR("failed to mount /dev in the container");
c6883f38
SH
3447 return -1;
3448 }
3449 }
3450
368bbc02
CS
3451 /* do automatic mounts (mainly /proc and /sys), but exclude
3452 * those that need to wait until other stuff has finished
3453 */
b06b8511 3454 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3455 ERROR("failed to setup the automatic mounts for '%s'", name);
3456 return -1;
3457 }
3458
80a881b2 3459 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3460 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3461 return -1;
576f946d 3462 }
3463
c1dc38c2 3464 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3465 ERROR("failed to setup the mount entries for '%s'", name);
3466 return -1;
3467 }
3468
368bbc02
CS
3469 /* now mount only cgroup, if wanted;
3470 * before, /sys could not have been mounted
3471 * (is either mounted automatically or via fstab entries)
3472 */
b06b8511 3473 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3474 ERROR("failed to setup the automatic mounts for '%s'", name);
3475 return -1;
3476 }
3477
283678ed 3478 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3479 ERROR("failed to run mount hooks for container '%s'.", name);
3480 return -1;
3481 }
3482
bc6928ff 3483 if (lxc_conf->autodev > 0) {
283678ed 3484 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3485 ERROR("failed to run autodev hooks for container '%s'.", name);
3486 return -1;
3487 }
91c3830e
SH
3488 if (setup_autodev(lxc_conf->rootfs.mount)) {
3489 ERROR("failed to populate /dev in the container");
3490 return -1;
3491 }
3492 }
368bbc02 3493
37903589 3494 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3495 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3496 return -1;
6e590161 3497 }
3498
7e0e1d94
AV
3499 if (lxc_conf->kmsg) {
3500 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3501 ERROR("failed to setup kmsg for '%s'", name);
3502 }
1bd051a6 3503
37903589 3504 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3505 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3506 return -1;
b0a33c1e 3507 }
3508
fe4de9a6
DE
3509 /* mount /proc if needed for LSM transition */
3510 if (lsm_proc_mount(lxc_conf) < 0) {
3511 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3512 return -1;
e075f5d9 3513 }
e075f5d9 3514
ac778708 3515 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3516 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3517 return -1;
ed502555 3518 }
3519
571e6ec8 3520 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3521 ERROR("failed to setup the new pts instance");
95b5ffaf 3522 return -1;
3c26f34e 3523 }
3524
cccc74b5
DL
3525 if (setup_personality(lxc_conf->personality)) {
3526 ERROR("failed to setup personality");
3527 return -1;
3528 }
3529
f6d3e3e4 3530 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3531 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3532 if (!lxc_list_empty(&lxc_conf->caps)) {
3533 ERROR("Simultaneously requested dropping and keeping caps");
3534 return -1;
3535 }
3536 if (dropcaps_except(&lxc_conf->keepcaps)) {
3537 ERROR("failed to keep requested caps\n");
3538 return -1;
3539 }
3540 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3541 ERROR("failed to drop capabilities");
3542 return -1;
3543 }
81810dd1
DL
3544 }
3545
cd54d859
DL
3546 NOTICE("'%s' is setup.", name);
3547
0ad19a3f 3548 return 0;
3549}
26ddeedd 3550
283678ed
SH
3551int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3552 const char *lxcpath, char *argv[])
26ddeedd
SH
3553{
3554 int which = -1;
3555 struct lxc_list *it;
3556
3557 if (strcmp(hook, "pre-start") == 0)
3558 which = LXCHOOK_PRESTART;
5ea6163a
SH
3559 else if (strcmp(hook, "pre-mount") == 0)
3560 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3561 else if (strcmp(hook, "mount") == 0)
3562 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3563 else if (strcmp(hook, "autodev") == 0)
3564 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3565 else if (strcmp(hook, "start") == 0)
3566 which = LXCHOOK_START;
3567 else if (strcmp(hook, "post-stop") == 0)
3568 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3569 else if (strcmp(hook, "clone") == 0)
3570 which = LXCHOOK_CLONE;
26ddeedd
SH
3571 else
3572 return -1;
3573 lxc_list_for_each(it, &conf->hooks[which]) {
3574 int ret;
3575 char *hookname = it->elem;
283678ed 3576 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3577 if (ret)
3578 return ret;
3579 }
3580 return 0;
3581}
72d0e1cb 3582
427b3a21 3583static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3584{
3585 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3586 struct lxc_list *it2,*next;
72d0e1cb
SG
3587
3588 lxc_list_del(it);
3589
3590 if (netdev->link)
3591 free(netdev->link);
3592 if (netdev->name)
3593 free(netdev->name);
c9bb9a85
DE
3594 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3595 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3596 if (netdev->upscript)
3597 free(netdev->upscript);
3598 if (netdev->hwaddr)
3599 free(netdev->hwaddr);
3600 if (netdev->mtu)
3601 free(netdev->mtu);
3602 if (netdev->ipv4_gateway)
3603 free(netdev->ipv4_gateway);
3604 if (netdev->ipv6_gateway)
3605 free(netdev->ipv6_gateway);
9ebb03ad 3606 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3607 lxc_list_del(it2);
3608 free(it2->elem);
3609 free(it2);
3610 }
9ebb03ad 3611 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3612 lxc_list_del(it2);
3613 free(it2->elem);
3614 free(it2);
3615 }
d95db067 3616 free(netdev);
72d0e1cb
SG
3617 free(it);
3618}
3619
3620/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3621int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3622{
3623 char *p1;
3624 int ret, idx, i;
3625 struct lxc_list *it;
3626 struct lxc_netdev *netdev;
3627
3628 p1 = index(key, '.');
3629 if (!p1 || *(p1+1) == '\0')
3630 p1 = NULL;
3631
3632 ret = sscanf(key, "%d", &idx);
3633 if (ret != 1) return -1;
3634 if (idx < 0)
3635 return -1;
3636
3637 i = 0;
3638 lxc_list_for_each(it, &c->network) {
3639 if (i == idx)
3640 break;
3641 i++;
3642 }
3643 if (i < idx) // we don't have that many nics defined
3644 return -1;
3645
3646 if (!it || !it->elem)
3647 return -1;
3648
3649 netdev = it->elem;
3650
3651 if (!p1) {
3652 lxc_remove_nic(it);
3653 } else if (strcmp(p1, "ipv4") == 0) {
9ebb03ad
DE
3654 struct lxc_list *it2,*next;
3655 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3656 lxc_list_del(it2);
3657 free(it2->elem);
3658 free(it2);
3659 }
3660 } else if (strcmp(p1, "ipv6") == 0) {
9ebb03ad
DE
3661 struct lxc_list *it2,*next;
3662 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3663 lxc_list_del(it2);
3664 free(it2->elem);
3665 free(it2);
3666 }
3667 } else if (strcmp(p1, "link") == 0) {
3668 if (netdev->link) {
3669 free(netdev->link);
3670 netdev->link = NULL;
3671 }
3672 } else if (strcmp(p1, "name") == 0) {
3673 if (netdev->name) {
3674 free(netdev->name);
3675 netdev->name = NULL;
3676 }
3677 } else if (strcmp(p1, "script.up") == 0) {
3678 if (netdev->upscript) {
3679 free(netdev->upscript);
3680 netdev->upscript = NULL;
3681 }
3682 } else if (strcmp(p1, "hwaddr") == 0) {
3683 if (netdev->hwaddr) {
3684 free(netdev->hwaddr);
3685 netdev->hwaddr = NULL;
3686 }
3687 } else if (strcmp(p1, "mtu") == 0) {
3688 if (netdev->mtu) {
3689 free(netdev->mtu);
3690 netdev->mtu = NULL;
3691 }
3692 } else if (strcmp(p1, "ipv4_gateway") == 0) {
3693 if (netdev->ipv4_gateway) {
3694 free(netdev->ipv4_gateway);
3695 netdev->ipv4_gateway = NULL;
3696 }
3697 } else if (strcmp(p1, "ipv6_gateway") == 0) {
3698 if (netdev->ipv6_gateway) {
3699 free(netdev->ipv6_gateway);
3700 netdev->ipv6_gateway = NULL;
3701 }
3702 }
3703 else return -1;
3704
3705 return 0;
3706}
3707
3708int lxc_clear_config_network(struct lxc_conf *c)
3709{
9ebb03ad
DE
3710 struct lxc_list *it,*next;
3711 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3712 lxc_remove_nic(it);
3713 }
3714 return 0;
3715}
3716
3717int lxc_clear_config_caps(struct lxc_conf *c)
3718{
9ebb03ad 3719 struct lxc_list *it,*next;
72d0e1cb 3720
9ebb03ad 3721 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3722 lxc_list_del(it);
3723 free(it->elem);
3724 free(it);
3725 }
3726 return 0;
3727}
3728
4355ab5f 3729int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
3730 struct lxc_list *it, *next;
3731
4355ab5f 3732 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
3733 lxc_list_del(it);
3734 free(it->elem);
3735 free(it);
3736 }
3737 return 0;
3738}
3739
4355ab5f
SH
3740int lxc_clear_idmaps(struct lxc_conf *c)
3741{
3742 return lxc_free_idmap(&c->id_map);
3743}
3744
1fb86a7c
SH
3745int lxc_clear_config_keepcaps(struct lxc_conf *c)
3746{
3747 struct lxc_list *it,*next;
3748
3749 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3750 lxc_list_del(it);
3751 free(it->elem);
3752 free(it);
3753 }
3754 return 0;
3755}
3756
12a50cc6 3757int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 3758{
9ebb03ad 3759 struct lxc_list *it,*next;
72d0e1cb 3760 bool all = false;
12a50cc6 3761 const char *k = key + 11;
72d0e1cb
SG
3762
3763 if (strcmp(key, "lxc.cgroup") == 0)
3764 all = true;
3765
9ebb03ad 3766 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
3767 struct lxc_cgroup *cg = it->elem;
3768 if (!all && strcmp(cg->subsystem, k) != 0)
3769 continue;
3770 lxc_list_del(it);
3771 free(cg->subsystem);
3772 free(cg->value);
3773 free(cg);
3774 free(it);
3775 }
3776 return 0;
3777}
3778
3779int lxc_clear_mount_entries(struct lxc_conf *c)
3780{
9ebb03ad 3781 struct lxc_list *it,*next;
72d0e1cb 3782
9ebb03ad 3783 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
3784 lxc_list_del(it);
3785 free(it->elem);
3786 free(it);
3787 }
3788 return 0;
3789}
3790
12a50cc6 3791int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3792{
9ebb03ad 3793 struct lxc_list *it,*next;
17ed13a3 3794 bool all = false, done = false;
12a50cc6 3795 const char *k = key + 9;
72d0e1cb
SG
3796 int i;
3797
17ed13a3
SH
3798 if (strcmp(key, "lxc.hook") == 0)
3799 all = true;
3800
72d0e1cb 3801 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 3802 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 3803 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
3804 lxc_list_del(it);
3805 free(it->elem);
3806 free(it);
3807 }
3808 done = true;
72d0e1cb
SG
3809 }
3810 }
17ed13a3
SH
3811
3812 if (!done) {
3813 ERROR("Invalid hook key: %s", key);
3814 return -1;
3815 }
72d0e1cb
SG
3816 return 0;
3817}
8eb5694b 3818
7b35f3d6
SH
3819void lxc_clear_saved_nics(struct lxc_conf *conf)
3820{
3821 int i;
3822
3823 if (!conf->num_savednics)
3824 return;
3825 for (i=0; i < conf->num_savednics; i++)
3826 free(conf->saved_nics[i].orig_name);
3827 conf->saved_nics = 0;
3828 free(conf->saved_nics);
3829}
3830
8eb5694b
SH
3831void lxc_conf_free(struct lxc_conf *conf)
3832{
3833 if (!conf)
3834 return;
3835 if (conf->console.path)
3836 free(conf->console.path);
54c30e29 3837 if (conf->rootfs.mount)
8eb5694b 3838 free(conf->rootfs.mount);
d95db067
DE
3839 if (conf->rootfs.path)
3840 free(conf->rootfs.path);
3841 if (conf->utsname)
3842 free(conf->utsname);
3843 if (conf->ttydir)
3844 free(conf->ttydir);
3845 if (conf->fstab)
3846 free(conf->fstab);
fc7e8864
WM
3847 if (conf->rcfile)
3848 free(conf->rcfile);
8eb5694b 3849 lxc_clear_config_network(conf);
fe4de9a6
DE
3850 if (conf->lsm_aa_profile)
3851 free(conf->lsm_aa_profile);
3852 if (conf->lsm_se_context)
3853 free(conf->lsm_se_context);
769872f9 3854 lxc_seccomp_free(conf);
8eb5694b 3855 lxc_clear_config_caps(conf);
1fb86a7c 3856 lxc_clear_config_keepcaps(conf);
8eb5694b 3857 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 3858 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3859 lxc_clear_mount_entries(conf);
7b35f3d6 3860 lxc_clear_saved_nics(conf);
27c27d73 3861 lxc_clear_idmaps(conf);
8eb5694b
SH
3862 free(conf);
3863}
4355ab5f
SH
3864
/*
 * Argument bundle for run_userns_fn(): the function to call, its argument
 * and the pipe used to hold the child back until the parent is ready.
 */
struct userns_fn_data {
	int (*fn)(void *);
	void *arg;
	int p[2];
};

/*
 * run_userns_fn: entry point of the cloned child. Closes the write end of
 * the pipe, blocks until one byte arrives on the read end and then
 * returns the result of fn(arg). Returns -1 without calling fn when the
 * byte cannot be read.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *ctx = data;
	char token;
	ssize_t got;

	// we're not sharing with the parent any more, if it was a thread

	close(ctx->p[1]);

	got = read(ctx->p[0], &token, 1);
	if (got != 1)
		return -1;

	close(ctx->p[0]);
	return ctx->fn(ctx->arg);
}
3883
3884/*
3885 * Add a ID_TYPE_UID entry to an existing lxc_conf, if it is not
3886 * alread there.
3887 * We may want to generalize this to do gids as well as uids, but right now
3888 * it's not necessary.
3889 */
3890static struct lxc_list *idmap_add_id(struct lxc_conf *conf, uid_t uid)
3891{
3892 int hostid_mapped = mapped_hostid(uid, conf);
3893 struct lxc_list *new = NULL, *tmp, *it, *next;
3894 struct id_map *entry;
3895
3896 if (hostid_mapped < 0) {
3897 hostid_mapped = find_unmapped_nsuid(conf);
3898 if (hostid_mapped < 0) {
3899 ERROR("Could not find free uid to map");
3900 return NULL;
3901 }
3902 new = malloc(sizeof(*new));
3903 if (!new) {
3904 ERROR("Out of memory building id map");
3905 return NULL;
3906 }
3907 entry = malloc(sizeof(*entry));
3908 if (!entry) {
3909 free(new);
3910 ERROR("Out of memory building idmap entry");
3911 return NULL;
3912 }
3913 new->elem = entry;
3914 entry->idtype = ID_TYPE_UID;
3915 entry->nsid = hostid_mapped;
3916 entry->hostid = (unsigned long)uid;
3917 entry->range = 1;
3918 lxc_list_init(new);
3919 }
3920 lxc_list_for_each_safe(it, &conf->id_map, next) {
3921 tmp = malloc(sizeof(*tmp));
3922 if (!tmp)
3923 goto err;
3924 entry = malloc(sizeof(*entry));
3925 if (!entry) {
3926 free(tmp);
3927 goto err;
3928 }
3929 memset(entry, 0, sizeof(*entry));
3930 memcpy(entry, it->elem, sizeof(*entry));
3931 tmp->elem = entry;
3932 if (!new) {
3933 new = tmp;
3934 lxc_list_init(new);
3935 } else
3936 lxc_list_add_tail(new, tmp);
3937 }
3938
3939 return new;
3940
3941err:
3942 ERROR("Out of memory building a new uid map");
3943 lxc_free_idmap(new);
3944 return NULL;
3945}
3946
3947/*
3948 * Run a function in a new user namespace.
3949 * The caller's euid will be mapped in if it is not already.
3950 */
3951int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
3952{
3953 int ret, pid;
3954 struct userns_fn_data d;
3955 char c = '1';
3956 int p[2];
3957 struct lxc_list *idmap;
3958
3959 process_lock();
3960 ret = pipe(p);
3961 process_unlock();
3962 if (ret < 0) {
3963 SYSERROR("opening pipe");
3964 return -1;
3965 }
3966 d.fn = fn;
3967 d.arg = data;
3968 d.p[0] = p[0];
3969 d.p[1] = p[1];
3970 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
3971 if (pid < 0)
3972 goto err;
3973 process_lock();
3974 close(p[0]);
3975 process_unlock();
3976 p[0] = -1;
3977
3978 if ((idmap = idmap_add_id(conf, geteuid())) == NULL) {
3979 ERROR("Error adding self to container uid map");
3980 goto err;
3981 }
3982
3983 ret = lxc_map_ids(idmap, pid);
3984 lxc_free_idmap(idmap);
3985 if (ret < 0) {
3986 ERROR("Error setting up child mappings");
3987 goto err;
3988 }
3989
3990 // kick the child
3991 if (write(p[1], &c, 1) != 1) {
3992 SYSERROR("writing to pipe to child");
3993 goto err;
3994 }
3995
3996 if ((ret = wait_for_pid(pid)) < 0) {
3997 ERROR("Child returned an error: %d\n", ret);
3998 goto err;
3999 }
4000err:
4001 process_lock();
4002 if (p[0] != -1)
4003 close(p[0]);
4004 close(p[1]);
4005 process_unlock();
4006 return -1;
4007}