]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
set close-all-fds by default
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
b2718c72 68#include "parse.h"
1b09f2c0
DL
69#include "utils.h"
70#include "conf.h"
71#include "log.h"
d55bc1ad 72#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 73#include "bdev.h"
368bbc02 74#include "cgroup.h"
025ed0f3 75#include "lxclock.h"
4355ab5f 76#include "namespace.h"
fe4de9a6 77#include "lsm/lsm.h"
d0a36f2c 78
495d2046
SG
79#if HAVE_SYS_CAPABILITY_H
80#include <sys/capability.h>
81#endif
82
6ff05e18
SG
83#if HAVE_SYS_PERSONALITY_H
84#include <sys/personality.h>
85#endif
86
edaf8b1b
SG
87#if IS_BIONIC
88#include <../include/lxcmntent.h>
89#else
90#include <mntent.h>
91#endif
92
769872f9
SH
93#include "lxcseccomp.h"
94
36eb9bde 95lxc_log_define(lxc_conf, lxc);
e5bda9ee 96
0ad19a3f 97#define MAXHWLEN 18
98#define MAXINDEXLEN 20
442cbbe6 99#define MAXMTULEN 16
0ad19a3f 100#define MAXLINELEN 128
101
495d2046 102#if HAVE_SYS_CAPABILITY_H
b09094da
MN
103#ifndef CAP_SETFCAP
104#define CAP_SETFCAP 31
105#endif
106
107#ifndef CAP_MAC_OVERRIDE
108#define CAP_MAC_OVERRIDE 32
109#endif
110
111#ifndef CAP_MAC_ADMIN
112#define CAP_MAC_ADMIN 33
113#endif
495d2046 114#endif
b09094da
MN
115
116#ifndef PR_CAPBSET_DROP
117#define PR_CAPBSET_DROP 24
118#endif
119
9818cae4
SG
120#ifndef LO_FLAGS_AUTOCLEAR
121#define LO_FLAGS_AUTOCLEAR 4
122#endif
123
0769b82a
CS
124/* needed for cgroup automount checks, regardless of whether we
125 * have included linux/capability.h or not */
126#ifndef CAP_SYS_ADMIN
127#define CAP_SYS_ADMIN 21
128#endif
129
2d76d1d7
SG
/*
 * pivot_root() shim: when the C library provides the function
 * (HAVE_PIVOT_ROOT) only a prototype is needed; otherwise issue the raw
 * system call, or fail with ENOSYS when the syscall number is unknown.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
144
/*
 * sethostname() shim for C libraries that lack it: issue the raw system
 * call, or fail with ENOSYS when the syscall number is unknown.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
157
72f919c4
SG
158/* Define __S_ISTYPE if missing from the C library */
159#ifndef __S_ISTYPE
160#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
161#endif
162
ecec0126
SG
163#ifndef MS_PRIVATE
164#define MS_PRIVATE (1<<18)
165#endif
166
72d0e1cb 167char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 168 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 169
a589434e 170typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 171
998ac676
RT
/*
 * One recognised mount option keyword (see the mount_opt[] table).
 * NOTE(review): judging by the table ("rw" carries MS_RDONLY with
 * clear == 1), a non-zero 'clear' presumably means the keyword removes
 * 'flag' instead of setting it - confirm against the option parser.
 */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* see note above */
	int flag;	/* MS_* bit(s) associated with the keyword */
};

/* One capability name together with its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* lower-case capability name without "CAP_" */
	int value;	/* CAP_* number */
};
182
0769b82a
CS
183/* Declare this here, since we don't want to reshuffle the whole file. */
184static int in_caplist(int cap, struct lxc_list *caps);
185
a589434e
JN
186static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
187static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
188static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
189static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
190static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
191static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
192
193static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
194 [LXC_NET_VETH] = instantiate_veth,
195 [LXC_NET_MACVLAN] = instantiate_macvlan,
196 [LXC_NET_VLAN] = instantiate_vlan,
197 [LXC_NET_PHYS] = instantiate_phys,
198 [LXC_NET_EMPTY] = instantiate_empty,
199 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 200};
201
74a2b586
JK
202static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
203static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
204static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
205static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
206static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 207static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 208
a589434e 209static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
210 [LXC_NET_VETH] = shutdown_veth,
211 [LXC_NET_MACVLAN] = shutdown_macvlan,
212 [LXC_NET_VLAN] = shutdown_vlan,
213 [LXC_NET_PHYS] = shutdown_phys,
214 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 215 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
216};
217
998ac676 218static struct mount_opt mount_opt[] = {
88d413d5
SW
219 { "defaults", 0, 0 },
220 { "ro", 0, MS_RDONLY },
221 { "rw", 1, MS_RDONLY },
222 { "suid", 1, MS_NOSUID },
223 { "nosuid", 0, MS_NOSUID },
224 { "dev", 1, MS_NODEV },
225 { "nodev", 0, MS_NODEV },
226 { "exec", 1, MS_NOEXEC },
227 { "noexec", 0, MS_NOEXEC },
228 { "sync", 0, MS_SYNCHRONOUS },
229 { "async", 1, MS_SYNCHRONOUS },
230 { "dirsync", 0, MS_DIRSYNC },
231 { "remount", 0, MS_REMOUNT },
232 { "mand", 0, MS_MANDLOCK },
233 { "nomand", 1, MS_MANDLOCK },
234 { "atime", 1, MS_NOATIME },
235 { "noatime", 0, MS_NOATIME },
236 { "diratime", 1, MS_NODIRATIME },
237 { "nodiratime", 0, MS_NODIRATIME },
238 { "bind", 0, MS_BIND },
239 { "rbind", 0, MS_BIND|MS_REC },
240 { "relatime", 0, MS_RELATIME },
241 { "norelatime", 1, MS_RELATIME },
242 { "strictatime", 0, MS_STRICTATIME },
243 { "nostrictatime", 1, MS_STRICTATIME },
244 { NULL, 0, 0 },
998ac676
RT
245};
246
495d2046 247#if HAVE_SYS_CAPABILITY_H
81810dd1 248static struct caps_opt caps_opt[] = {
a6afdde9 249 { "chown", CAP_CHOWN },
1e11be34
DL
250 { "dac_override", CAP_DAC_OVERRIDE },
251 { "dac_read_search", CAP_DAC_READ_SEARCH },
252 { "fowner", CAP_FOWNER },
253 { "fsetid", CAP_FSETID },
81810dd1
DL
254 { "kill", CAP_KILL },
255 { "setgid", CAP_SETGID },
256 { "setuid", CAP_SETUID },
257 { "setpcap", CAP_SETPCAP },
258 { "linux_immutable", CAP_LINUX_IMMUTABLE },
259 { "net_bind_service", CAP_NET_BIND_SERVICE },
260 { "net_broadcast", CAP_NET_BROADCAST },
261 { "net_admin", CAP_NET_ADMIN },
262 { "net_raw", CAP_NET_RAW },
263 { "ipc_lock", CAP_IPC_LOCK },
264 { "ipc_owner", CAP_IPC_OWNER },
265 { "sys_module", CAP_SYS_MODULE },
266 { "sys_rawio", CAP_SYS_RAWIO },
267 { "sys_chroot", CAP_SYS_CHROOT },
268 { "sys_ptrace", CAP_SYS_PTRACE },
269 { "sys_pacct", CAP_SYS_PACCT },
270 { "sys_admin", CAP_SYS_ADMIN },
271 { "sys_boot", CAP_SYS_BOOT },
272 { "sys_nice", CAP_SYS_NICE },
273 { "sys_resource", CAP_SYS_RESOURCE },
274 { "sys_time", CAP_SYS_TIME },
275 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
276 { "mknod", CAP_MKNOD },
277 { "lease", CAP_LEASE },
9527e566 278#ifdef CAP_AUDIT_WRITE
81810dd1 279 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
280#endif
281#ifdef CAP_AUDIT_CONTROL
81810dd1 282 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 283#endif
81810dd1
DL
284 { "setfcap", CAP_SETFCAP },
285 { "mac_override", CAP_MAC_OVERRIDE },
286 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
287#ifdef CAP_SYSLOG
288 { "syslog", CAP_SYSLOG },
289#endif
290#ifdef CAP_WAKE_ALARM
291 { "wake_alarm", CAP_WAKE_ALARM },
292#endif
81810dd1 293};
495d2046
SG
294#else
295static struct caps_opt caps_opt[] = {};
296#endif
81810dd1 297
f0d02950
JTLB
/* Host-side directories under /dev reserved for LXC's own use. */
const char *dev_base_path = "/dev/.lxc";
const char *dev_user_path = "/dev/.lxc/user";
300
91c3830e
SH
301static int run_buffer(char *buffer)
302{
ebec9176 303 struct lxc_popen_FILE *f;
91c3830e 304 char *output;
8e7da691 305 int ret;
91c3830e 306
ebec9176 307 f = lxc_popen(buffer);
91c3830e
SH
308 if (!f) {
309 SYSERROR("popen failed");
310 return -1;
311 }
312
313 output = malloc(LXC_LOG_BUFFER_SIZE);
314 if (!output) {
315 ERROR("failed to allocate memory for script output");
ebec9176 316 lxc_pclose(f);
91c3830e
SH
317 return -1;
318 }
319
ebec9176 320 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
321 DEBUG("script output: %s", output);
322
323 free(output);
324
ebec9176 325 ret = lxc_pclose(f);
8e7da691 326 if (ret == -1) {
91c3830e
SH
327 SYSERROR("Script exited on error");
328 return -1;
8e7da691
DE
329 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
330 ERROR("Script exited with status %d", WEXITSTATUS(ret));
331 return -1;
332 } else if (WIFSIGNALED(ret)) {
333 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
334 strsignal(WTERMSIG(ret)));
335 return -1;
91c3830e
SH
336 }
337
338 return 0;
339}
340
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it via run_buffer().
 *
 * name, section, script, hook: mandatory, non-NULL strings.
 * lxcpath: accepted for interface compatibility; not used here.
 * argsin:  optional NULL-terminated array of extra arguments (may be NULL).
 *
 * Returns run_buffer()'s result, or -1 when the command line cannot be
 * built.
 *
 * The command line is heap-allocated: its length depends on caller
 * supplied strings and is only bounded by INT_MAX, which is far too much
 * for alloca() (and alloca() cannot report failure).
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret = -1;
	int used, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument is preceded by one blank */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two blanks + terminating NUL */

	/* snprintf() reports lengths as int */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	used = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - used;
		int rc;

		rc = snprintf(buffer + used, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		used += rc;
	}

	ret = run_buffer(buffer);
out:
	free(buffer);
	return ret;
}
390
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" from a
 * NULL-terminated variadic list of char * arguments and execute it via
 * run_buffer().
 *
 * Returns run_buffer()'s result, or -1 when the command line cannot be
 * built.
 *
 * The command line is heap-allocated (its size is only bounded by
 * INT_MAX, too much for alloca()), and every va_start() is paired with a
 * va_end() on all paths, including the error return inside the loop.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret = -1;
	int used;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: every extra argument is preceded by one blank */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;	/* two blanks + terminating NUL */

	/* snprintf() reports lengths as int */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	used = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (used < 0 || (size_t)used >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - used;
		int rc;

		rc = snprintf(buffer + used, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out;
		}
		used += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
out:
	free(buffer);
	return ret;
}
442
/*
 * Per-line callback used while scanning a filesystem list: treat the
 * line as a filesystem type and try to mount cbarg->rootfs on
 * cbarg->target with it.
 *
 * buffer: one line of the list; it is trimmed in place.
 * data:   pointer to a struct with the layout of 'struct cbarg' below.
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped or the
 * mount failed, -1 when the mount options could not be parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Initialised here so that the free() on the error path below is
	 * well defined even if parse_mntopts() bails out before storing
	 * anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip surrounding whitespace */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
487
a17b1e65
SG
488static int mount_unknown_fs(const char *rootfs, const char *target,
489 const char *options)
78ae2fcc 490{
a6afdde9 491 int i;
78ae2fcc 492
493 struct cbarg {
494 const char *rootfs;
a6afdde9 495 const char *target;
a17b1e65 496 const char *options;
78ae2fcc 497 } cbarg = {
498 .rootfs = rootfs,
a6afdde9 499 .target = target,
a17b1e65 500 .options = options,
78ae2fcc 501 };
502
a6afdde9
DL
503 /*
504 * find the filesystem type with brute force:
505 * first we check with /etc/filesystems, in case the modules
78ae2fcc 506 * are auto-loaded and fall back to the supported kernel fs
507 */
508 char *fsfile[] = {
509 "/etc/filesystems",
510 "/proc/filesystems",
511 };
512
a6afdde9
DL
513 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
514
515 int ret;
516
517 if (access(fsfile[i], F_OK))
518 continue;
519
520 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
521 if (ret < 0) {
522 ERROR("failed to parse '%s'", fsfile[i]);
523 return -1;
524 }
525
526 if (ret)
527 return 0;
78ae2fcc 528 }
529
a6afdde9
DL
530 ERROR("failed to determine fs type for '%s'", rootfs);
531 return -1;
532}
533
a17b1e65
SG
/*
 * Recursively bind-mount the directory 'rootfs' on 'target', adding the
 * flags and data parsed from the 'options' string.
 *
 * Returns mount()'s result, or -1 when the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialised so that the free() on the error path is well defined
	 * even if parse_mntopts() fails before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
551
/*
 * Attach the file 'rootfs' to the loop device open on 'fd' and flag the
 * device LO_FLAGS_AUTOCLEAR.
 *
 * rootfs: path of the backing file, opened read-write.
 * fd:     open descriptor of the loop device.
 * loinfo: caller-provided structure; zeroed and filled in here.
 *
 * Returns 0 on success, -1 on failure. If the status cannot be set after
 * the backing file has been attached, the device is detached again so it
 * does not stay bound without the autoclear flag.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD, otherwise the device stays occupied */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
583
a17b1e65
SG
/*
 * Mount the regular file 'rootfs' on 'target' through a loop device:
 * scan /dev for the first "loop*" node that is not in use, attach the
 * file to it and mount the device with an auto-detected filesystem type.
 *
 * Returns 0 on success, -1 on failure (including when no free loop
 * device was found).
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/* readdir() replaces the deprecated readdir_r() */
	while ((direntp = readdir(dir))) {

		/* this also skips "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a successful status query means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		/* ENXIO is the answer expected from a free device */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
648
a17b1e65
SG
/*
 * Mount the block device 'rootfs' on 'target'; the filesystem type is
 * auto-detected by mount_unknown_fs(), whose result is returned.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc;

	rc = mount_unknown_fs(rootfs, target, options);
	return rc;
}
654
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned (no rootfs given, path cannot
 * be resolved, or rootfs is not a directory).
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || rootfs[0] == '\0')
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the pin alive; the name is not needed */
	(void)unlink(absrootfspin);
	return fd;
}
697
e2a7e8dc
SH
/*
 * When a remount is requested (MS_REMOUNT set in 'flags'), add the
 * nosuid/nodev/ro/noexec bits that statvfs() reports for the mount, so
 * that the remount does not silently drop them.
 *
 * s: source path; when NULL, 'd' is inspected instead.
 * d: destination path.
 *
 * Returns 'flags' unchanged when no remount is requested, when neither
 * path is given, when statvfs() fails, or when built without
 * HAVE_STATVFS; otherwise 'flags' with the required bits added.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
		unsigned long flags)
{
#ifndef HAVE_STATVFS
	return flags;
#else
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *path = s ? s : d;
	struct statvfs sb;
	size_t k;

	if (!(flags & MS_REMOUNT) || !path)
		return flags;

	if (statvfs(path, &sb) < 0)
		return flags;

	for (k = 0; k < sizeof(sticky) / sizeof(sticky[0]); k++) {
		if (sb.f_flag & sticky[k])
			flags |= sticky[k];
	}

	return flags;
#endif
}
734
4fb3cba5 735static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 736{
368bbc02 737 int r;
b06b8511
CS
738 size_t i;
739 static struct {
740 int match_mask;
741 int match_flag;
742 const char *source;
743 const char *destination;
744 const char *fstype;
745 unsigned long flags;
746 const char *options;
747 } default_mounts[] = {
748 /* Read-only bind-mounting... In older kernels, doing that required
749 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
750 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
751 * kernel 2.6.26 onwards. However, this apparently does not work on
752 * kernel 3.8. Unfortunately, on that very same kernel, doing the
753 * same trick as above doesn't seem to work either, there one needs
754 * to ALSO specify MS_BIND for the remount, otherwise the entire
755 * fs is remounted read-only or the mount fails because it's busy...
756 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
757 * 2.6.32...
368bbc02 758 */
b06b8511
CS
759 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
760 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
761 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
762 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
763 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
764 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
765 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
766 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
767 { 0, 0, NULL, NULL, NULL, 0, NULL }
768 };
368bbc02 769
b06b8511
CS
770 for (i = 0; default_mounts[i].match_mask; i++) {
771 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
772 char *source = NULL;
773 char *destination = NULL;
774 int saved_errno;
e2a7e8dc 775 unsigned long mflags;
b06b8511
CS
776
777 if (default_mounts[i].source) {
778 /* will act like strdup if %r is not present */
779 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
780 if (!source) {
781 SYSERROR("memory allocation error");
782 return -1;
783 }
784 }
785 if (default_mounts[i].destination) {
786 /* will act like strdup if %r is not present */
787 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
788 if (!destination) {
789 saved_errno = errno;
790 SYSERROR("memory allocation error");
791 free(source);
792 errno = saved_errno;
793 return -1;
794 }
795 }
e2a7e8dc
SH
796 mflags = add_required_remount_flags(source, destination,
797 default_mounts[i].flags);
798 r = mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options);
b06b8511 799 saved_errno = errno;
c414be25 800 if (r < 0)
e2a7e8dc 801 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
b06b8511
CS
802 free(source);
803 free(destination);
804 if (r < 0) {
b06b8511
CS
805 errno = saved_errno;
806 return -1;
807 }
368bbc02 808 }
368bbc02
CS
809 }
810
b06b8511 811 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
812 int cg_flags;
813
814 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
815 /* If the type of cgroup mount was not specified, it depends on the
816 * container's capabilities as to what makes sense: if we have
817 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
818 * anyway, so we may as well default to read-write; then the admin
819 * will not be given a false sense of security. (And if they really
820 * want mixed r/o r/w, then they can explicitly specify :mixed.)
821 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
822 * :mixed, because then the container can't remount it read-write. */
823 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
824 int has_sys_admin = 0;
825 if (!lxc_list_empty(&conf->keepcaps)) {
826 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
827 } else {
828 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
829 }
830 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
831 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
832 } else {
833 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
834 }
835 }
836
837 if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
368bbc02 838 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 839 return -1;
368bbc02
CS
840 }
841 }
842
368bbc02 843 return 0;
368bbc02
CS
844}
845
a17b1e65 846static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 847{
b09ef133 848 char absrootfs[MAXPATHLEN];
78ae2fcc 849 struct stat s;
a6afdde9 850 int i;
78ae2fcc 851
a17b1e65 852 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 853
854 struct rootfs_type {
855 int type;
856 rootfs_cb cb;
857 } rtfs_type[] = {
2656d231
DL
858 { S_IFDIR, mount_rootfs_dir },
859 { S_IFBLK, mount_rootfs_block },
860 { S_IFREG, mount_rootfs_file },
78ae2fcc 861 };
0ad19a3f 862
4c8ab83b 863 if (!realpath(rootfs, absrootfs)) {
36eb9bde 864 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 865 return -1;
866 }
b09ef133 867
b09ef133 868 if (access(absrootfs, F_OK)) {
36eb9bde 869 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 870 return -1;
871 }
872
78ae2fcc 873 if (stat(absrootfs, &s)) {
36eb9bde 874 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 875 return -1;
876 }
877
78ae2fcc 878 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 879
78ae2fcc 880 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
881 continue;
9b0f0477 882
a17b1e65 883 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 884 }
9b0f0477 885
36eb9bde 886 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 887 return -1;
0ad19a3f 888}
889
/*
 * Set the hostname to utsname->nodename.
 * A NULL 'utsname' means there is nothing to do and counts as success.
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;
	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
904
69aa6655
DE
/* A symlink to create below the container's /dev directory. */
struct dev_symlinks {
	const char *oldpath;	/* what the link points to */
	const char *name;	/* link name, relative to /dev */
};

/* Standard /dev entries that point into /proc/self/fd. */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
916
917static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
918{
919 char path[MAXPATHLEN];
920 int ret,i;
09227be2 921 struct stat s;
69aa6655
DE
922
923
924 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
925 const struct dev_symlinks *d = &dev_symlinks[i];
926 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
927 if (ret < 0 || ret >= MAXPATHLEN)
928 return -1;
09227be2
MW
929
930 /*
931 * Stat the path first. If we don't get an error
932 * accept it as is and don't try to create it
933 */
934 if (!stat(path, &s)) {
935 continue;
936 }
937
69aa6655 938 ret = symlink(d->oldpath, path);
09227be2 939
69aa6655 940 if (ret && errno != EEXIST) {
09227be2
MW
941 if ( errno == EROFS ) {
942 WARN("Warning: Read Only file system while creating %s", path);
943 } else {
944 SYSERROR("Error creating %s", path);
945 return -1;
946 }
69aa6655
DE
947 }
948 }
949 return 0;
950}
951
33fcb7a0 952static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 953 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 954{
7c6ef2a2
SH
955 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
956 int i, ret;
b0a33c1e 957
bc9bd0e3
DL
958 if (!rootfs->path)
959 return 0;
960
b0a33c1e 961 for (i = 0; i < tty_info->nbtty; i++) {
962
963 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
964
7c6ef2a2 965 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 966 rootfs->mount, i + 1);
7c6ef2a2
SH
967 if (ret >= sizeof(path)) {
968 ERROR("pathname too long for ttys");
969 return -1;
970 }
971 if (ttydir) {
972 /* create dev/lxc/tty%d" */
9ba8130c 973 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
974 rootfs->mount, ttydir, i + 1);
975 if (ret >= sizeof(lxcpath)) {
976 ERROR("pathname too long for ttys");
977 return -1;
978 }
979 ret = creat(lxcpath, 0660);
980 if (ret==-1 && errno != EEXIST) {
959aee9c 981 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
982 return -1;
983 }
4d44e274
SH
984 if (ret >= 0)
985 close(ret);
7c6ef2a2
SH
986 ret = unlink(path);
987 if (ret && errno != ENOENT) {
959aee9c 988 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
989 return -1;
990 }
b0a33c1e 991
7c6ef2a2
SH
992 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
993 WARN("failed to mount '%s'->'%s'",
994 pty_info->name, path);
995 continue;
996 }
13954cce 997
9ba8130c
SH
998 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
999 if (ret >= sizeof(lxcpath)) {
1000 ERROR("tty pathname too long");
1001 return -1;
1002 }
7c6ef2a2
SH
1003 ret = symlink(lxcpath, path);
1004 if (ret) {
959aee9c 1005 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
1006 return -1;
1007 }
1008 } else {
c6883f38
SH
1009 /* If we populated /dev, then we need to create /dev/ttyN */
1010 if (access(path, F_OK)) {
1011 ret = creat(path, 0660);
1012 if (ret==-1) {
959aee9c 1013 SYSERROR("error creating %s", path);
c6883f38 1014 /* this isn't fatal, continue */
025ed0f3 1015 } else {
c6883f38 1016 close(ret);
025ed0f3 1017 }
c6883f38 1018 }
7c6ef2a2
SH
1019 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
1020 WARN("failed to mount '%s'->'%s'",
1021 pty_info->name, path);
1022 continue;
1023 }
b0a33c1e 1024 }
1025 }
1026
cd54d859
DL
1027 INFO("%d tty(s) has been setup", tty_info->nbtty);
1028
b0a33c1e 1029 return 0;
1030}
1031
bf601689 1032
2d489f9e 1033static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 1034{
2d489f9e 1035 int oldroot = -1, newroot = -1;
bf601689 1036
2d489f9e
SH
1037 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1038 if (oldroot < 0) {
1039 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1040 return -1;
1041 }
2d489f9e
SH
1042 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1043 if (newroot < 0) {
1044 SYSERROR("Error opening new-/ for fchdir");
1045 goto fail;
c08556c6 1046 }
bf601689 1047
cc6f6dd7 1048 /* change into new root fs */
2d489f9e 1049 if (fchdir(newroot)) {
cc6f6dd7 1050 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1051 goto fail;
cc6f6dd7
DL
1052 }
1053
cc6f6dd7 1054 /* pivot_root into our new root fs */
2d489f9e 1055 if (pivot_root(".", ".")) {
cc6f6dd7 1056 SYSERROR("pivot_root syscall failed");
2d489f9e 1057 goto fail;
bf601689 1058 }
cc6f6dd7 1059
2d489f9e
SH
1060 /*
1061 * at this point the old-root is mounted on top of our new-root
1062 * To unmounted it we must not be chdir'd into it, so escape back
1063 * to old-root
1064 */
1065 if (fchdir(oldroot) < 0) {
1066 SYSERROR("Error entering oldroot");
1067 goto fail;
1068 }
7981ea46 1069 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1070 SYSERROR("Error detaching old root");
1071 goto fail;
cc6f6dd7
DL
1072 }
1073
2d489f9e
SH
1074 if (fchdir(newroot) < 0) {
1075 SYSERROR("Error re-entering newroot");
1076 goto fail;
1077 }
cc6f6dd7 1078
2d489f9e
SH
1079 close(oldroot);
1080 close(newroot);
bf601689 1081
2d489f9e 1082 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1083
bf601689 1084 return 0;
2d489f9e
SH
1085
1086fail:
1087 if (oldroot != -1)
1088 close(oldroot);
1089 if (newroot != -1)
1090 close(newroot);
1091 return -1;
bf601689
MH
1092}
1093
bc6928ff
MW
/*
 * Check whether a directory has something mounted on it and, if so,
 * report the filesystem type.
 *
 * @dir    : mount point to look for; must be an existing directory.
 * @fstype : optional output buffer of at least MAX_FSTYPE_LEN bytes;
 *           may be NULL when the caller only needs the count.
 *
 * Returns the number of entries in /proc/self/mounts whose mount point
 * equals @dir.
 *   != 0 : @fstype (if non-NULL) holds the type of the last match.
 *   == 0 : no match (or @dir is not a directory, or the mounts file
 *          could not be opened); @fstype is left untouched.
 *
 * ToDo: Maybe return the mount options in another parameter...
 */

#define LINELEN 4096
#define MAX_FSTYPE_LEN 128
static int mount_check_fs(const char *dir, char *fstype)
{
	char buf[LINELEN];
	struct stat s;
	FILE *f;
	int found_fs = 0;

	DEBUG("entering mount_check_fs for %s", dir);

	if (0 != access(dir, F_OK) || 0 != stat(dir, &s) || 0 == S_ISDIR(s.st_mode))
		return 0;

	f = fopen("/proc/self/mounts", "r");
	if (!f)
		return 0;

	while (fgets(buf, LINELEN, f)) {
		char *mntpoint, *type, *sep;

		/* field 1: device name, skipped */
		sep = strchr(buf, ' ');
		if (!sep)
			continue;
		mntpoint = sep + 1;

		/* field 2: mount point */
		sep = strchr(mntpoint, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		/* Compare the directory in the entry to desired */
		if (strcmp(mntpoint, dir))
			continue;

		/* field 3: filesystem type */
		type = sep + 1;
		sep = strchr(type, ' ');
		if (!sep)
			continue;
		*sep = '\0';

		++found_fs;

		if (fstype)
			snprintf(fstype, MAX_FSTYPE_LEN, "%s", type);
	}

	fclose(f);

	/*
	 * fstype may be NULL, or an unwritten caller buffer when nothing
	 * matched, so only hand it to "%s" when it is known to be valid.
	 */
	DEBUG("mount_check_fs returning %d last %s", found_fs,
	      (found_fs && fstype) ? fstype : "(none)");

	return found_fs;
}
1163
1164/*
1165 * Locate a devtmpfs mount (should be on /dev) and create a container
1166 * subdirectory on it which we can then bind mount to the container
1167 * /dev instead of mounting a tmpfs there.
1168 * If we fail, return NULL.
1169 * Else return the pointer to the name buffer with the string to
1170 * the devtmpfs subdirectory.
1171 */
1172
74a3920a 1173static char *mk_devtmpfs(const char *name, char *path, const char *lxcpath)
bc6928ff
MW
1174{
1175 int ret;
1176 struct stat s;
1177 char tmp_path[MAXPATHLEN];
1178 char fstype[MAX_FSTYPE_LEN];
bc6928ff
MW
1179 uint64_t hash;
1180
f0d02950 1181 if ( 0 != access(dev_base_path, F_OK) || 0 != stat(dev_base_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
bc6928ff 1182 /* This is just making /dev/.lxc it better work or we're done */
f0d02950 1183 ret = mkdir(dev_base_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
bc6928ff
MW
1184 if ( ret ) {
1185 SYSERROR( "Unable to create /dev/.lxc for autodev" );
1186 return NULL;
1187 }
1188 }
1189
1190 /*
1191 * Programmers notes:
1192 * We can not do mounts in this area of code that we want
1193 * to be visible in the host. Consequently, /dev/.lxc must
1194 * be set up earlier if we need a tmpfs mounted there.
1195 * That only affects the rare cases where autodev is enabled
1196 * for a container and devtmpfs is not mounted on /dev in the
1197 * host. In that case, we'll fall back to the old method
1198 * of mounting a tmpfs in the container and have no visibility
1199 * into the container /dev.
1200 */
1201 if( ! mount_check_fs( "/dev", fstype )
1202 || strcmp( "devtmpfs", fstype ) ) {
1203 /* Either /dev was not mounted or was not devtmpfs */
1204
1205 if ( ! mount_check_fs( "/dev/.lxc", NULL ) ) {
1206 /*
1207 * /dev/.lxc is not already mounted
1208 * Doing a mount here does no good, since
1209 * it's not visible in the host.
1210 */
1211
1212 ERROR("/dev/.lxc is not setup - taking fallback" );
1213 return NULL;
1214 }
1215 }
1216
f0d02950 1217 if ( 0 != access(dev_user_path, F_OK) || 0 != stat(dev_user_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
bc6928ff
MW
1218 /*
1219 * This is making /dev/.lxc/user path for non-priv users.
1220 * If this doesn't work, we'll have to fall back in the
1221 * case of non-priv users. It's mode 1777 like /tmp.
1222 */
f0d02950 1223 ret = mkdir(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
bc6928ff
MW
1224 if ( ret ) {
1225 /* Issue an error but don't fail yet! */
1226 ERROR("Unable to create /dev/.lxc/user");
1227 }
1228 /* Umask tends to screw us up here */
f0d02950 1229 chmod(dev_user_path, S_IRWXU | S_IRWXG | S_IRWXO | S_ISVTX);
bc6928ff
MW
1230 }
1231
1232 /*
1233 * Since the container name must be unique within a given
1234 * lxcpath, we're going to use a hash of the path
1235 * /lxcpath/name as our hash name in /dev/.lxc/
1236 */
1237
1238 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1239 if (ret < 0 || ret >= MAXPATHLEN)
1240 return NULL;
1241
1242 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1243
f0d02950 1244 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
bc6928ff
MW
1245 if (ret < 0 || ret >= MAXPATHLEN)
1246 return NULL;
1247
1248 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1249 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1250 if ( ret ) {
f0d02950
JTLB
1251 /* Something must have failed with the dev_base_path...
1252 * Maybe unpriv user. Try dev_user_path now... */
bc6928ff
MW
1253 INFO("Setup in /dev/.lxc failed. Trying /dev/.lxc/user." );
1254
f0d02950 1255 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
bc6928ff
MW
1256 if (ret < 0 || ret >= MAXPATHLEN)
1257 return NULL;
1258
1259 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1260 ret = mkdir(tmp_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1261 if ( ret ) {
1262 ERROR("Container /dev setup in host /dev failed - taking fallback" );
1263 return NULL;
1264 }
1265 }
1266 }
1267 }
1268
1269 strcpy( path, tmp_path );
1270 return path;
1271}
1272
91c3830e
SH
/*
 * Mount a /dev for the container at "<root>/dev" and make sure
 * "<root>/dev/pts" exists.
 *
 * If mk_devtmpfs() provides a per-container directory, it is symlinked
 * from "<lxcpath>/<name>/rootfs.dev" and bind mounted onto the container
 * /dev. Otherwise a manually prepared mount on rootfs.dev is bind mounted
 * if one exists, else a fresh tmpfs is mounted.
 *
 * Open question: do we want options for the max size of /dev and a file
 * to specify which devices to create?
 *
 * Returns 0 on success, -1 on failure.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	struct stat s;
	char path[MAXPATHLEN];
	char host_path[MAXPATHLEN];
	char devtmpfs_path[MAXPATHLEN];

	INFO("Mounting /dev under %s", root);

	/* snprintf returning exactly MAXPATHLEN already means truncation */
	ret = snprintf(host_path, MAXPATHLEN, "%s/%s/rootfs.dev", lxcpath, name);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	if (mk_devtmpfs( name, devtmpfs_path, lxcpath ) ) {
		/*
		 * Get rid of old links and directories.
		 * This could be either a symlink and we remove it,
		 * or an empty directory and we remove it,
		 * or non-existent and we don't care,
		 * or a non-empty directory, and we will then emit an error
		 * but we will not fail out the process.
		 */
		unlink( host_path );
		rmdir( host_path );
		ret = symlink(devtmpfs_path, host_path);

		if ( ret < 0 ) {
			SYSERROR("WARNING: Failed to create symlink '%s'->'%s'", host_path, devtmpfs_path);
		}
		DEBUG("Bind mounting %s to %s", devtmpfs_path , path );
		ret = mount(devtmpfs_path, path, NULL, MS_BIND, 0 );
	} else {
		/* Only mount a tmpfs here if there is not already a mount */
		if ( ! mount_check_fs( host_path, NULL ) ) {
			DEBUG("Mounting tmpfs to %s", host_path );
			ret = mount("none", path, "tmpfs", 0, "size=100000,mode=755");
		} else {
			/* This allows someone to manually set up a mount */
			DEBUG("Bind mounting %s to %s", host_path, path );
			ret = mount(host_path , path, NULL, MS_BIND, 0 );
		}
	}
	if (ret) {
		SYSERROR("Failed to mount /dev at %s", root);
		return -1;
	}

	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) || 0 == S_ISDIR(s.st_mode) ) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1346
c6883f38 1347struct lxc_devs {
74a3920a 1348 const char *name;
c6883f38
SH
1349 mode_t mode;
1350 int maj;
1351 int min;
1352};
1353
74a3920a 1354static const struct lxc_devs lxc_devs[] = {
c6883f38
SH
1355 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
1356 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
1357 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
1358 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1359 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1360 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
1361 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
1362};
1363
74a3920a 1364static int setup_autodev(const char *root)
c6883f38
SH
1365{
1366 int ret;
c6883f38
SH
1367 char path[MAXPATHLEN];
1368 int i;
3a32201c 1369 mode_t cmask;
c6883f38 1370
959aee9c 1371 INFO("Creating initial consoles under %s/dev", root);
91c3830e 1372
c6883f38 1373 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1374 if (ret < 0 || ret >= MAXPATHLEN) {
1375 ERROR("Error calculating container /dev location");
c6883f38 1376 return -1;
f7bee6c6 1377 }
91c3830e 1378
959aee9c 1379 INFO("Populating /dev under %s", root);
3a32201c 1380 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1381 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1382 const struct lxc_devs *d = &lxc_devs[i];
c6883f38
SH
1383 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1384 if (ret < 0 || ret >= MAXPATHLEN)
1385 return -1;
1386 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1387 if (ret && errno != EEXIST) {
959aee9c 1388 SYSERROR("Error creating %s", d->name);
c6883f38
SH
1389 return -1;
1390 }
1391 }
3a32201c 1392 umask(cmask);
c6883f38 1393
959aee9c 1394 INFO("Populated /dev under %s", root);
c6883f38
SH
1395 return 0;
1396}
1397
f0d02950
JTLB
1398/*
1399 * Locate allocated devtmpfs mount and purge it.
1400 * path lookup mostly taken from mk_devtmpfs
1401 */
1402int lxc_delete_autodev(struct lxc_handler *handler)
1403{
1404 int ret;
1405 struct stat s;
1406 struct lxc_conf *lxc_conf = handler->conf;
1407 const char *name = handler->name;
1408 const char *lxcpath = handler->lxcpath;
1409 char tmp_path[MAXPATHLEN];
1410 uint64_t hash;
1411
1412 if ( lxc_conf->autodev <= 0 )
1413 return 0;
1414
1c90734d
JTLB
1415 /* don't clean on reboot */
1416 if ( lxc_conf->reboot == 1 )
1417 return 0;
f0d02950
JTLB
1418
1419 /*
1420 * Use the same logic as mk_devtmpfs to compute candidate
1421 * path for cleanup.
1422 */
1423
1424 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s", lxcpath, name);
1425 if (ret < 0 || ret >= MAXPATHLEN)
1426 return -1;
1427
1428 hash = fnv_64a_buf(tmp_path, ret, FNV1A_64_INIT);
1429
1430 /* Probe /dev/.lxc/<container name>.<hash> */
1431 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_base_path, name, hash);
1432 if (ret < 0 || ret >= MAXPATHLEN)
1433 return -1;
1434
1435 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1436 /* Probe /dev/.lxc/user/<container name>.<hash> */
1437 ret = snprintf(tmp_path, MAXPATHLEN, "%s/%s.%016" PRIx64, dev_user_path, name, hash);
1438 if (ret < 0 || ret >= MAXPATHLEN)
1439 return -1;
1440
1441 if ( 0 != access(tmp_path, F_OK) || 0 != stat(tmp_path, &s) || 0 == S_ISDIR(s.st_mode) ) {
1442 WARN("Failed to locate autodev /dev/.lxc and /dev/.lxc/user." );
1443 return -1;
1444 }
1445 }
1446
1447 /* Do the cleanup */
1448 INFO("Cleaning %s", tmp_path );
1449 if ( 0 != lxc_rmdir_onedev(tmp_path, NULL) ) {
1450 ERROR("Failed to cleanup autodev" );
1451 }
1452
1453 return 0;
1454}
1455
cc28d0b0 1456static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1457{
cc28d0b0
SH
1458 const struct lxc_rootfs *rootfs = &conf->rootfs;
1459
a0f379bf
DW
1460 if (!rootfs->path) {
1461 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1462 SYSERROR("Failed to make / rslave");
1463 return -1;
1464 }
c69bd12f 1465 return 0;
a0f379bf 1466 }
0ad19a3f 1467
12297168 1468 if (access(rootfs->mount, F_OK)) {
b1789442 1469 SYSERROR("failed to access to '%s', check it is present",
12297168 1470 rootfs->mount);
b1789442
DL
1471 return -1;
1472 }
1473
9be53773 1474 // First try mounting rootfs using a bdev
76a26f55 1475 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1476 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1477 bdev_put(bdev);
9be53773
SH
1478 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1479 return 0;
1480 }
59d66af2
SH
1481 if (bdev)
1482 bdev_put(bdev);
a17b1e65 1483 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1484 ERROR("failed to mount rootfs");
c3f0a28c 1485 return -1;
1486 }
0ad19a3f 1487
12297168 1488 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1489
ac778708
DL
1490 return 0;
1491}
1492
91e93c71
AV
1493int prepare_ramfs_root(char *root)
1494{
1495 char buf[LINELEN], *p;
1496 char nroot[PATH_MAX];
1497 FILE *f;
1498 int i;
1499 char *p2;
1500
1501 if (realpath(root, nroot) == NULL)
1502 return -1;
1503
1504 if (chdir("/") == -1)
1505 return -1;
1506
1507 /*
1508 * We could use here MS_MOVE, but in userns this mount is
1509 * locked and can't be moved.
1510 */
1511 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1512 SYSERROR("Failed to move %s into /", root);
1513 return -1;
1514 }
1515
1516 if (mount(".", NULL, NULL, MS_REC | MS_PRIVATE, NULL)) {
1517 SYSERROR("Failed to make . rprivate");
1518 return -1;
1519 }
1520
1521 /*
1522 * The following code cleans up inhereted mounts which are not
1523 * required for CT.
1524 *
1525 * The mountinfo file shows not all mounts, if a few points have been
1526 * unmounted between read operations from the mountinfo. So we need to
1527 * read mountinfo a few times.
1528 *
1529 * This loop can be skipped if a container uses unserns, because all
1530 * inherited mounts are locked and we should live with all this trash.
1531 */
1532 while (1) {
1533 int progress = 0;
1534
1535 f = fopen("./proc/self/mountinfo", "r");
1536 if (!f) {
1537 SYSERROR("Unable to open /proc/self/mountinfo");
1538 return -1;
1539 }
1540 while (fgets(buf, LINELEN, f)) {
1541 for (p = buf, i=0; p && i < 4; i++)
1542 p = strchr(p+1, ' ');
1543 if (!p)
1544 continue;
1545 p2 = strchr(p+1, ' ');
1546 if (!p2)
1547 continue;
1548
1549 *p2 = '\0';
1550 *p = '.';
1551
1552 if (strcmp(p + 1, "/") == 0)
1553 continue;
1554 if (strcmp(p + 1, "/proc") == 0)
1555 continue;
1556
1557 if (umount2(p, MNT_DETACH) == 0)
1558 progress++;
1559 }
1560 fclose(f);
1561 if (!progress)
1562 break;
1563 }
1564
1565 if (umount2("./proc", MNT_DETACH)) {
1566 SYSERROR("Unable to umount /proc");
1567 return -1;
1568 }
1569
1570 /* It is weird, but chdir("..") moves us in a new root */
1571 if (chdir("..") == -1) {
1572 SYSERROR("Unable to change working directory");
1573 return -1;
1574 }
1575
1576 if (chroot(".") == -1) {
1577 SYSERROR("Unable to chroot");
1578 return -1;
1579 }
1580
1581 return 0;
1582}
1583
74a3920a 1584static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1585{
ac778708
DL
1586 if (!rootfs->path)
1587 return 0;
1588
91e93c71
AV
1589 if (detect_ramfs_rootfs()) {
1590 if (prepare_ramfs_root(rootfs->mount))
1591 return -1;
1592 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1593 ERROR("failed to setup pivot root");
25368b52 1594 return -1;
c69bd12f
DL
1595 }
1596
25368b52 1597 return 0;
0ad19a3f 1598}
1599
d852c78c 1600static int setup_pts(int pts)
3c26f34e 1601{
77890c6d
SW
1602 char target[PATH_MAX];
1603
d852c78c
DL
1604 if (!pts)
1605 return 0;
3c26f34e 1606
1607 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1608 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1609 return -1;
1610 }
1611
7e40254a
JTLB
1612 if (mkdir("/dev/pts", 0755)) {
1613 if ( errno != EEXIST ) {
1614 SYSERROR("failed to create '/dev/pts'");
1615 return -1;
1616 }
1617 }
1618
a6afdde9 1619 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1620 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1621 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1622 return -1;
1623 }
1624
3c26f34e 1625 if (access("/dev/ptmx", F_OK)) {
1626 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1627 goto out;
36eb9bde 1628 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1629 return -1;
1630 }
1631
77890c6d
SW
1632 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1633 goto out;
1634
3c26f34e 1635 /* fallback here, /dev/pts/ptmx exists just mount bind */
1636 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1637 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1638 return -1;
1639 }
cd54d859
DL
1640
1641 INFO("created new pts instance");
d852c78c 1642
3c26f34e 1643out:
1644 return 0;
1645}
1646
cccc74b5
DL
/*
 * Apply the requested execution domain with personality(2).
 * A value of -1 means nothing was requested. When the build has no
 * <sys/personality.h> this function does nothing.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1663
7c6ef2a2 1664static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1665 const struct lxc_console *console)
6e590161 1666{
63376d7d
DL
1667 char path[MAXPATHLEN];
1668 struct stat s;
7c6ef2a2 1669 int ret;
52e35957 1670
7c6ef2a2
SH
1671 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1672 if (ret >= sizeof(path)) {
959aee9c 1673 ERROR("console path too long");
7c6ef2a2
SH
1674 return -1;
1675 }
52e35957 1676
63376d7d 1677 if (access(path, F_OK)) {
466978b0 1678 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1679 return 0;
52e35957
DL
1680 }
1681
b5159817
DE
1682 if (console->master < 0) {
1683 INFO("no console");
f78a1f32
DL
1684 return 0;
1685 }
ed502555 1686
63376d7d
DL
1687 if (stat(path, &s)) {
1688 SYSERROR("failed to stat '%s'", path);
1689 return -1;
1690 }
1691
1692 if (chmod(console->name, s.st_mode)) {
1693 SYSERROR("failed to set mode '0%o' to '%s'",
1694 s.st_mode, console->name);
1695 return -1;
1696 }
13954cce 1697
63376d7d
DL
1698 if (mount(console->name, path, "none", MS_BIND, 0)) {
1699 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1700 return -1;
1701 }
1702
63376d7d 1703 INFO("console has been setup");
7c6ef2a2
SH
1704 return 0;
1705}
1706
1707static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1708 const struct lxc_console *console,
1709 char *ttydir)
1710{
1711 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1712 int ret;
1713
1714 /* create rootfs/dev/<ttydir> directory */
1715 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1716 ttydir);
1717 if (ret >= sizeof(path))
1718 return -1;
1719 ret = mkdir(path, 0755);
1720 if (ret && errno != EEXIST) {
959aee9c 1721 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1722 return -1;
1723 }
959aee9c 1724 INFO("created %s", path);
7c6ef2a2
SH
1725
1726 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1727 rootfs->mount, ttydir);
1728 if (ret >= sizeof(lxcpath)) {
959aee9c 1729 ERROR("console path too long");
7c6ef2a2
SH
1730 return -1;
1731 }
1732
1733 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1734 ret = unlink(path);
1735 if (ret && errno != ENOENT) {
959aee9c 1736 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1737 return -1;
1738 }
1739
1740 ret = creat(lxcpath, 0660);
1741 if (ret==-1 && errno != EEXIST) {
959aee9c 1742 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1743 return -1;
1744 }
4d44e274
SH
1745 if (ret >= 0)
1746 close(ret);
7c6ef2a2 1747
b5159817
DE
1748 if (console->master < 0) {
1749 INFO("no console");
7c6ef2a2
SH
1750 return 0;
1751 }
1752
1753 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1754 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1755 return -1;
1756 }
1757
1758 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1759 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1760 if (ret >= sizeof(lxcpath)) {
1761 ERROR("lxc/console path too long");
1762 return -1;
1763 }
7c6ef2a2
SH
1764 ret = symlink(lxcpath, path);
1765 if (ret) {
1766 SYSERROR("failed to create symlink for console");
1767 return -1;
1768 }
1769
1770 INFO("console has been setup on %s", lxcpath);
cd54d859 1771
6e590161 1772 return 0;
1773}
1774
7c6ef2a2
SH
1775static int setup_console(const struct lxc_rootfs *rootfs,
1776 const struct lxc_console *console,
1777 char *ttydir)
1778{
1779 /* We don't have a rootfs, /dev/console will be shared */
1780 if (!rootfs->path)
1781 return 0;
1782 if (!ttydir)
1783 return setup_dev_console(rootfs, console);
1784
1785 return setup_ttydir_console(rootfs, console, ttydir);
1786}
1787
1bd051a6
SH
1788static int setup_kmsg(const struct lxc_rootfs *rootfs,
1789 const struct lxc_console *console)
1790{
1791 char kpath[MAXPATHLEN];
1792 int ret;
1793
222fea5a
DE
1794 if (!rootfs->path)
1795 return 0;
1bd051a6
SH
1796 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1797 if (ret < 0 || ret >= sizeof(kpath))
1798 return -1;
1799
1800 ret = unlink(kpath);
1801 if (ret && errno != ENOENT) {
959aee9c 1802 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1803 return -1;
1804 }
1805
1806 ret = symlink("console", kpath);
1807 if (ret) {
1808 SYSERROR("failed to create symlink for kmsg");
1809 return -1;
1810 }
1811
1812 return 0;
1813}
1814
998ac676
RT
1815static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1816{
1817 struct mount_opt *mo;
1818
1819 /* If opt is found in mount_opt, set or clear flags.
1820 * Otherwise append it to data. */
1821
1822 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1823 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1824 if (mo->clear)
1825 *flags &= ~mo->flag;
1826 else
1827 *flags |= mo->flag;
1828 return;
1829 }
1830 }
1831
1832 if (strlen(*data))
1833 strcat(*data, ",");
1834 strcat(*data, opt);
1835}
1836
/*
 * Split a comma separated mount option string into mount flags and a
 * filesystem data string.
 *
 * @mntopts  : option string, may be NULL.
 * @mntflags : receives the accumulated flags (reset to 0 first).
 * @mntdata  : receives a malloc'ed string with the non-flag options, or
 *             NULL if there are none; the caller frees it.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r modifies its input, so work on a copy */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never be longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1875
6fd5e769
SH
/*
 * Terminate @word in place at its first space or tab (or leave it as is
 * if it contains neither).
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1882
/*
 * Skip @nfields whitespace-delimited fields in @src.
 * Each step moves past one run of non-blank characters and the single
 * space or tab that follows it. Returns a pointer to where scanning
 * stopped, which is the terminating NUL if @src has fewer fields.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int remaining = nfields;

	while (remaining-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1900
911324ef
DL
/*
 * Mount @fsname on @target and, for bind or remount requests, follow up
 * with a MS_REMOUNT so that the requested flags take effect.
 *
 * When statvfs() is available, the nosuid/nodev/ro/noexec flags reported
 * for @fsname are added to the remount flags. A plain bind mount that
 * needs no extra flags skips the remount.
 *
 * With @optional set, a failed mount is only logged and 0 is returned.
 * Returns 0 on success, -1 on failure of a non-optional mount.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional)
{
	unsigned long rdonly_req;
#ifdef HAVE_STATVFS
	struct statvfs vfs;
#endif

	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	/* only bind and remount requests need the second pass */
	if (!(mountflags & (MS_REMOUNT | MS_BIND)))
		goto mounted;

	DEBUG("remounting %s on %s to respect bind or remount options",
	      fsname ? fsname : "(none)", target ? target : "(none)");

	/* MS_RDONLY if read-only was requested, 0 otherwise */
	rdonly_req = mountflags & MS_RDONLY;

#ifdef HAVE_STATVFS
	if (statvfs(fsname, &vfs) == 0) {
		const unsigned long inherit_mask =
			MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC;
		unsigned long required_flags =
			rdonly_req | (vfs.f_flag & inherit_mask);

		DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, vfs.f_flag, required_flags);

		/*
		 * If this was a bind mount request, and required_flags
		 * does not have any flags which are not already in
		 * mountflags, then skip the remount
		 */
		if (!(mountflags & MS_REMOUNT) &&
		    !(required_flags & ~mountflags) && rdonly_req == 0) {
			DEBUG("mountflags already was %lu, skipping remount",
			      mountflags);
			goto mounted;
		}

		mountflags |= required_flags;
	}
#endif

	if (mount(fsname, target, fstype, mountflags | MS_REMOUNT, data)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
		INFO("failed to mount '%s' on '%s' (optional): %s",
		     fsname, target, strerror(errno));
		return 0;
	}

mounted:
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1977
4e4ca161
SH
/*
 * Strip the lxc-only options 'create=dir', 'create=file' and 'optional'
 * from mntent->mnt_opts, editing the string in place. Only the first
 * occurrence of each is removed; when the option is the last one, the
 * string is cut at its start.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t k;

	for (k = 0; k < sizeof(culled) / sizeof(culled[0]); k++) {
		char *hit = strstr(mntent->mnt_opts, culled[k]);
		char *comma;

		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma)
			/* close the gap, moving the tail and its NUL */
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		else
			/* no more mntopts, so just chop it here */
			*hit = '\0';
	}
}
2002
/*
 * Mount one fstab-style entry at the path given in the entry itself.
 *
 * Honours the lxc-specific options before mounting: "create=dir" and
 * "create=file" create the mount target (failures are only warned
 * about, the mount is still attempted) and "optional" makes a failed
 * mount non-fatal. Those options are removed from mnt_opts before the
 * remaining options are parsed.
 *
 * Returns the result of mount_entry(), or -1 if the options cannot be
 * parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		FILE *pathfile;
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the strdup'ed pointer for free()
		 * and never free dirname()'s return value.
		 */
		char *pathcopy = strdup(mntent->mnt_dir);

		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0)
			WARN("Failed to create target directory");
		free(pathcopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		else
			fclose(pathfile);
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
2049
4e4ca161 2050static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
2051 const struct lxc_rootfs *rootfs,
2052 const char *lxc_name)
911324ef 2053{
013bd428 2054 char *aux;
59760f5d 2055 char path[MAXPATHLEN];
911324ef
DL
2056 unsigned long mntflags;
2057 char *mntdata;
80a881b2 2058 int r, ret = 0, offset;
67e571de 2059 const char *lxcpath;
34cfffb3
SG
2060 FILE *pathfile = NULL;
2061 char *pathdirname = NULL;
4f1d50d1 2062 bool optional = hasmntopt(mntent, "optional") != NULL;
0ad19a3f 2063
593e8478 2064 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
2065 if (!lxcpath) {
2066 ERROR("Out of memory");
2067 return -1;
2068 }
2069
80a881b2 2070 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
2071 * use $lxcpath/CN/rootfs as the target prefix */
2072 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
2073 if (r < 0 || r >= MAXPATHLEN)
2074 goto skipvarlib;
2075
2076 aux = strstr(mntent->mnt_dir, path);
2077 if (aux) {
2078 offset = strlen(path);
2079 goto skipabs;
2080 }
2081
2082skipvarlib:
013bd428
DL
2083 aux = strstr(mntent->mnt_dir, rootfs->path);
2084 if (!aux) {
2085 WARN("ignoring mount point '%s'", mntent->mnt_dir);
2086 goto out;
2087 }
80a881b2
SH
2088 offset = strlen(rootfs->path);
2089
2090skipabs:
013bd428 2091
9ba8130c 2092 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
2093 aux + offset);
2094 if (r < 0 || r >= MAXPATHLEN) {
2095 WARN("pathnme too long for '%s'", mntent->mnt_dir);
2096 ret = -1;
2097 goto out;
2098 }
2099
34cfffb3 2100 if (hasmntopt(mntent, "create=dir")) {
119126b6 2101 if (mkdir_p(path, 0755) < 0) {
34cfffb3
SG
2102 WARN("Failed to create mount target '%s'", path);
2103 ret = -1;
2104 }
2105 }
2106
2107 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
2108 pathdirname = strdup(path);
2109 pathdirname = dirname(pathdirname);
119126b6
SG
2110 if (mkdir_p(pathdirname, 0755) < 0) {
2111 WARN("Failed to create target directory");
2112 }
34cfffb3
SG
2113 pathfile = fopen(path, "wb");
2114 if (!pathfile) {
2115 WARN("Failed to create mount target '%s'", path);
2116 ret = -1;
2117 }
2118 else
2119 fclose(pathfile);
2120 }
4e4ca161 2121 cull_mntent_opt(mntent);
d330fe7b 2122
a17b1e65
SG
2123 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
2124 free(mntdata);
2125 return -1;
2126 }
2127
013bd428 2128 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1fc64d22 2129 mntflags, mntdata, optional);
0ad19a3f 2130
a17b1e65
SG
2131 free(mntdata);
2132
013bd428 2133out:
34cfffb3 2134 free(pathdirname);
911324ef
DL
2135 return ret;
2136}
d330fe7b 2137
/*
 * Mount one fstab entry whose target directory is relative, i.e. it is
 * resolved below the container's rootfs mount point.
 *
 * @mntent: the entry to mount; its option string is culled in place
 * @rootfs: path of the rootfs mount point used as prefix for mnt_dir
 *
 * The "create=dir" / "create=file" options ask for the mount target to be
 * created first.  Failures to create the target are only warned about; the
 * value returned is the result of mount_entry(), or -1 when the path is too
 * long, memory runs out, or the mount options cannot be parsed.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata = NULL;
	int ret;
	FILE *pathfile = NULL;
	char *pathcopy = NULL;
	char *pathdirname;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0)
			WARN("Failed to create mount target '%s'", path);
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		/* dirname() may modify its argument and may return a pointer
		 * that is not the one passed in, so keep the allocation in
		 * its own variable: that is the only pointer safe to free. */
		pathcopy = strdup(path);
		if (!pathcopy) {
			ERROR("Out of memory");
			return -1;
		}
		pathdirname = dirname(pathcopy);
		if (mkdir_p(pathdirname, 0755) < 0) {
			WARN("Failed to create target directory");
		}
		pathfile = fopen(path, "wb");
		if (!pathfile)
			WARN("Failed to create mount target '%s'", path);
		else
			fclose(pathfile);
	}
	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata, optional);

out:
	free(pathcopy);
	free(mntdata);

	return ret;
}
2192
80a881b2
SH
2193static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
2194 const char *lxc_name)
911324ef 2195{
aaf901be
AM
2196 struct mntent mntent;
2197 char buf[4096];
911324ef 2198 int ret = -1;
e76b8764 2199
aaf901be 2200 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 2201
911324ef 2202 if (!rootfs->path) {
aaf901be 2203 if (mount_entry_on_systemfs(&mntent))
e76b8764 2204 goto out;
911324ef 2205 continue;
e76b8764
CDC
2206 }
2207
911324ef 2208 /* We have a separate root, mounts are relative to it */
aaf901be
AM
2209 if (mntent.mnt_dir[0] != '/') {
2210 if (mount_entry_on_relative_rootfs(&mntent,
911324ef
DL
2211 rootfs->mount))
2212 goto out;
2213 continue;
2214 }
cd54d859 2215
aaf901be 2216 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
911324ef 2217 goto out;
0ad19a3f 2218 }
cd54d859 2219
0ad19a3f 2220 ret = 0;
cd54d859
DL
2221
2222 INFO("mount points have been setup");
0ad19a3f 2223out:
e7938e9e
MN
2224 return ret;
2225}
2226
80a881b2
SH
/*
 * Mount every entry listed in the fstab file @fstab.
 * A NULL @fstab means there is nothing to do and counts as success.
 * Returns the result of mount_file_entries(), or -1 if the file cannot be
 * opened.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
	const char *lxc_name)
{
	FILE *fstab_stream;
	int rc;

	if (fstab == NULL)
		return 0;

	fstab_stream = setmntent(fstab, "r");
	if (fstab_stream == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, fstab_stream, lxc_name);
	endmntent(fstab_stream);

	return rc;
}
2247
80a881b2
SH
2248static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
2249 const char *lxc_name)
e7938e9e
MN
2250{
2251 FILE *file;
2252 struct lxc_list *iterator;
2253 char *mount_entry;
2254 int ret;
2255
2256 file = tmpfile();
2257 if (!file) {
2258 ERROR("tmpfile error: %m");
2259 return -1;
2260 }
2261
2262 lxc_list_for_each(iterator, mount) {
2263 mount_entry = iterator->elem;
1d6b1976 2264 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2265 }
2266
2267 rewind(file);
2268
80a881b2 2269 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e
MN
2270
2271 fclose(file);
2272 return ret;
2273}
2274
bab88e68
CS
2275static int parse_cap(const char *cap)
2276{
2277 char *ptr = NULL;
2278 int i, capid = -1;
2279
7035407c
DE
2280 if (!strcmp(cap, "none"))
2281 return -2;
2282
bab88e68
CS
2283 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2284
2285 if (strcmp(cap, caps_opt[i].name))
2286 continue;
2287
2288 capid = caps_opt[i].value;
2289 break;
2290 }
2291
2292 if (capid < 0) {
2293 /* try to see if it's numeric, so the user may specify
2294 * capabilities that the running kernel knows about but
2295 * we don't */
2296 errno = 0;
2297 capid = strtol(cap, &ptr, 10);
2298 if (!ptr || *ptr != '\0' || errno != 0)
2299 /* not a valid number */
2300 capid = -1;
2301 else if (capid > lxc_caps_last_cap())
2302 /* we have a number but it's not a valid
2303 * capability */
2304 capid = -1;
2305 }
2306
2307 return capid;
2308}
2309
0769b82a
CS
2310int in_caplist(int cap, struct lxc_list *caps)
2311{
2312 struct lxc_list *iterator;
2313 int capid;
2314
2315 lxc_list_for_each(iterator, caps) {
2316 capid = parse_cap(iterator->elem);
2317 if (capid == cap)
2318 return 1;
2319 }
2320
2321 return 0;
2322}
2323
81810dd1
DL
2324static int setup_caps(struct lxc_list *caps)
2325{
2326 struct lxc_list *iterator;
2327 char *drop_entry;
bab88e68 2328 int capid;
81810dd1
DL
2329
2330 lxc_list_for_each(iterator, caps) {
2331
2332 drop_entry = iterator->elem;
2333
bab88e68 2334 capid = parse_cap(drop_entry);
d55bc1ad 2335
81810dd1 2336 if (capid < 0) {
1e11be34
DL
2337 ERROR("unknown capability %s", drop_entry);
2338 return -1;
81810dd1
DL
2339 }
2340
2341 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2342
2343 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2344 SYSERROR("failed to remove %s capability", drop_entry);
2345 return -1;
2346 }
81810dd1
DL
2347
2348 }
2349
1fb86a7c
SH
2350 DEBUG("capabilities have been setup");
2351
2352 return 0;
2353}
2354
2355static int dropcaps_except(struct lxc_list *caps)
2356{
2357 struct lxc_list *iterator;
2358 char *keep_entry;
1fb86a7c
SH
2359 int i, capid;
2360 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2361 INFO("found %d capabilities", numcaps);
1fb86a7c 2362
2caf9a97
SH
2363 if (numcaps <= 0 || numcaps > 200)
2364 return -1;
2365
1fb86a7c
SH
2366 // caplist[i] is 1 if we keep capability i
2367 int *caplist = alloca(numcaps * sizeof(int));
2368 memset(caplist, 0, numcaps * sizeof(int));
2369
2370 lxc_list_for_each(iterator, caps) {
2371
2372 keep_entry = iterator->elem;
2373
bab88e68 2374 capid = parse_cap(keep_entry);
1fb86a7c 2375
7035407c
DE
2376 if (capid == -2)
2377 continue;
2378
1fb86a7c
SH
2379 if (capid < 0) {
2380 ERROR("unknown capability %s", keep_entry);
2381 return -1;
2382 }
2383
8255688a 2384 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2385
2386 caplist[capid] = 1;
2387 }
2388 for (i=0; i<numcaps; i++) {
2389 if (caplist[i])
2390 continue;
2391 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2392 SYSERROR("failed to remove capability %d", i);
2393 return -1;
2394 }
1fb86a7c
SH
2395 }
2396
2397 DEBUG("capabilities have been setup");
81810dd1
DL
2398
2399 return 0;
2400}
2401
0ad19a3f 2402static int setup_hw_addr(char *hwaddr, const char *ifname)
2403{
2404 struct sockaddr sockaddr;
2405 struct ifreq ifr;
2406 int ret, fd;
2407
3cfc0f3a
MN
2408 ret = lxc_convert_mac(hwaddr, &sockaddr);
2409 if (ret) {
2410 ERROR("mac address '%s' conversion failed : %s",
2411 hwaddr, strerror(-ret));
0ad19a3f 2412 return -1;
2413 }
2414
2415 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2416 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2417 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2418
2419 fd = socket(AF_INET, SOCK_DGRAM, 0);
2420 if (fd < 0) {
3ab87b66 2421 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2422 return -1;
2423 }
2424
2425 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2426 close(fd);
2427 if (ret)
3ab87b66 2428 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2429
5da6aa8c 2430 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2431
0ad19a3f 2432 return ret;
2433}
2434
82d5ae15 2435static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2436{
82d5ae15
DL
2437 struct lxc_list *iterator;
2438 struct lxc_inetdev *inetdev;
3cfc0f3a 2439 int err;
0ad19a3f 2440
82d5ae15
DL
2441 lxc_list_for_each(iterator, ip) {
2442
2443 inetdev = iterator->elem;
2444
0093bb8c
DL
2445 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2446 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2447 if (err) {
2448 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2449 ifindex, strerror(-err));
82d5ae15
DL
2450 return -1;
2451 }
2452 }
2453
2454 return 0;
0ad19a3f 2455}
2456
82d5ae15 2457static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2458{
82d5ae15 2459 struct lxc_list *iterator;
7fa9074f 2460 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2461 int err;
0ad19a3f 2462
82d5ae15
DL
2463 lxc_list_for_each(iterator, ip) {
2464
2465 inet6dev = iterator->elem;
2466
b3df193c 2467 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2468 &inet6dev->mcast, &inet6dev->acast,
2469 inet6dev->prefix);
3cfc0f3a
MN
2470 if (err) {
2471 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2472 ifindex, strerror(-err));
82d5ae15 2473 return -1;
3cfc0f3a 2474 }
82d5ae15
DL
2475 }
2476
2477 return 0;
0ad19a3f 2478}
2479
82d5ae15 2480static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2481{
0ad19a3f 2482 char ifname[IFNAMSIZ];
0ad19a3f 2483 char *current_ifname = ifname;
3cfc0f3a 2484 int err;
0ad19a3f 2485
82d5ae15
DL
2486 /* empty network namespace */
2487 if (!netdev->ifindex) {
b0efbac4 2488 if (netdev->flags & IFF_UP) {
d472214b 2489 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2490 if (err) {
2491 ERROR("failed to set the loopback up : %s",
2492 strerror(-err));
82d5ae15
DL
2493 return -1;
2494 }
82d5ae15 2495 }
40790553
SH
2496 if (netdev->type != LXC_NET_VETH)
2497 return 0;
2498 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2499 }
13954cce 2500
b466dc33 2501 /* get the new ifindex in case of physical netdev */
40790553 2502 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2503 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2504 ERROR("failed to get ifindex for %s",
2505 netdev->link);
2506 return -1;
2507 }
40790553 2508 }
b466dc33 2509
82d5ae15
DL
2510 /* retrieve the name of the interface */
2511 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2512 ERROR("no interface corresponding to index '%d'",
82d5ae15 2513 netdev->ifindex);
0ad19a3f 2514 return -1;
2515 }
13954cce 2516
018ef520 2517 /* default: let the system to choose one interface name */
9d083402 2518 if (!netdev->name)
fb6d9b2f
DL
2519 netdev->name = netdev->type == LXC_NET_PHYS ?
2520 netdev->link : "eth%d";
018ef520 2521
82d5ae15 2522 /* rename the interface name */
40790553
SH
2523 if (strcmp(ifname, netdev->name) != 0) {
2524 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2525 if (err) {
2526 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2527 strerror(-err));
2528 return -1;
2529 }
018ef520
DL
2530 }
2531
2532 /* Re-read the name of the interface because its name has changed
2533 * and would be automatically allocated by the system
2534 */
82d5ae15 2535 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2536 ERROR("no interface corresponding to index '%d'",
82d5ae15 2537 netdev->ifindex);
018ef520 2538 return -1;
0ad19a3f 2539 }
2540
82d5ae15
DL
2541 /* set a mac address */
2542 if (netdev->hwaddr) {
2543 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2544 ERROR("failed to setup hw address for '%s'",
82d5ae15 2545 current_ifname);
0ad19a3f 2546 return -1;
2547 }
2548 }
2549
82d5ae15
DL
2550 /* setup ipv4 addresses on the interface */
2551 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2552 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2553 ifname);
2554 return -1;
2555 }
2556
82d5ae15
DL
2557 /* setup ipv6 addresses on the interface */
2558 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2559 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2560 ifname);
2561 return -1;
2562 }
2563
82d5ae15 2564 /* set the network device up */
b0efbac4 2565 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2566 int err;
2567
d472214b 2568 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2569 if (err) {
2570 ERROR("failed to set '%s' up : %s", current_ifname,
2571 strerror(-err));
0ad19a3f 2572 return -1;
2573 }
2574
2575 /* the network is up, make the loopback up too */
d472214b 2576 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2577 if (err) {
2578 ERROR("failed to set the loopback up : %s",
2579 strerror(-err));
0ad19a3f 2580 return -1;
2581 }
2582 }
2583
f8fee0e2
MK
2584 /* We can only set up the default routes after bringing
2585 * up the interface, sine bringing up the interface adds
2586 * the link-local routes and we can't add a default
2587 * route if the gateway is not reachable. */
2588
2589 /* setup ipv4 gateway on the interface */
2590 if (netdev->ipv4_gateway) {
2591 if (!(netdev->flags & IFF_UP)) {
2592 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2593 return -1;
2594 }
2595
2596 if (lxc_list_empty(&netdev->ipv4)) {
2597 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2598 return -1;
2599 }
2600
2601 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2602 if (err) {
fc739df5
SG
2603 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2604 if (err) {
2605 ERROR("failed to add ipv4 dest for '%s': %s",
2606 ifname, strerror(-err));
2607 }
2608
2609 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2610 if (err) {
2611 ERROR("failed to setup ipv4 gateway for '%s': %s",
2612 ifname, strerror(-err));
2613 if (netdev->ipv4_gateway_auto) {
2614 char buf[INET_ADDRSTRLEN];
2615 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2616 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2617 }
2618 return -1;
19a26f82 2619 }
f8fee0e2
MK
2620 }
2621 }
2622
2623 /* setup ipv6 gateway on the interface */
2624 if (netdev->ipv6_gateway) {
2625 if (!(netdev->flags & IFF_UP)) {
2626 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2627 return -1;
2628 }
2629
2630 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2631 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2632 return -1;
2633 }
2634
2635 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2636 if (err) {
fc739df5
SG
2637 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2638 if (err) {
2639 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2640 ifname, strerror(-err));
19a26f82 2641 }
fc739df5
SG
2642
2643 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2644 if (err) {
2645 ERROR("failed to setup ipv6 gateway for '%s': %s",
2646 ifname, strerror(-err));
2647 if (netdev->ipv6_gateway_auto) {
2648 char buf[INET6_ADDRSTRLEN];
2649 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2650 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2651 }
2652 return -1;
2653 }
f8fee0e2
MK
2654 }
2655 }
2656
cd54d859
DL
2657 DEBUG("'%s' has been setup", current_ifname);
2658
0ad19a3f 2659 return 0;
2660}
2661
5f4535a3 2662static int setup_network(struct lxc_list *network)
0ad19a3f 2663{
82d5ae15 2664 struct lxc_list *iterator;
82d5ae15 2665 struct lxc_netdev *netdev;
0ad19a3f 2666
5f4535a3 2667 lxc_list_for_each(iterator, network) {
cd54d859 2668
5f4535a3 2669 netdev = iterator->elem;
82d5ae15
DL
2670
2671 if (setup_netdev(netdev)) {
2672 ERROR("failed to setup netdev");
2673 return -1;
2674 }
2675 }
cd54d859 2676
5f4535a3
DL
2677 if (!lxc_list_empty(network))
2678 INFO("network has been setup");
cd54d859
DL
2679
2680 return 0;
0ad19a3f 2681}
2682
2af6bd1b
SH
2683/* try to move physical nics to the init netns */
2684void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2685{
2686 int i, ret, oldfd;
2687 char path[MAXPATHLEN];
2688
2689 if (netnsfd < 0)
2690 return;
2691
2692 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2693 if (ret < 0 || ret >= MAXPATHLEN) {
2694 WARN("Failed to open monitor netns fd");
2695 return;
2696 }
2697 if ((oldfd = open(path, O_RDONLY)) < 0) {
2698 SYSERROR("Failed to open monitor netns fd");
2699 return;
2700 }
2701 if (setns(netnsfd, 0) != 0) {
2702 SYSERROR("Failed to enter container netns to reset nics");
2703 close(oldfd);
2704 return;
2705 }
2706 for (i=0; i<conf->num_savednics; i++) {
2707 struct saved_nic *s = &conf->saved_nics[i];
8d357196 2708 if (lxc_netdev_move_by_index(s->ifindex, 1, NULL))
2af6bd1b
SH
2709 WARN("Error moving nic index:%d back to host netns",
2710 s->ifindex);
2711 }
2712 if (setns(oldfd, 0) != 0)
2713 SYSERROR("Failed to re-enter monitor's netns");
2714 close(oldfd);
2715}
2716
2717void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2718{
2719 int i;
2720
2af6bd1b
SH
2721 if (conf->num_savednics == 0)
2722 return;
2723
7b35f3d6 2724 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2725 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2726 for (i=0; i<conf->num_savednics; i++) {
2727 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2728 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2729 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2730 free(s->orig_name);
2731 }
2732 conf->num_savednics = 0;
7b35f3d6
SH
2733}
2734
ae9242c8
SH
2735static char *default_rootfs_mount = LXCROOTFSMOUNT;
2736
7b379ab3 2737struct lxc_conf *lxc_conf_init(void)
089cd8b8 2738{
7b379ab3 2739 struct lxc_conf *new;
26ddeedd 2740 int i;
7b379ab3
MN
2741
2742 new = malloc(sizeof(*new));
2743 if (!new) {
2744 ERROR("lxc_conf_init : %m");
2745 return NULL;
2746 }
2747 memset(new, 0, sizeof(*new));
2748
b40a606e 2749 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2750 new->personality = -1;
bc6928ff 2751 new->autodev = -1;
596a818d
DE
2752 new->console.log_path = NULL;
2753 new->console.log_fd = -1;
28a4b0e5 2754 new->console.path = NULL;
63376d7d 2755 new->console.peer = -1;
b5159817
DE
2756 new->console.peerpty.busy = -1;
2757 new->console.peerpty.master = -1;
2758 new->console.peerpty.slave = -1;
63376d7d
DL
2759 new->console.master = -1;
2760 new->console.slave = -1;
2761 new->console.name[0] = '\0';
d2e30e99 2762 new->maincmd_fd = -1;
76a26f55 2763 new->nbd_idx = -1;
54c30e29 2764 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2765 if (!new->rootfs.mount) {
2766 ERROR("lxc_conf_init : %m");
2767 free(new);
2768 return NULL;
2769 }
2f3f41d0 2770 new->kmsg = 1;
7b379ab3
MN
2771 lxc_list_init(&new->cgroup);
2772 lxc_list_init(&new->network);
2773 lxc_list_init(&new->mount_list);
81810dd1 2774 lxc_list_init(&new->caps);
1fb86a7c 2775 lxc_list_init(&new->keepcaps);
f6d3e3e4 2776 lxc_list_init(&new->id_map);
f979ac15 2777 lxc_list_init(&new->includes);
4184c3e1 2778 lxc_list_init(&new->aliens);
7c661726 2779 lxc_list_init(&new->environment);
26ddeedd
SH
2780 for (i=0; i<NUM_LXC_HOOKS; i++)
2781 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2782 lxc_list_init(&new->groups);
fe4de9a6
DE
2783 new->lsm_aa_profile = NULL;
2784 new->lsm_se_context = NULL;
5112cd70 2785 new->tmp_umount_proc = 0;
7b379ab3 2786
9f30a190
MM
2787 for (i = 0; i < LXC_NS_MAX; i++)
2788 new->inherit_ns_fd[i] = -1;
2789
7b379ab3 2790 return new;
089cd8b8
DL
2791}
2792
a589434e 2793static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2794{
8634bc19 2795 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2796 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2797 int err;
13954cce 2798
e892973e
DL
2799 if (netdev->priv.veth_attr.pair)
2800 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2801 else {
9ba8130c
SH
2802 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2803 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2804 ERROR("veth1 name too long");
2805 return -1;
2806 }
a0265685 2807 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2808 if (!veth1) {
2809 ERROR("failed to allocate a temporary name");
2810 return -1;
2811 }
74a2b586
JK
2812 /* store away for deconf */
2813 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2814 }
82d5ae15 2815
0e391e57 2816 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2817 veth2 = lxc_mkifname(veth2buf);
ad40563e 2818 if (!veth2) {
82d5ae15 2819 ERROR("failed to allocate a temporary name");
ad40563e 2820 goto out_delete;
0ad19a3f 2821 }
2822
3cfc0f3a
MN
2823 err = lxc_veth_create(veth1, veth2);
2824 if (err) {
2e2d6a7b 2825 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2826 strerror(-err));
ad40563e 2827 goto out_delete;
0ad19a3f 2828 }
13954cce 2829
49684c0b
CS
2830 /* changing the high byte of the mac address to 0xfe, the bridge interface
2831 * will always keep the host's mac address and not take the mac address
2832 * of a container */
2833 err = setup_private_host_hw_addr(veth1);
2834 if (err) {
2e2d6a7b 2835 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2836 veth1, strerror(-err));
2837 goto out_delete;
2838 }
2839
82d5ae15 2840 if (netdev->mtu) {
d472214b 2841 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2842 if (!err)
d472214b 2843 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a 2844 if (err) {
2e2d6a7b 2845 ERROR("failed to set mtu '%s' for veth pair (%s and %s): %s",
3cfc0f3a 2846 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2847 goto out_delete;
75d09f83
DL
2848 }
2849 }
2850
3cfc0f3a
MN
2851 if (netdev->link) {
2852 err = lxc_bridge_attach(netdev->link, veth1);
2853 if (err) {
2e2d6a7b 2854 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2855 veth1, netdev->link, strerror(-err));
2856 goto out_delete;
2857 }
eb14c10a
DL
2858 }
2859
82d5ae15
DL
2860 netdev->ifindex = if_nametoindex(veth2);
2861 if (!netdev->ifindex) {
36eb9bde 2862 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2863 goto out_delete;
2864 }
2865
d472214b 2866 err = lxc_netdev_up(veth1);
6e35af2e
DL
2867 if (err) {
2868 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2869 goto out_delete;
0ad19a3f 2870 }
2871
e3b4c4c4 2872 if (netdev->upscript) {
751d9dcd
DL
2873 err = run_script(handler->name, "net", netdev->upscript, "up",
2874 "veth", veth1, (char*) NULL);
2875 if (err)
e3b4c4c4 2876 goto out_delete;
e3b4c4c4
ST
2877 }
2878
a589434e 2879 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2880 veth1, veth2, netdev->ifindex);
2881
6ab9ab6d 2882 return 0;
eb14c10a
DL
2883
2884out_delete:
b84f58b9 2885 lxc_netdev_delete_by_name(veth1);
ad40563e
ÇO
2886 if (!netdev->priv.veth_attr.pair && veth1)
2887 free(veth1);
2888 if(veth2)
2889 free(veth2);
6ab9ab6d 2890 return -1;
13954cce 2891}
d957ae2d 2892
74a2b586
JK
2893static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2894{
2895 char *veth1;
2896 int err;
2897
2898 if (netdev->priv.veth_attr.pair)
2899 veth1 = netdev->priv.veth_attr.pair;
2900 else
2901 veth1 = netdev->priv.veth_attr.veth1;
2902
2903 if (netdev->downscript) {
2904 err = run_script(handler->name, "net", netdev->downscript,
2905 "down", "veth", veth1, (char*) NULL);
2906 if (err)
2907 return -1;
2908 }
2909 return 0;
2910}
2911
a589434e 2912static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2913{
0e391e57 2914 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2915 int err;
d957ae2d
MT
2916
2917 if (!netdev->link) {
2918 ERROR("no link specified for macvlan netdev");
2919 return -1;
2920 }
13954cce 2921
9ba8130c
SH
2922 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2923 if (err >= sizeof(peerbuf))
2924 return -1;
82d5ae15 2925
a0265685 2926 peer = lxc_mkifname(peerbuf);
ad40563e 2927 if (!peer) {
82d5ae15
DL
2928 ERROR("failed to make a temporary name");
2929 return -1;
0ad19a3f 2930 }
2931
3cfc0f3a
MN
2932 err = lxc_macvlan_create(netdev->link, peer,
2933 netdev->priv.macvlan_attr.mode);
2934 if (err) {
2935 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2936 peer, netdev->link, strerror(-err));
ad40563e 2937 goto out;
0ad19a3f 2938 }
2939
82d5ae15
DL
2940 netdev->ifindex = if_nametoindex(peer);
2941 if (!netdev->ifindex) {
36eb9bde 2942 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2943 goto out;
22ebac19 2944 }
2945
e3b4c4c4 2946 if (netdev->upscript) {
751d9dcd
DL
2947 err = run_script(handler->name, "net", netdev->upscript, "up",
2948 "macvlan", netdev->link, (char*) NULL);
2949 if (err)
ad40563e 2950 goto out;
e3b4c4c4
ST
2951 }
2952
a589434e 2953 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2954 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2955
d957ae2d 2956 return 0;
ad40563e
ÇO
2957out:
2958 lxc_netdev_delete_by_name(peer);
2959 free(peer);
2960 return -1;
0ad19a3f 2961}
2962
74a2b586
JK
2963static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2964{
2965 int err;
2966
2967 if (netdev->downscript) {
2968 err = run_script(handler->name, "net", netdev->downscript,
2969 "down", "macvlan", netdev->link,
2970 (char*) NULL);
2971 if (err)
2972 return -1;
2973 }
2974 return 0;
2975}
2976
a589434e
JN
2977/* XXX: merge with instantiate_macvlan */
2978static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2979{
2980 char peer[IFNAMSIZ];
3cfc0f3a 2981 int err;
26c39028
JHS
2982
2983 if (!netdev->link) {
2984 ERROR("no link specified for vlan netdev");
2985 return -1;
2986 }
2987
9ba8130c
SH
2988 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2989 if (err >= sizeof(peer)) {
2990 ERROR("peer name too long");
2991 return -1;
2992 }
26c39028 2993
3cfc0f3a
MN
2994 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2995 if (err) {
2996 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2997 peer, netdev->link, strerror(-err));
26c39028
JHS
2998 return -1;
2999 }
3000
3001 netdev->ifindex = if_nametoindex(peer);
3002 if (!netdev->ifindex) {
3003 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 3004 lxc_netdev_delete_by_name(peer);
26c39028
JHS
3005 return -1;
3006 }
3007
a589434e 3008 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
3009 netdev->ifindex);
3010
26c39028
JHS
3011 return 0;
3012}
3013
74a2b586
JK
/* Shutdown hook for vlan devices: nothing to do, always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3018
a589434e 3019static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 3020{
6168e99f
DL
3021 if (!netdev->link) {
3022 ERROR("no link specified for the physical interface");
3023 return -1;
3024 }
3025
9d083402 3026 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 3027 if (!netdev->ifindex) {
9d083402 3028 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 3029 return -1;
3030 }
3031
e3b4c4c4
ST
3032 if (netdev->upscript) {
3033 int err;
751d9dcd
DL
3034 err = run_script(handler->name, "net", netdev->upscript,
3035 "up", "phys", netdev->link, (char*) NULL);
3036 if (err)
e3b4c4c4 3037 return -1;
e3b4c4c4
ST
3038 }
3039
82d5ae15 3040 return 0;
0ad19a3f 3041}
3042
74a2b586
JK
3043static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
3044{
3045 int err;
3046
3047 if (netdev->downscript) {
3048 err = run_script(handler->name, "net", netdev->downscript,
3049 "down", "phys", netdev->link, (char*) NULL);
3050 if (err)
3051 return -1;
3052 }
3053 return 0;
3054}
3055
a589434e 3056static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
3057{
3058 netdev->ifindex = 0;
3059 return 0;
3060}
3061
a589434e 3062static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 3063{
82d5ae15 3064 netdev->ifindex = 0;
e3b4c4c4
ST
3065 if (netdev->upscript) {
3066 int err;
751d9dcd
DL
3067 err = run_script(handler->name, "net", netdev->upscript,
3068 "up", "empty", (char*) NULL);
3069 if (err)
e3b4c4c4 3070 return -1;
e3b4c4c4 3071 }
82d5ae15 3072 return 0;
0ad19a3f 3073}
3074
74a2b586
JK
3075static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
3076{
3077 int err;
3078
3079 if (netdev->downscript) {
3080 err = run_script(handler->name, "net", netdev->downscript,
3081 "down", "empty", (char*) NULL);
3082 if (err)
3083 return -1;
3084 }
3085 return 0;
3086}
3087
26b797f3
SH
/* Shutdown hook for the "none" type: nothing to do, always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
3092
3093int lxc_requests_empty_network(struct lxc_handler *handler)
3094{
3095 struct lxc_list *network = &handler->conf->network;
3096 struct lxc_list *iterator;
3097 struct lxc_netdev *netdev;
3098 bool found_none = false, found_nic = false;
3099
3100 if (lxc_list_empty(network))
3101 return 0;
3102
3103 lxc_list_for_each(iterator, network) {
3104
3105 netdev = iterator->elem;
3106
3107 if (netdev->type == LXC_NET_NONE)
3108 found_none = true;
3109 else
3110 found_nic = true;
3111 }
3112 if (found_none && !found_nic)
3113 return 1;
3114 return 0;
3115}
3116
e3b4c4c4 3117int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 3118{
e3b4c4c4 3119 struct lxc_list *network = &handler->conf->network;
82d5ae15 3120 struct lxc_list *iterator;
82d5ae15 3121 struct lxc_netdev *netdev;
cbef6c52
SH
3122 int am_root = (getuid() == 0);
3123
3124 if (!am_root)
3125 return 0;
0ad19a3f 3126
5f4535a3 3127 lxc_list_for_each(iterator, network) {
0ad19a3f 3128
5f4535a3 3129 netdev = iterator->elem;
13954cce 3130
24654103 3131 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 3132 ERROR("invalid network configuration type '%d'",
5f4535a3 3133 netdev->type);
82d5ae15
DL
3134 return -1;
3135 }
0ad19a3f 3136
e3b4c4c4 3137 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
3138 ERROR("failed to create netdev");
3139 return -1;
3140 }
e3b4c4c4 3141
0ad19a3f 3142 }
3143
3144 return 0;
3145}
3146
74a2b586 3147void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 3148{
74a2b586 3149 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
3150 struct lxc_list *iterator;
3151 struct lxc_netdev *netdev;
3152
3153 lxc_list_for_each(iterator, network) {
3154 netdev = iterator->elem;
d472214b 3155
74a2b586 3156 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
3157 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
3158 WARN("failed to rename to the initial name the " \
3159 "netdev '%s'", netdev->link);
d472214b 3160 continue;
d8f8e352 3161 }
d472214b 3162
74a2b586
JK
3163 if (netdev_deconf[netdev->type](handler, netdev)) {
3164 WARN("failed to destroy netdev");
3165 }
3166
d8f8e352
DL
3167 /* Recent kernel remove the virtual interfaces when the network
3168 * namespace is destroyed but in case we did not moved the
3169 * interface to the network namespace, we have to destroy it
3170 */
74a2b586
JK
3171 if (netdev->ifindex != 0 &&
3172 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 3173 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
3174 }
3175}
3176
45e854dc
SG
3177#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
3178
fe1f672f
ÇO
3179/* lxc-user-nic returns "interface_name:interface_name\n" */
3180#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
74a3920a 3181static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
3182{
3183 pid_t child;
a7242d9a
ÇO
3184 int bytes, pipefd[2];
3185 char *token, *saveptr = NULL;
fe1f672f 3186 char buffer[MAX_BUFFER_SIZE];
cbef6c52
SH
3187
3188 if (netdev->type != LXC_NET_VETH) {
3189 ERROR("nic type %d not support for unprivileged use",
3190 netdev->type);
3191 return -1;
3192 }
3193
a7242d9a
ÇO
3194 if(pipe(pipefd) < 0) {
3195 SYSERROR("pipe failed");
3196 return -1;
3197 }
3198
cbef6c52
SH
3199 if ((child = fork()) < 0) {
3200 SYSERROR("fork");
a7242d9a
ÇO
3201 close(pipefd[0]);
3202 close(pipefd[1]);
3203 return -1;
3204 }
3205
3206 if (child == 0) { // child
3207 /* close the read-end of the pipe */
3208 close(pipefd[0]);
3209 /* redirect the stdout to write-end of the pipe */
3210 dup2(pipefd[1], STDOUT_FILENO);
3211 /* close the write-end of the pipe */
fe1f672f 3212 close(pipefd[1]);
a7242d9a
ÇO
3213
3214 // Call lxc-user-nic pid type bridge
3215 char pidstr[20];
3216 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev->link, netdev->name, NULL };
3217 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3218 pidstr[19] = '\0';
3219 execvp(args[0], args);
3220 SYSERROR("execvp lxc-user-nic");
3221 exit(1);
3222 }
3223
3224 /* close the write-end of the pipe */
3225 close(pipefd[1]);
3226
fe1f672f 3227 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3228 if (bytes < 0) {
3229 SYSERROR("read failed");
3230 }
3231 buffer[bytes - 1] = '\0';
3232
3233 if (wait_for_pid(child) != 0) {
3234 close(pipefd[0]);
cbef6c52
SH
3235 return -1;
3236 }
3237
a7242d9a
ÇO
3238 /* close the read-end of the pipe */
3239 close(pipefd[0]);
cbef6c52 3240
a7242d9a
ÇO
3241 /* fill netdev->name field */
3242 token = strtok_r(buffer, ":", &saveptr);
3243 if (!token)
3244 return -1;
658979c5
SH
3245 netdev->name = malloc(IFNAMSIZ+1);
3246 if (!netdev->name) {
3247 ERROR("Out of memory");
3248 return -1;
3249 }
3250 memset(netdev->name, 0, IFNAMSIZ+1);
3251 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3252
3253 /* fill netdev->veth_attr.pair field */
3254 token = strtok_r(NULL, ":", &saveptr);
3255 if (!token)
3256 return -1;
3257 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3258 if (!netdev->priv.veth_attr.pair) {
3259 ERROR("Out of memory");
3260 return -1;
3261 }
45e854dc 3262
a7242d9a 3263 return 0;
cbef6c52
SH
3264}
3265
5f4535a3 3266int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 3267{
82d5ae15 3268 struct lxc_list *iterator;
82d5ae15 3269 struct lxc_netdev *netdev;
cbef6c52 3270 int am_root = (getuid() == 0);
3cfc0f3a 3271 int err;
0ad19a3f 3272
5f4535a3 3273 lxc_list_for_each(iterator, network) {
82d5ae15 3274
5f4535a3 3275 netdev = iterator->elem;
82d5ae15 3276
fbb16259 3277 if (netdev->type == LXC_NET_VETH && !am_root) {
cbef6c52
SH
3278 if (unpriv_assign_nic(netdev, pid))
3279 return -1;
658979c5
SH
3280 // lxc-user-nic has moved the nic to the new ns.
3281 // unpriv_assign_nic() fills in netdev->name.
3282 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3283 continue;
3284 }
236087a6 3285
fbb16259
SH
3286 /* empty network namespace, nothing to move */
3287 if (!netdev->ifindex)
3288 continue;
3289
8d357196 3290 err = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3cfc0f3a
MN
3291 if (err) {
3292 ERROR("failed to move '%s' to the container : %s",
3293 netdev->link, strerror(-err));
82d5ae15
DL
3294 return -1;
3295 }
3296
c1c75c04 3297 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3298 }
3299
3300 return 0;
3301}
3302
251d0d2a
DE
3303static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3304 size_t buf_size)
f6d3e3e4
SH
3305{
3306 char path[PATH_MAX];
e4ccd113 3307 int ret, closeret;
f6d3e3e4
SH
3308 FILE *f;
3309
3310 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3311 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3312 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3313 return -E2BIG;
3314 }
3315 f = fopen(path, "w");
3316 if (!f) {
3317 perror("open");
3318 return -EINVAL;
3319 }
251d0d2a 3320 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3321 if (ret < 0)
e4ccd113
SH
3322 SYSERROR("writing id mapping");
3323 closeret = fclose(f);
3324 if (closeret)
3325 SYSERROR("writing id mapping");
3326 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3327}
3328
3329int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3330{
3331 struct lxc_list *iterator;
3332 struct id_map *map;
8afb3e61 3333 int ret = 0, use_shadow = 0;
251d0d2a 3334 enum idtype type;
8afb3e61
SG
3335 char *buf = NULL, *pos, *cmdpath = NULL;
3336
22038de5
SH
3337 /*
3338 * If newuidmap exists, that is, if shadow is handing out subuid
3339 * ranges, then insist that root also reserve ranges in subuid. This
3340 * will protected it by preventing another user from being handed the
3341 * range by shadow.
3342 */
9d9c111c 3343 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3344 if (cmdpath) {
3345 use_shadow = 1;
3346 free(cmdpath);
3347 }
3348
0e6e3a41
SG
3349 if (!use_shadow && geteuid()) {
3350 ERROR("Missing newuidmap/newgidmap");
3351 return -1;
3352 }
251d0d2a
DE
3353
3354 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3355 int left, fill;
cf3ef16d
SH
3356 int had_entry = 0;
3357 if (!buf) {
3358 buf = pos = malloc(4096);
4f7521b4
SH
3359 if (!buf)
3360 return -ENOMEM;
cf3ef16d
SH
3361 }
3362 pos = buf;
0e6e3a41 3363 if (use_shadow)
d1838f34 3364 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3365 type == ID_TYPE_UID ? 'u' : 'g',
3366 pid);
4f7521b4 3367
cf3ef16d
SH
3368 lxc_list_for_each(iterator, idmap) {
3369 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3370 map = iterator->elem;
cf3ef16d
SH
3371 if (map->idtype != type)
3372 continue;
3373
3374 had_entry = 1;
3375 left = 4096 - (pos - buf);
d1838f34 3376 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3377 use_shadow ? " " : "",
d1838f34 3378 map->nsid, map->hostid, map->range,
0e6e3a41 3379 use_shadow ? "" : "\n");
cf3ef16d
SH
3380 if (fill <= 0 || fill >= left)
3381 SYSERROR("snprintf failed, too many mappings");
3382 pos += fill;
251d0d2a 3383 }
cf3ef16d 3384 if (!had_entry)
4f7521b4 3385 continue;
cf3ef16d 3386
0e6e3a41 3387 if (!use_shadow) {
cf3ef16d 3388 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3389 } else {
3390 left = 4096 - (pos - buf);
3391 fill = snprintf(pos, left, "\n");
3392 if (fill <= 0 || fill >= left)
3393 SYSERROR("snprintf failed, too many mappings");
3394 pos += fill;
cf3ef16d 3395 ret = system(buf);
d1838f34 3396 }
cf3ef16d 3397
f6d3e3e4
SH
3398 if (ret)
3399 break;
3400 }
251d0d2a 3401
4f7521b4
SH
3402 if (buf)
3403 free(buf);
f6d3e3e4
SH
3404 return ret;
3405}
3406
cf3ef16d 3407/*
7b50c609
TS
3408 * return the host uid/gid to which the container root is mapped in
3409 * *val.
0b3a6504 3410 * Return true if id was found, false otherwise.
cf3ef16d 3411 */
2a9a80cb 3412bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3413 unsigned long *val)
cf3ef16d
SH
3414{
3415 struct lxc_list *it;
3416 struct id_map *map;
3417
3418 lxc_list_for_each(it, &conf->id_map) {
3419 map = it->elem;
7b50c609 3420 if (map->idtype != idtype)
cf3ef16d
SH
3421 continue;
3422 if (map->nsid != 0)
3423 continue;
2a9a80cb
SH
3424 *val = map->hostid;
3425 return true;
cf3ef16d 3426 }
2a9a80cb 3427 return false;
cf3ef16d
SH
3428}
3429
2133f58c 3430int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3431{
3432 struct lxc_list *it;
3433 struct id_map *map;
3434 lxc_list_for_each(it, &conf->id_map) {
3435 map = it->elem;
2133f58c 3436 if (map->idtype != idtype)
cf3ef16d
SH
3437 continue;
3438 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3439 return (id - map->hostid) + map->nsid;
cf3ef16d 3440 }
57d116ab 3441 return -1;
cf3ef16d
SH
3442}
3443
2133f58c 3444int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3445{
3446 struct lxc_list *it;
3447 struct id_map *map;
2133f58c 3448 unsigned int freeid = 0;
cf3ef16d
SH
3449again:
3450 lxc_list_for_each(it, &conf->id_map) {
3451 map = it->elem;
2133f58c 3452 if (map->idtype != idtype)
cf3ef16d
SH
3453 continue;
3454 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3455 freeid = map->nsid + map->range;
3456 goto again;
3457 }
3458 }
3459 return freeid;
3460}
3461
19a26f82
MK
3462int lxc_find_gateway_addresses(struct lxc_handler *handler)
3463{
3464 struct lxc_list *network = &handler->conf->network;
3465 struct lxc_list *iterator;
3466 struct lxc_netdev *netdev;
3467 int link_index;
3468
3469 lxc_list_for_each(iterator, network) {
3470 netdev = iterator->elem;
3471
3472 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3473 continue;
3474
3475 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3476 ERROR("gateway = auto only supported for "
3477 "veth and macvlan");
3478 return -1;
3479 }
3480
3481 if (!netdev->link) {
3482 ERROR("gateway = auto needs a link interface");
3483 return -1;
3484 }
3485
3486 link_index = if_nametoindex(netdev->link);
3487 if (!link_index)
3488 return -EINVAL;
3489
3490 if (netdev->ipv4_gateway_auto) {
3491 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3492 ERROR("failed to automatically find ipv4 gateway "
3493 "address from link interface '%s'", netdev->link);
3494 return -1;
3495 }
3496 }
3497
3498 if (netdev->ipv6_gateway_auto) {
3499 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3500 ERROR("failed to automatically find ipv6 gateway "
3501 "address from link interface '%s'", netdev->link);
3502 return -1;
3503 }
3504 }
3505 }
3506
3507 return 0;
3508}
3509
5e4a62bf 3510int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3511{
5e4a62bf 3512 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3513 int i, ret;
b0a33c1e 3514
5e4a62bf
DL
3515 /* no tty in the configuration */
3516 if (!conf->tty)
b0a33c1e 3517 return 0;
3518
13954cce 3519 tty_info->pty_info =
e4e7d59d 3520 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3521 if (!tty_info->pty_info) {
36eb9bde 3522 SYSERROR("failed to allocate pty_info");
985d15b1 3523 return -1;
b0a33c1e 3524 }
3525
985d15b1 3526 for (i = 0; i < conf->tty; i++) {
13954cce 3527
b0a33c1e 3528 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3529
025ed0f3
SH
3530 process_lock();
3531 ret = openpty(&pty_info->master, &pty_info->slave,
3532 pty_info->name, NULL, NULL);
3533 process_unlock();
3534 if (ret) {
36eb9bde 3535 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3536 tty_info->nbtty = i;
3537 lxc_delete_tty(tty_info);
3538 return -1;
b0a33c1e 3539 }
3540
5332bb84
DL
3541 DEBUG("allocated pty '%s' (%d/%d)",
3542 pty_info->name, pty_info->master, pty_info->slave);
3543
3ec1648d 3544 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3545 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3546 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3547
b0a33c1e 3548 pty_info->busy = 0;
3549 }
3550
985d15b1 3551 tty_info->nbtty = conf->tty;
1ac470c0
DL
3552
3553 INFO("tty's configured");
3554
985d15b1 3555 return 0;
b0a33c1e 3556}
3557
3558void lxc_delete_tty(struct lxc_tty_info *tty_info)
3559{
3560 int i;
3561
3562 for (i = 0; i < tty_info->nbtty; i++) {
3563 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3564
3565 close(pty_info->master);
3566 close(pty_info->slave);
3567 }
3568
3569 free(tty_info->pty_info);
3570 tty_info->nbtty = 0;
3571}
3572
f6d3e3e4 3573/*
7b50c609
TS
3574 * chown_mapped_root: for an unprivileged user with uid/gid X to
3575 * chown a dir to subuid/subgid Y, he needs to run chown as root
3576 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3577 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3578 * root is privileged with respect to hostuid/hostgid X, allowing
3579 * him to do the chown.
f6d3e3e4 3580 */
c4d10a05 3581int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3582{
7b50c609
TS
3583 uid_t rootuid;
3584 gid_t rootgid;
c4d10a05 3585 pid_t pid;
2a9a80cb 3586 unsigned long val;
a7ef8753 3587 char *chownpath = path;
f6d3e3e4 3588
2a9a80cb 3589 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3590 ERROR("No mapping for container root");
3591 return -1;
f6d3e3e4 3592 }
7b50c609
TS
3593 rootuid = (uid_t) val;
3594 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3595 ERROR("No mapping for container root");
3596 return -1;
3597 }
3598 rootgid = (gid_t) val;
2a9a80cb 3599
a7ef8753
SH
3600 /*
3601 * In case of overlay, we want only the writeable layer
3602 * to be chowned
3603 */
1f92162d 3604 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3605 chownpath = strchr(path, ':');
3606 if (!chownpath) {
3607 ERROR("Bad overlay path: %s", path);
3608 return -1;
3609 }
3610 chownpath = strchr(chownpath+1, ':');
3611 if (!chownpath) {
3612 ERROR("Bad overlay path: %s", path);
3613 return -1;
3614 }
3615 chownpath++;
3616 }
3617 path = chownpath;
c4d10a05 3618 if (geteuid() == 0) {
7b50c609 3619 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3620 ERROR("Error chowning %s", path);
3621 return -1;
3622 }
3623 return 0;
3624 }
f3d7e4ca 3625
7b50c609 3626 if (rootuid == geteuid()) {
f3d7e4ca
SH
3627 // nothing to do
3628 INFO("%s: container root is our uid; no need to chown" ,__func__);
3629 return 0;
3630 }
3631
c4d10a05
SH
3632 pid = fork();
3633 if (pid < 0) {
3634 SYSERROR("Failed forking");
f6d3e3e4
SH
3635 return -1;
3636 }
c4d10a05 3637 if (!pid) {
7b50c609
TS
3638 int hostuid = geteuid(), hostgid = getegid(), ret;
3639 struct stat sb;
3640 char map1[100], map2[100], map3[100], map4[100], map5[100];
3641 char ugid[100];
3642 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3643 "-m", map3, "-m", map5,
3644 "--", "chown", ugid, path, NULL };
3645 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3646 "-m", map3, "-m", map4, "-m", map5,
3647 "--", "chown", ugid, path, NULL };
3648
3649 // save the current gid of "path"
3650 if (stat(path, &sb) < 0) {
3651 ERROR("Error stat %s", path);
3652 return -1;
3653 }
f6d3e3e4 3654
9a7c2aba
SH
3655 /*
3656 * A file has to be group-owned by a gid mapped into the
3657 * container, or the container won't be privileged over it.
3658 */
3659 if (sb.st_uid == geteuid() &&
3660 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3661 chown(path, -1, hostgid) < 0) {
3662 ERROR("Failed chgrping %s", path);
7b50c609
TS
3663 return -1;
3664 }
3665
3666 // "u:0:rootuid:1"
3667 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3668 if (ret < 0 || ret >= 100) {
3669 ERROR("Error uid printing map string");
f6d3e3e4
SH
3670 return -1;
3671 }
c4d10a05 3672
98e5ba51
SH
3673 // "u:hostuid:hostuid:1"
3674 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3675 if (ret < 0 || ret >= 100) {
3676 ERROR("Error uid printing map string");
3677 return -1;
3678 }
3679
7b50c609
TS
3680 // "g:0:rootgid:1"
3681 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3682 if (ret < 0 || ret >= 100) {
7b50c609 3683 ERROR("Error gid printing map string");
c4d10a05
SH
3684 return -1;
3685 }
3686
7b50c609 3687 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3688 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3689 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3690 if (ret < 0 || ret >= 100) {
3691 ERROR("Error gid printing map string");
3692 return -1;
3693 }
3694
3695 // "g:hostgid:hostgid:1"
3696 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3697 if (ret < 0 || ret >= 100) {
3698 ERROR("Error gid printing map string");
3699 return -1;
3700 }
3701
3702 // "0:pathgid" (chown)
b4c1e35d 3703 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3704 if (ret < 0 || ret >= 100) {
3705 ERROR("Error owner printing format string for chown");
3706 return -1;
3707 }
3708
3709 if (hostgid == sb.st_gid)
3710 ret = execvp("lxc-usernsexec", args1);
3711 else
3712 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3713 SYSERROR("Failed executing usernsexec");
3714 exit(1);
f6d3e3e4 3715 }
c4d10a05 3716 return wait_for_pid(pid);
f6d3e3e4
SH
3717}
3718
c4d10a05 3719int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3720{
c4d10a05 3721 int i;
f6d3e3e4 3722
c4d10a05 3723 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3724 return 0;
c4d10a05
SH
3725
3726 for (i = 0; i < c->tty_info.nbtty; i++) {
3727 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3728
3729 if (chown_mapped_root(pty_info->name, c) < 0) {
3730 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3731 return -1;
3732 }
3733 }
3734
29b10e4f 3735 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3736 ERROR("Failed to chown %s", c->console.name);
3737 return -1;
3738 }
3739
f6d3e3e4
SH
3740 return 0;
3741}
3742
struct start_args {
	char *const *argv;
};

#define MAX_SYMLINK_DEPTH 32

/*
 * This routine is called when the configuration does not already specify a value
 * for autodev (mounting a file system on /dev and populating it in a container).
 * If a hard override value has not been specified, then we try to apply some
 * heuristics to determine if we should switch to autodev mode.
 *
 * For instance, if the container has an /etc/systemd/system directory then it
 * is probably running systemd as the init process and it needs the autodev
 * mount to prevent it from mounting devtmpfs on /dev on its own causing conflicts
 * in the host.
 *
 * We may also want to enable autodev if the host has devtmpfs mounted on its
 * /dev as this then enables us to use subdirectories under /dev for the container
 * /dev directories and we can fake udev devices.
 *
 * @rootfs : container root; must be a non-empty, resolvable path.
 * @data   : optional struct start_args; argv[0], when set, replaces the
 *           default command "/sbin/init".
 *
 * Returns 1 if autodev should be enabled (some path in the command's
 * symlink chain contains "systemd"), 0 if not, and -2 if this cannot be
 * determined.
 */
static int check_autodev( const char *rootfs, void *data )
{
	struct start_args *arg = data;
	int ret;
	int loop_count = 0;
	struct stat s;
	char absrootfs[MAXPATHLEN];
	char path[MAXPATHLEN];
	char abs_path[MAXPATHLEN];
	char *command = "/sbin/init";

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	if (!realpath(rootfs, absrootfs))
		return -2;

	if( arg && arg->argv[0] ) {
		command = arg->argv[0];
		DEBUG("Set exec command to %s", command );
	}

	/* snprintf always terminates; a command that does not fit cannot
	 * be examined reliably, so give up instead of truncating it */
	ret = snprintf(path, sizeof(path), "%s", command);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return -2;

	/* NOTE(review): this tests the command path as given, without the
	 * rootfs prefix - confirm that is intended */
	if ( 0 != access(path, F_OK) || 0 != stat(path, &s) )
		return -2;

	/* Dereference down the symlink merry path testing as we go. */
	/* If anything references systemd in the path - set autodev! */
	/* Renormalize to the rootfs before each dereference */
	/* Relative symlinks should fall out in the wash even with .. */
	while( 1 ) {
		if ( strstr( path, "systemd" ) ) {
			INFO("Container with systemd init detected - enabling autodev!");
			return 1;
		}

		ret = snprintf(abs_path, sizeof(abs_path), "%s/%s", absrootfs, path);
		if (ret < 0 || (size_t)ret >= sizeof(abs_path))
			return -2;

		/* leave room for the terminator added below */
		ret = readlink( abs_path, path, MAXPATHLEN-1 );

		if ( ( ret <= 0 ) || ( ++loop_count > MAX_SYMLINK_DEPTH ) ) {
			break; /* Break out for other tests */
		}
		path[ret] = '\0';
	}

	/*
	 * Add future checks here.
	 * Return positive if we should go autodev
	 * Return 0 if we should NOT go autodev
	 * Return negative if we encounter an error or can not determine...
	 */

	/* All else fails, we don't need autodev */
	INFO("Autodev not required.");
	return 0;
}
3824
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * <rootfs>/proc/self is read as a symlink: if it cannot be read, proc is
 * mounted; if it does not point to "1", whatever is mounted on
 * <rootfs>/proc is lazily unmounted and proc is mounted afresh.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/* readlink() does not terminate its result: zero the buffer and
	 * keep the last byte out of its reach so link is always a string */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	/* can't be longer than rootfs/proc/1 */
	if (strncmp(link, "1", linklen) != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3866
3867int tmp_proc_mount(struct lxc_conf *lxc_conf)
3868{
3869 int mounted;
3870
3871 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3872 if (mount("proc", "/proc", "proc", 0, NULL)) {
3873 SYSERROR("Failed mounting /proc, proceeding");
3874 mounted = 0;
3875 } else
3876 mounted = 1;
3877 } else
3878 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3879 if (mounted == -1) {
3880 SYSERROR("failed to mount /proc in the container.");
3881 return -1;
3882 } else if (mounted == 1) {
3883 lxc_conf->tmp_umount_proc = 1;
3884 }
3885 return 0;
3886}
3887
3888void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3889{
3890 if (lxc_conf->tmp_umount_proc == 1) {
3891 umount("/proc");
3892 lxc_conf->tmp_umount_proc = 0;
3893 }
3894}
3895
/*
 * Walk /proc/self/mountinfo and remount with MS_SLAVE every entry whose
 * options field contains "shared".
 *
 * All failures are logged and otherwise ignored: container startup
 * continues regardless.
 */
void remount_all_slave(void)
{
	char *line = NULL;
	size_t cap = 0;
	FILE *mounts;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (mounts == NULL) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&line, &cap, mounts) != -1) {
		/* presumably the mount point column, with the options two
		 * fields further on - see get_field() */
		char *target = get_field(line, 4);
		char *opts = target ? get_field(target, 2) : NULL;

		if (opts == NULL)
			continue;

		null_endofword(opts);
		if (strstr(opts, "shared") == NULL)
			continue;

		null_endofword(target);
		if (mount(NULL, target, NULL, MS_SLAVE, NULL)) {
			SYSERROR("Failed to make %s rslave", target);
			ERROR("Continuing...");
		}
	}

	fclose(mounts);
	free(line);
}
3930
2322903b
SH
3931void lxc_execute_bind_init(struct lxc_conf *conf)
3932{
3933 int ret;
9d9c111c
SH
3934 char path[PATH_MAX], destpath[PATH_MAX], *p;
3935
3936 /* If init exists in the container, don't bind mount a static one */
3937 p = choose_init(conf->rootfs.mount);
3938 if (p) {
3939 free(p);
3940 return;
3941 }
2322903b
SH
3942
3943 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3944 if (ret < 0 || ret >= PATH_MAX) {
3945 WARN("Path name too long searching for lxc.init.static");
3946 return;
3947 }
3948
3949 if (!file_exists(path)) {
3950 INFO("%s does not exist on host", path);
3951 return;
3952 }
3953
3954 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3955 if (ret < 0 || ret >= PATH_MAX) {
3956 WARN("Path name too long for container's lxc.init.static");
3957 return;
3958 }
3959
3960 if (!file_exists(destpath)) {
3961 FILE * pathfile = fopen(destpath, "wb");
3962 if (!pathfile) {
3963 SYSERROR("Failed to create mount target '%s'", destpath);
3964 return;
3965 }
3966 fclose(pathfile);
3967 }
3968
3969 ret = mount(path, destpath, "none", MS_BIND, NULL);
3970 if (ret < 0)
3971 SYSERROR("Failed to bind lxc.init.static into container");
3972 INFO("lxc.init.static bound into container at %s", path);
3973}
3974
35120d9c
SH
3975/*
3976 * This does the work of remounting / if it is shared, calling the
3977 * container pre-mount hooks, and mounting the rootfs.
3978 */
3979int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3980{
35120d9c
SH
3981 if (conf->rootfs_setup) {
3982 /*
3983 * rootfs was set up in another namespace. bind-mount it
3984 * to give us a mount in our own ns so we can pivot_root to it
3985 */
3986 const char *path = conf->rootfs.mount;
3987 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3988 ERROR("Failed to bind-mount container / onto itself");
145832ba 3989 return -1;
35120d9c 3990 }
145832ba 3991 return 0;
35120d9c 3992 }
d4ef7c50 3993
e995d7a2
SH
3994 remount_all_slave();
3995
35120d9c
SH
3996 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3997 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3998 return -1;
3999 }
4000
4001 if (setup_rootfs(conf)) {
4002 ERROR("failed to setup rootfs for '%s'", name);
4003 return -1;
4004 }
4005
4006 conf->rootfs_setup = true;
4007 return 0;
4008}
4009
1c1c7051
SH
4010static bool verify_start_hooks(struct lxc_conf *conf)
4011{
4012 struct lxc_list *it;
4013 char path[MAXPATHLEN];
4014 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
4015 char *hookname = it->elem;
4016 struct stat st;
4017 int ret;
4018
4019 ret = snprintf(path, MAXPATHLEN, "%s%s",
4020 conf->rootfs.mount, hookname);
4021 if (ret < 0 || ret >= MAXPATHLEN)
4022 return false;
4023 ret = stat(path, &st);
4024 if (ret) {
4025 SYSERROR("Start hook %s not found in container rootfs",
4026 hookname);
4027 return false;
4028 }
6a0c909a 4029 return true;
1c1c7051
SH
4030 }
4031
4032 return true;
4033}
4034
35120d9c
SH
4035int lxc_setup(struct lxc_handler *handler)
4036{
4037 const char *name = handler->name;
4038 struct lxc_conf *lxc_conf = handler->conf;
4039 const char *lxcpath = handler->lxcpath;
4040 void *data = handler->data;
4041
4042 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
4043 ERROR("Error setting up rootfs mount after spawn");
4044 return -1;
4045 }
4046
6c544cb3
MM
4047 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
4048 if (setup_utsname(lxc_conf->utsname)) {
4049 ERROR("failed to setup the utsname for '%s'", name);
4050 return -1;
4051 }
0ad19a3f 4052 }
4053
5f4535a3 4054 if (setup_network(&lxc_conf->network)) {
36eb9bde 4055 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 4056 return -1;
0ad19a3f 4057 }
4058
bc6928ff
MW
4059 if (lxc_conf->autodev < 0) {
4060 lxc_conf->autodev = check_autodev(lxc_conf->rootfs.mount, data);
4061 }
4062
4063 if (lxc_conf->autodev > 0) {
4064 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
91c3830e 4065 ERROR("failed to mount /dev in the container");
c6883f38
SH
4066 return -1;
4067 }
4068 }
4069
368bbc02
CS
4070 /* do automatic mounts (mainly /proc and /sys), but exclude
4071 * those that need to wait until other stuff has finished
4072 */
4fb3cba5 4073 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4074 ERROR("failed to setup the automatic mounts for '%s'", name);
4075 return -1;
4076 }
4077
80a881b2 4078 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 4079 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 4080 return -1;
576f946d 4081 }
4082
c1dc38c2 4083 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
4084 ERROR("failed to setup the mount entries for '%s'", name);
4085 return -1;
4086 }
4087
1c1c7051
SH
4088 /* Make sure any start hooks are in the rootfs */
4089 if (!verify_start_hooks(lxc_conf))
4090 return -1;
4091
2322903b
SH
4092 if (lxc_conf->is_execute)
4093 lxc_execute_bind_init(lxc_conf);
4094
368bbc02
CS
4095 /* now mount only cgroup, if wanted;
4096 * before, /sys could not have been mounted
4097 * (is either mounted automatically or via fstab entries)
4098 */
4fb3cba5 4099 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
4100 ERROR("failed to setup the automatic mounts for '%s'", name);
4101 return -1;
4102 }
4103
283678ed 4104 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
4105 ERROR("failed to run mount hooks for container '%s'.", name);
4106 return -1;
4107 }
4108
bc6928ff 4109 if (lxc_conf->autodev > 0) {
283678ed 4110 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
4111 ERROR("failed to run autodev hooks for container '%s'.", name);
4112 return -1;
4113 }
91c3830e
SH
4114 if (setup_autodev(lxc_conf->rootfs.mount)) {
4115 ERROR("failed to populate /dev in the container");
4116 return -1;
4117 }
4118 }
368bbc02 4119
37903589 4120 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 4121 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 4122 return -1;
6e590161 4123 }
4124
7e0e1d94
AV
4125 if (lxc_conf->kmsg) {
4126 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
4127 ERROR("failed to setup kmsg for '%s'", name);
4128 }
1bd051a6 4129
37903589 4130 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 4131 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 4132 return -1;
b0a33c1e 4133 }
4134
69aa6655
DE
4135 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
4136 ERROR("failed to setup /dev symlinks for '%s'", name);
4137 return -1;
4138 }
4139
5112cd70
SH
4140 /* mount /proc if it's not already there */
4141 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 4142 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 4143 return -1;
e075f5d9 4144 }
e075f5d9 4145
ac778708 4146 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 4147 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 4148 return -1;
ed502555 4149 }
4150
571e6ec8 4151 if (setup_pts(lxc_conf->pts)) {
36eb9bde 4152 ERROR("failed to setup the new pts instance");
95b5ffaf 4153 return -1;
3c26f34e 4154 }
4155
cccc74b5
DL
4156 if (setup_personality(lxc_conf->personality)) {
4157 ERROR("failed to setup personality");
4158 return -1;
4159 }
4160
97a8f74f
SG
4161 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
4162 if (!lxc_list_empty(&lxc_conf->caps)) {
4163 ERROR("Simultaneously requested dropping and keeping caps");
f6d3e3e4
SH
4164 return -1;
4165 }
97a8f74f
SG
4166 if (dropcaps_except(&lxc_conf->keepcaps)) {
4167 ERROR("failed to keep requested caps");
4168 return -1;
4169 }
4170 } else if (setup_caps(&lxc_conf->caps)) {
4171 ERROR("failed to drop capabilities");
4172 return -1;
81810dd1
DL
4173 }
4174
cd54d859
DL
4175 NOTICE("'%s' is setup.", name);
4176
0ad19a3f 4177 return 0;
4178}
26ddeedd 4179
283678ed
SH
4180int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
4181 const char *lxcpath, char *argv[])
26ddeedd
SH
4182{
4183 int which = -1;
4184 struct lxc_list *it;
4185
4186 if (strcmp(hook, "pre-start") == 0)
4187 which = LXCHOOK_PRESTART;
5ea6163a
SH
4188 else if (strcmp(hook, "pre-mount") == 0)
4189 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
4190 else if (strcmp(hook, "mount") == 0)
4191 which = LXCHOOK_MOUNT;
f7bee6c6
MW
4192 else if (strcmp(hook, "autodev") == 0)
4193 which = LXCHOOK_AUTODEV;
26ddeedd
SH
4194 else if (strcmp(hook, "start") == 0)
4195 which = LXCHOOK_START;
4196 else if (strcmp(hook, "post-stop") == 0)
4197 which = LXCHOOK_POSTSTOP;
148e91f5
SH
4198 else if (strcmp(hook, "clone") == 0)
4199 which = LXCHOOK_CLONE;
26ddeedd
SH
4200 else
4201 return -1;
4202 lxc_list_for_each(it, &conf->hooks[which]) {
4203 int ret;
4204 char *hookname = it->elem;
283678ed 4205 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
4206 if (ret)
4207 return ret;
4208 }
4209 return 0;
4210}
72d0e1cb 4211
427b3a21 4212static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
4213{
4214 struct lxc_netdev *netdev = it->elem;
9ebb03ad 4215 struct lxc_list *it2,*next;
72d0e1cb
SG
4216
4217 lxc_list_del(it);
4218
4219 if (netdev->link)
4220 free(netdev->link);
4221 if (netdev->name)
4222 free(netdev->name);
c9bb9a85
DE
4223 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
4224 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
4225 if (netdev->upscript)
4226 free(netdev->upscript);
4227 if (netdev->hwaddr)
4228 free(netdev->hwaddr);
4229 if (netdev->mtu)
4230 free(netdev->mtu);
4231 if (netdev->ipv4_gateway)
4232 free(netdev->ipv4_gateway);
4233 if (netdev->ipv6_gateway)
4234 free(netdev->ipv6_gateway);
9ebb03ad 4235 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4236 lxc_list_del(it2);
4237 free(it2->elem);
4238 free(it2);
4239 }
9ebb03ad 4240 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4241 lxc_list_del(it2);
4242 free(it2->elem);
4243 free(it2);
4244 }
d95db067 4245 free(netdev);
72d0e1cb
SG
4246 free(it);
4247}
4248
4249/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 4250int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
4251{
4252 char *p1;
4253 int ret, idx, i;
4254 struct lxc_list *it;
4255 struct lxc_netdev *netdev;
4256
4257 p1 = index(key, '.');
4258 if (!p1 || *(p1+1) == '\0')
4259 p1 = NULL;
4260
4261 ret = sscanf(key, "%d", &idx);
4262 if (ret != 1) return -1;
4263 if (idx < 0)
4264 return -1;
4265
4266 i = 0;
4267 lxc_list_for_each(it, &c->network) {
4268 if (i == idx)
4269 break;
4270 i++;
4271 }
4272 if (i < idx) // we don't have that many nics defined
4273 return -1;
4274
4275 if (!it || !it->elem)
4276 return -1;
4277
4278 netdev = it->elem;
4279
4280 if (!p1) {
4281 lxc_remove_nic(it);
52d21d40 4282 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
4283 struct lxc_list *it2,*next;
4284 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
4285 lxc_list_del(it2);
4286 free(it2->elem);
4287 free(it2);
4288 }
52d21d40 4289 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4290 struct lxc_list *it2,*next;
4291 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4292 lxc_list_del(it2);
4293 free(it2->elem);
4294 free(it2);
4295 }
52d21d40 4296 } else if (strcmp(p1, ".link") == 0) {
72d0e1cb
SG
4297 if (netdev->link) {
4298 free(netdev->link);
4299 netdev->link = NULL;
4300 }
52d21d40 4301 } else if (strcmp(p1, ".name") == 0) {
72d0e1cb
SG
4302 if (netdev->name) {
4303 free(netdev->name);
4304 netdev->name = NULL;
4305 }
52d21d40 4306 } else if (strcmp(p1, ".script.up") == 0) {
72d0e1cb
SG
4307 if (netdev->upscript) {
4308 free(netdev->upscript);
4309 netdev->upscript = NULL;
4310 }
52d21d40 4311 } else if (strcmp(p1, ".hwaddr") == 0) {
72d0e1cb
SG
4312 if (netdev->hwaddr) {
4313 free(netdev->hwaddr);
4314 netdev->hwaddr = NULL;
4315 }
52d21d40 4316 } else if (strcmp(p1, ".mtu") == 0) {
72d0e1cb
SG
4317 if (netdev->mtu) {
4318 free(netdev->mtu);
4319 netdev->mtu = NULL;
4320 }
9eaf8a59 4321 } else if (strcmp(p1, ".ipv4.gateway") == 0) {
72d0e1cb
SG
4322 if (netdev->ipv4_gateway) {
4323 free(netdev->ipv4_gateway);
4324 netdev->ipv4_gateway = NULL;
4325 }
9eaf8a59 4326 } else if (strcmp(p1, ".ipv6.gateway") == 0) {
72d0e1cb
SG
4327 if (netdev->ipv6_gateway) {
4328 free(netdev->ipv6_gateway);
4329 netdev->ipv6_gateway = NULL;
4330 }
4331 }
4332 else return -1;
4333
4334 return 0;
4335}
4336
4337int lxc_clear_config_network(struct lxc_conf *c)
4338{
9ebb03ad
DE
4339 struct lxc_list *it,*next;
4340 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4341 lxc_remove_nic(it);
4342 }
4343 return 0;
4344}
4345
4346int lxc_clear_config_caps(struct lxc_conf *c)
4347{
9ebb03ad 4348 struct lxc_list *it,*next;
72d0e1cb 4349
9ebb03ad 4350 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4351 lxc_list_del(it);
4352 free(it->elem);
4353 free(it);
4354 }
4355 return 0;
4356}
4357
74a3920a 4358static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4359 struct lxc_list *it, *next;
4360
4355ab5f 4361 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4362 lxc_list_del(it);
4363 free(it->elem);
4364 free(it);
4365 }
4366 return 0;
4367}
4368
4355ab5f
SH
4369int lxc_clear_idmaps(struct lxc_conf *c)
4370{
4371 return lxc_free_idmap(&c->id_map);
4372}
4373
1fb86a7c
SH
4374int lxc_clear_config_keepcaps(struct lxc_conf *c)
4375{
4376 struct lxc_list *it,*next;
4377
4378 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4379 lxc_list_del(it);
4380 free(it->elem);
4381 free(it);
4382 }
4383 return 0;
4384}
4385
12a50cc6 4386int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4387{
9ebb03ad 4388 struct lxc_list *it,*next;
72d0e1cb 4389 bool all = false;
12a50cc6 4390 const char *k = key + 11;
72d0e1cb
SG
4391
4392 if (strcmp(key, "lxc.cgroup") == 0)
4393 all = true;
4394
9ebb03ad 4395 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4396 struct lxc_cgroup *cg = it->elem;
4397 if (!all && strcmp(cg->subsystem, k) != 0)
4398 continue;
4399 lxc_list_del(it);
4400 free(cg->subsystem);
4401 free(cg->value);
4402 free(cg);
4403 free(it);
4404 }
4405 return 0;
4406}
4407
ee1e7aa0
SG
4408int lxc_clear_groups(struct lxc_conf *c)
4409{
4410 struct lxc_list *it,*next;
4411
4412 lxc_list_for_each_safe(it, &c->groups, next) {
4413 lxc_list_del(it);
4414 free(it->elem);
4415 free(it);
4416 }
4417 return 0;
4418}
4419
ab799c0b
SG
4420int lxc_clear_environment(struct lxc_conf *c)
4421{
4422 struct lxc_list *it,*next;
4423
4424 lxc_list_for_each_safe(it, &c->environment, next) {
4425 lxc_list_del(it);
4426 free(it->elem);
4427 free(it);
4428 }
4429 return 0;
4430}
4431
4432
72d0e1cb
SG
4433int lxc_clear_mount_entries(struct lxc_conf *c)
4434{
9ebb03ad 4435 struct lxc_list *it,*next;
72d0e1cb 4436
9ebb03ad 4437 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4438 lxc_list_del(it);
4439 free(it->elem);
4440 free(it);
4441 }
4442 return 0;
4443}
4444
b099e9e9
SH
4445int lxc_clear_automounts(struct lxc_conf *c)
4446{
4447 c->auto_mounts = 0;
4448 return 0;
4449}
4450
12a50cc6 4451int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4452{
9ebb03ad 4453 struct lxc_list *it,*next;
17ed13a3 4454 bool all = false, done = false;
12a50cc6 4455 const char *k = key + 9;
72d0e1cb
SG
4456 int i;
4457
17ed13a3
SH
4458 if (strcmp(key, "lxc.hook") == 0)
4459 all = true;
4460
72d0e1cb 4461 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4462 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4463 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4464 lxc_list_del(it);
4465 free(it->elem);
4466 free(it);
4467 }
4468 done = true;
72d0e1cb
SG
4469 }
4470 }
17ed13a3
SH
4471
4472 if (!done) {
4473 ERROR("Invalid hook key: %s", key);
4474 return -1;
4475 }
72d0e1cb
SG
4476 return 0;
4477}
8eb5694b 4478
74a3920a 4479static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4480{
4481 int i;
4482
0cf45501 4483 if (!conf->saved_nics)
7b35f3d6
SH
4484 return;
4485 for (i=0; i < conf->num_savednics; i++)
4486 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4487 free(conf->saved_nics);
4488}
4489
4184c3e1
SH
4490static inline void lxc_clear_aliens(struct lxc_conf *conf)
4491{
4492 struct lxc_list *it,*next;
4493
4494 lxc_list_for_each_safe(it, &conf->aliens, next) {
4495 lxc_list_del(it);
4496 free(it->elem);
4497 free(it);
4498 }
4499}
4500
f979ac15
SH
4501static inline void lxc_clear_includes(struct lxc_conf *conf)
4502{
4503 struct lxc_list *it,*next;
4504
4505 lxc_list_for_each_safe(it, &conf->includes, next) {
4506 lxc_list_del(it);
4507 free(it->elem);
4508 free(it);
4509 }
4510}
4511
8eb5694b
SH
4512void lxc_conf_free(struct lxc_conf *conf)
4513{
4514 if (!conf)
4515 return;
b91f00d3
SH
4516 if (conf->console.log_path)
4517 free(conf->console.log_path);
8eb5694b
SH
4518 if (conf->console.path)
4519 free(conf->console.path);
54c30e29 4520 if (conf->rootfs.mount)
8eb5694b 4521 free(conf->rootfs.mount);
a17b1e65
SG
4522 if (conf->rootfs.options)
4523 free(conf->rootfs.options);
d95db067
DE
4524 if (conf->rootfs.path)
4525 free(conf->rootfs.path);
a58878d6
SH
4526 if (conf->rootfs.pivot)
4527 free(conf->rootfs.pivot);
4528 if (conf->logfile)
4529 free(conf->logfile);
d95db067
DE
4530 if (conf->utsname)
4531 free(conf->utsname);
4532 if (conf->ttydir)
4533 free(conf->ttydir);
4534 if (conf->fstab)
4535 free(conf->fstab);
fc7e8864
WM
4536 if (conf->rcfile)
4537 free(conf->rcfile);
67c660d0
SG
4538 if (conf->init_cmd)
4539 free(conf->init_cmd);
6b0d5538 4540 free(conf->unexpanded_config);
8eb5694b 4541 lxc_clear_config_network(conf);
fe4de9a6
DE
4542 if (conf->lsm_aa_profile)
4543 free(conf->lsm_aa_profile);
4544 if (conf->lsm_se_context)
4545 free(conf->lsm_se_context);
769872f9 4546 lxc_seccomp_free(conf);
8eb5694b 4547 lxc_clear_config_caps(conf);
1fb86a7c 4548 lxc_clear_config_keepcaps(conf);
8eb5694b 4549 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4550 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4551 lxc_clear_mount_entries(conf);
7b35f3d6 4552 lxc_clear_saved_nics(conf);
27c27d73 4553 lxc_clear_idmaps(conf);
ee1e7aa0 4554 lxc_clear_groups(conf);
f979ac15 4555 lxc_clear_includes(conf);
761d81ca 4556 lxc_clear_aliens(conf);
ab799c0b 4557 lxc_clear_environment(conf);
8eb5694b
SH
4558 free(conf);
4559}
4355ab5f
SH
4560
/*
 * Argument bundle handed to run_userns_fn(): the callback to invoke, the
 * opaque argument to pass to it, and the two ends of the pipe used to
 * hold the callback back until the other side writes a byte.
 */
struct userns_fn_data {
	int (*fn)(void *);	/* callback to run */
	void *arg;		/* passed unchanged to fn */
	int p[2];		/* pipe fds: p[0] is read from, p[1] is closed */
};
4566
4567static int run_userns_fn(void *data)
4568{
4569 struct userns_fn_data *d = data;
4570 char c;
4571 // we're not sharing with the parent any more, if it was a thread
4572
4573 close(d->p[1]);
4574 if (read(d->p[0], &c, 1) != 1)
4575 return -1;
4576 close(d->p[0]);
4577 return d->fn(d->arg);
4578}
4579
4580/*
8b227008
TS
4581 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4582 * if they are not already there.
4355ab5f 4583 */
8b227008
TS
4584static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4585 uid_t uid, gid_t gid)
4355ab5f 4586{
8b227008
TS
4587 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4588 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4589 struct lxc_list *new = NULL, *tmp, *it, *next;
4590 struct id_map *entry;
4591
3ec1648d
SH
4592 new = malloc(sizeof(*new));
4593 if (!new) {
4594 ERROR("Out of memory building id map");
4595 return NULL;
4596 }
4597 lxc_list_init(new);
4598
8b227008
TS
4599 if (hostuid_mapped < 0) {
4600 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4601 if (hostuid_mapped < 0)
3ec1648d
SH
4602 goto err;
4603 tmp = malloc(sizeof(*tmp));
4604 if (!tmp)
4605 goto err;
4355ab5f
SH
4606 entry = malloc(sizeof(*entry));
4607 if (!entry) {
3ec1648d
SH
4608 free(tmp);
4609 goto err;
4355ab5f 4610 }
3ec1648d 4611 tmp->elem = entry;
4355ab5f 4612 entry->idtype = ID_TYPE_UID;
8b227008
TS
4613 entry->nsid = hostuid_mapped;
4614 entry->hostid = (unsigned long) uid;
4615 entry->range = 1;
4616 lxc_list_add_tail(new, tmp);
4617 }
4618 if (hostgid_mapped < 0) {
4619 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4620 if (hostgid_mapped < 0)
4621 goto err;
4622 tmp = malloc(sizeof(*tmp));
4623 if (!tmp)
4624 goto err;
4625 entry = malloc(sizeof(*entry));
4626 if (!entry) {
4627 free(tmp);
4628 goto err;
4629 }
4630 tmp->elem = entry;
4631 entry->idtype = ID_TYPE_GID;
4632 entry->nsid = hostgid_mapped;
4633 entry->hostid = (unsigned long) gid;
4355ab5f 4634 entry->range = 1;
3ec1648d 4635 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4636 }
4637 lxc_list_for_each_safe(it, &conf->id_map, next) {
4638 tmp = malloc(sizeof(*tmp));
4639 if (!tmp)
4640 goto err;
4641 entry = malloc(sizeof(*entry));
4642 if (!entry) {
4643 free(tmp);
4644 goto err;
4645 }
4646 memset(entry, 0, sizeof(*entry));
4647 memcpy(entry, it->elem, sizeof(*entry));
4648 tmp->elem = entry;
3ec1648d 4649 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4650 }
4651
4652 return new;
4653
4654err:
8b227008 4655 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4656 if (new)
4657 lxc_free_idmap(new);
c30ac545 4658 free(new);
4355ab5f
SH
4659 return NULL;
4660}
4661
4662/*
4663 * Run a function in a new user namespace.
8b227008 4664 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4665 */
4666int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4667{
4668 int ret, pid;
4669 struct userns_fn_data d;
4670 char c = '1';
4671 int p[2];
4672 struct lxc_list *idmap;
4673
4355ab5f 4674 ret = pipe(p);
4355ab5f
SH
4675 if (ret < 0) {
4676 SYSERROR("opening pipe");
4677 return -1;
4678 }
4679 d.fn = fn;
4680 d.arg = data;
4681 d.p[0] = p[0];
4682 d.p[1] = p[1];
4683 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4684 if (pid < 0)
4685 goto err;
4355ab5f 4686 close(p[0]);
4355ab5f
SH
4687 p[0] = -1;
4688
8b227008
TS
4689 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4690 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4691 goto err;
4692 }
4693
4694 ret = lxc_map_ids(idmap, pid);
4695 lxc_free_idmap(idmap);
88dd66fc 4696 free(idmap);
565e571c 4697 if (ret) {
4355ab5f
SH
4698 ERROR("Error setting up child mappings");
4699 goto err;
4700 }
4701
4702 // kick the child
4703 if (write(p[1], &c, 1) != 1) {
4704 SYSERROR("writing to pipe to child");
4705 goto err;
4706 }
4707
3139aead
SG
4708 ret = wait_for_pid(pid);
4709
4710 close(p[1]);
4711 return ret;
4712
4355ab5f 4713err:
4355ab5f
SH
4714 if (p[0] != -1)
4715 close(p[0]);
4716 close(p[1]);
4355ab5f
SH
4717 return -1;
4718}
97e9cfa0 4719
/*
 * Return a malloc'd copy of the login name for the effective uid, or NULL
 * if getpwuid() has no entry for it (or strdup() fails). The caller frees
 * the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4731
/*
 * Return a malloc'd copy of the group name for the effective gid, or NULL
 * if getgrgid() has no entry for it (or strdup() fails). The caller frees
 * the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4743
a96a8e8c 4744/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4745void suggest_default_idmap(void)
4746{
4747 FILE *f;
4748 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4749 char *line = NULL;
4750 char *uname, *gname;
4751 size_t len = 0;
4752
4753 if (!(uname = getuname()))
4754 return;
4755
4756 if (!(gname = getgname())) {
4757 free(uname);
4758 return;
4759 }
4760
4761 f = fopen(subuidfile, "r");
4762 if (!f) {
4763 ERROR("Your system is not configured with subuids");
4764 free(gname);
4765 free(uname);
4766 return;
4767 }
4768 while (getline(&line, &len, f) != -1) {
4769 char *p = strchr(line, ':'), *p2;
4770 if (*line == '#')
4771 continue;
4772 if (!p)
4773 continue;
4774 *p = '\0';
4775 p++;
4776 if (strcmp(line, uname))
4777 continue;
4778 p2 = strchr(p, ':');
4779 if (!p2)
4780 continue;
4781 *p2 = '\0';
4782 p2++;
4783 if (!*p2)
4784 continue;
4785 uid = atoi(p);
4786 urange = atoi(p2);
4787 }
4788 fclose(f);
4789
4790 f = fopen(subuidfile, "r");
4791 if (!f) {
4792 ERROR("Your system is not configured with subgids");
4793 free(gname);
4794 free(uname);
4795 return;
4796 }
4797 while (getline(&line, &len, f) != -1) {
4798 char *p = strchr(line, ':'), *p2;
4799 if (*line == '#')
4800 continue;
4801 if (!p)
4802 continue;
4803 *p = '\0';
4804 p++;
4805 if (strcmp(line, uname))
4806 continue;
4807 p2 = strchr(p, ':');
4808 if (!p2)
4809 continue;
4810 *p2 = '\0';
4811 p2++;
4812 if (!*p2)
4813 continue;
4814 gid = atoi(p);
4815 grange = atoi(p2);
4816 }
4817 fclose(f);
4818
4819 if (line)
4820 free(line);
4821
4822 if (!urange || !grange) {
4823 ERROR("You do not have subuids or subgids allocated");
4824 ERROR("Unprivileged containers require subuids and subgids");
4825 return;
4826 }
4827
4828 ERROR("You must either run as root, or define uid mappings");
4829 ERROR("To pass uid mappings to lxc-create, you could create");
4830 ERROR("~/.config/lxc/default.conf:");
4831 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4832 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4833 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4834
4835 free(gname);
4836 free(uname);
4837}