]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
Merge pull request #796 from brauner/2016-02-02/fix_execute_null_ptr_deref
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
d06245b8
NC
23#include "config.h"
24
0ad19a3f 25#include <stdio.h>
0ad19a3f 26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
bc6928ff 32#include <inttypes.h>
e3b4c4c4 33#include <sys/wait.h>
2d76d1d7 34#include <sys/syscall.h>
97e9cfa0
SH
35#include <sys/types.h>
36#include <pwd.h>
37#include <grp.h>
4a0ba80d 38#include <time.h>
614305f3 39#ifdef HAVE_STATVFS
2938f7c8 40#include <sys/statvfs.h>
614305f3 41#endif
e827ff7e
SG
42
43#if HAVE_PTY_H
b0a33c1e 44#include <pty.h>
e827ff7e
SG
45#else
46#include <../include/openpty.h>
47#endif
0ad19a3f 48
b3ecde1e
DL
49#include <linux/loop.h>
50
0ad19a3f 51#include <sys/types.h>
52#include <sys/utsname.h>
53#include <sys/param.h>
54#include <sys/stat.h>
55#include <sys/socket.h>
56#include <sys/mount.h>
57#include <sys/mman.h>
81810dd1 58#include <sys/prctl.h>
0ad19a3f 59
60#include <arpa/inet.h>
61#include <fcntl.h>
62#include <netinet/in.h>
63#include <net/if.h>
6f4a3756 64#include <libgen.h>
0ad19a3f 65
e5bda9ee 66#include "network.h"
67#include "error.h"
e8bd4e43 68#include "af_unix.h"
b2718c72 69#include "parse.h"
1b09f2c0
DL
70#include "utils.h"
71#include "conf.h"
72#include "log.h"
d55bc1ad 73#include "caps.h" /* for lxc_caps_last_cap() */
4ec31c52 74#include "bdev/bdev.h"
3c16d0cb 75#include "bdev/lxcoverlay.h"
368bbc02 76#include "cgroup.h"
025ed0f3 77#include "lxclock.h"
4355ab5f 78#include "namespace.h"
fe4de9a6 79#include "lsm/lsm.h"
d0a36f2c 80
495d2046
SG
81#if HAVE_SYS_CAPABILITY_H
82#include <sys/capability.h>
83#endif
84
6ff05e18
SG
85#if HAVE_SYS_PERSONALITY_H
86#include <sys/personality.h>
87#endif
88
edaf8b1b
SG
89#if IS_BIONIC
90#include <../include/lxcmntent.h>
91#else
92#include <mntent.h>
93#endif
94
769872f9
SH
95#include "lxcseccomp.h"
96
36eb9bde 97lxc_log_define(lxc_conf, lxc);
e5bda9ee 98
/* Size used for line-oriented buffers in this file. */
#define LINELEN 4096

/*
 * Fallback values for capability numbers that the installed
 * <sys/capability.h> may not define.
 */
#if HAVE_SYS_CAPABILITY_H
#ifndef CAP_SETFCAP
#define CAP_SETFCAP 31
#endif

#ifndef CAP_MAC_OVERRIDE
#define CAP_MAC_OVERRIDE 32
#endif

#ifndef CAP_MAC_ADMIN
#define CAP_MAC_ADMIN 33
#endif
#endif

/* Fallback for a prctl() option missing from the system headers. */
#ifndef PR_CAPBSET_DROP
#define PR_CAPBSET_DROP 24
#endif

/* Fallback for a loop-device flag missing from the system headers. */
#ifndef LO_FLAGS_AUTOCLEAR
#define LO_FLAGS_AUTOCLEAR 4
#endif

/*
 * Needed for the cgroup automount checks, regardless of whether
 * linux/capability.h has been included or not.
 */
#ifndef CAP_SYS_ADMIN
#define CAP_SYS_ADMIN 21
#endif
128
2d76d1d7
SG
/*
 * Provide pivot_root() when the C library does not.
 *
 * With __NR_pivot_root available the call is forwarded through syscall();
 * otherwise it fails with errno set to ENOSYS and returns -1.
 */
#ifndef HAVE_PIVOT_ROOT
static int pivot_root(const char *new_root, const char *put_old)
{
#ifdef __NR_pivot_root
	return syscall(__NR_pivot_root, new_root, put_old);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#else
extern int pivot_root(const char *new_root, const char *put_old);
#endif
143
/*
 * Provide sethostname() when the C library does not.
 *
 * With __NR_sethostname available the call is forwarded through syscall();
 * otherwise it fails with errno set to ENOSYS and returns -1.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char *name, size_t len)
{
#ifdef __NR_sethostname
	return syscall(__NR_sethostname, name, len);
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
156
72f919c4
SG
/*
 * Provide __S_ISTYPE() when the C library does not: true when the file-type
 * bits of @mode equal @mask.
 */
#ifndef __S_ISTYPE
#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
#endif

/* Fallback for a mount flag missing from older system headers. */
#ifndef MS_PRIVATE
#define MS_PRIVATE (1<<18)
#endif
165
72d0e1cb 166char *lxchook_names[NUM_LXC_HOOKS] = {
52492063 167 "pre-start", "pre-mount", "mount", "autodev", "start", "stop", "post-stop", "clone", "destroy" };
72d0e1cb 168
/*
 * Signature shared by the per-network-type callbacks stored in the
 * netdev_conf[] and netdev_deconf[] tables.
 */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
0ad19a3f 170
998ac676
RT
/* One recognised mount option name and the mount flag it maps to. */
struct mount_opt {
	char *name;	/* option string, e.g. "ro" */
	int clear;	/* NOTE(review): presumably non-zero means the flag is cleared rather than set - confirm against the parser */
	int flag;	/* MS_* bit(s) tied to the option */
};
176
81810dd1
DL
/* Pairs a capability name with its numeric CAP_* value. */
struct caps_opt {
	char *name;	/* capability name, e.g. "chown" */
	int value;	/* matching CAP_* constant */
};
181
858377e4
SH
/*
 * The lxc_conf of the container currently being worked on in an API call.
 * This is used in the error calls.
 *
 * Declared thread-local when HAVE_TLS is defined, a plain global otherwise.
 */
#ifdef HAVE_TLS
__thread struct lxc_conf *current_config;
#else
struct lxc_conf *current_config;
#endif
192
0769b82a
CS
/*
 * Forward declaration: in_caplist() is used before its definition, and
 * declaring it here avoids reshuffling the whole file.
 */
static int in_caplist(int cap, struct lxc_list *caps);
195
a589434e
JN
196static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
197static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
198static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
199static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
200static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
201static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
202
203static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
204 [LXC_NET_VETH] = instantiate_veth,
205 [LXC_NET_MACVLAN] = instantiate_macvlan,
206 [LXC_NET_VLAN] = instantiate_vlan,
207 [LXC_NET_PHYS] = instantiate_phys,
208 [LXC_NET_EMPTY] = instantiate_empty,
209 [LXC_NET_NONE] = instantiate_none,
0ad19a3f 210};
211
74a2b586
JK
212static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
213static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
214static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
215static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
216static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
26b797f3 217static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
74a2b586 218
a589434e 219static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
74a2b586
JK
220 [LXC_NET_VETH] = shutdown_veth,
221 [LXC_NET_MACVLAN] = shutdown_macvlan,
222 [LXC_NET_VLAN] = shutdown_vlan,
223 [LXC_NET_PHYS] = shutdown_phys,
224 [LXC_NET_EMPTY] = shutdown_empty,
26b797f3 225 [LXC_NET_NONE] = shutdown_none,
74a2b586
JK
226};
227
998ac676 228static struct mount_opt mount_opt[] = {
88d413d5
SW
229 { "defaults", 0, 0 },
230 { "ro", 0, MS_RDONLY },
231 { "rw", 1, MS_RDONLY },
232 { "suid", 1, MS_NOSUID },
233 { "nosuid", 0, MS_NOSUID },
234 { "dev", 1, MS_NODEV },
235 { "nodev", 0, MS_NODEV },
236 { "exec", 1, MS_NOEXEC },
237 { "noexec", 0, MS_NOEXEC },
238 { "sync", 0, MS_SYNCHRONOUS },
239 { "async", 1, MS_SYNCHRONOUS },
240 { "dirsync", 0, MS_DIRSYNC },
241 { "remount", 0, MS_REMOUNT },
242 { "mand", 0, MS_MANDLOCK },
243 { "nomand", 1, MS_MANDLOCK },
244 { "atime", 1, MS_NOATIME },
245 { "noatime", 0, MS_NOATIME },
246 { "diratime", 1, MS_NODIRATIME },
247 { "nodiratime", 0, MS_NODIRATIME },
248 { "bind", 0, MS_BIND },
249 { "rbind", 0, MS_BIND|MS_REC },
250 { "relatime", 0, MS_RELATIME },
251 { "norelatime", 1, MS_RELATIME },
252 { "strictatime", 0, MS_STRICTATIME },
253 { "nostrictatime", 1, MS_STRICTATIME },
254 { NULL, 0, 0 },
998ac676
RT
255};
256
495d2046 257#if HAVE_SYS_CAPABILITY_H
81810dd1 258static struct caps_opt caps_opt[] = {
a6afdde9 259 { "chown", CAP_CHOWN },
1e11be34
DL
260 { "dac_override", CAP_DAC_OVERRIDE },
261 { "dac_read_search", CAP_DAC_READ_SEARCH },
262 { "fowner", CAP_FOWNER },
263 { "fsetid", CAP_FSETID },
81810dd1
DL
264 { "kill", CAP_KILL },
265 { "setgid", CAP_SETGID },
266 { "setuid", CAP_SETUID },
267 { "setpcap", CAP_SETPCAP },
268 { "linux_immutable", CAP_LINUX_IMMUTABLE },
269 { "net_bind_service", CAP_NET_BIND_SERVICE },
270 { "net_broadcast", CAP_NET_BROADCAST },
271 { "net_admin", CAP_NET_ADMIN },
272 { "net_raw", CAP_NET_RAW },
273 { "ipc_lock", CAP_IPC_LOCK },
274 { "ipc_owner", CAP_IPC_OWNER },
275 { "sys_module", CAP_SYS_MODULE },
276 { "sys_rawio", CAP_SYS_RAWIO },
277 { "sys_chroot", CAP_SYS_CHROOT },
278 { "sys_ptrace", CAP_SYS_PTRACE },
279 { "sys_pacct", CAP_SYS_PACCT },
280 { "sys_admin", CAP_SYS_ADMIN },
281 { "sys_boot", CAP_SYS_BOOT },
282 { "sys_nice", CAP_SYS_NICE },
283 { "sys_resource", CAP_SYS_RESOURCE },
284 { "sys_time", CAP_SYS_TIME },
285 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
286 { "mknod", CAP_MKNOD },
287 { "lease", CAP_LEASE },
57b837e2
CB
288#ifdef CAP_AUDIT_READ
289 { "audit_read", CAP_AUDIT_READ },
290#endif
9527e566 291#ifdef CAP_AUDIT_WRITE
81810dd1 292 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
293#endif
294#ifdef CAP_AUDIT_CONTROL
81810dd1 295 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 296#endif
81810dd1
DL
297 { "setfcap", CAP_SETFCAP },
298 { "mac_override", CAP_MAC_OVERRIDE },
299 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
300#ifdef CAP_SYSLOG
301 { "syslog", CAP_SYSLOG },
302#endif
303#ifdef CAP_WAKE_ALARM
304 { "wake_alarm", CAP_WAKE_ALARM },
305#endif
2b54359b
CB
306#ifdef CAP_BLOCK_SUSPEND
307 { "block_suspend", CAP_BLOCK_SUSPEND },
308#endif
81810dd1 309};
495d2046
SG
310#else
311static struct caps_opt caps_opt[] = {};
312#endif
81810dd1 313
91c3830e
SH
314static int run_buffer(char *buffer)
315{
ebec9176 316 struct lxc_popen_FILE *f;
91c3830e 317 char *output;
8e7da691 318 int ret;
91c3830e 319
ebec9176 320 f = lxc_popen(buffer);
91c3830e
SH
321 if (!f) {
322 SYSERROR("popen failed");
323 return -1;
324 }
325
326 output = malloc(LXC_LOG_BUFFER_SIZE);
327 if (!output) {
328 ERROR("failed to allocate memory for script output");
ebec9176 329 lxc_pclose(f);
91c3830e
SH
330 return -1;
331 }
332
ebec9176 333 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
91c3830e
SH
334 DEBUG("script output: %s", output);
335
336 free(output);
337
ebec9176 338 ret = lxc_pclose(f);
8e7da691 339 if (ret == -1) {
91c3830e
SH
340 SYSERROR("Script exited on error");
341 return -1;
8e7da691
DE
342 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
343 ERROR("Script exited with status %d", WEXITSTATUS(ret));
344 return -1;
345 } else if (WIFSIGNALED(ret)) {
346 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
347 strsignal(WTERMSIG(ret)));
348 return -1;
91c3830e
SH
349 }
350
351 return 0;
352}
353
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it through run_buffer().
 *
 * @name, @section, @script and @hook must be non-NULL strings. @argsin may
 * be NULL, or a NULL-terminated array of extra arguments that are appended
 * separated by single spaces. @lxcpath is accepted but not used here.
 *
 * Returns the result of run_buffer(), or -1 when the command line would
 * exceed INT_MAX bytes, cannot be allocated, or cannot be formatted.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret, i;
	int result = -1;
	char *buffer;
	size_t size = 0;

	(void)lxcpath;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* every extra argument is preceded by one space */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	/* the +1 here accounts for the terminating NUL */
	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* three separating spaces in "%s %s %s %s" */
	size += 3;

	if (size > INT_MAX)
		return -1;

	/*
	 * The length is derived from caller-supplied strings, so use the heap:
	 * alloca() cannot report failure and a large request would simply
	 * overflow the stack.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		ret += rc;
	}

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
403
751d9dcd
DL
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it through run_buffer().
 *
 * The variadic arguments are `char *` strings and MUST be terminated by a
 * NULL pointer. Each one is appended to the command line preceded by a
 * single space.
 *
 * Returns the result of run_buffer(), or -1 when the command line would
 * exceed INT_MAX bytes, cannot be allocated, or cannot be formatted.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	int result = -1;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: measure; every argument is preceded by one space */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two separating spaces in "%s %s %s" plus the terminating NUL */
	size += 3;

	if (size > INT_MAX)
		return -1;

	/*
	 * The length is derived from caller-supplied strings, so use the heap:
	 * alloca() cannot report failure and a large request would simply
	 * overflow the stack.
	 */
	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append the arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			/* every va_start() must be paired with va_end() */
			va_end(ap);
			goto out;
		}
		ret += rc;
	}
	va_end(ap);

	result = run_buffer(buffer);

out:
	free(buffer);
	return result;
}
455
a17b1e65
SG
/*
 * Recursively bind-mount the directory @rootfs onto @target.
 *
 * @options is handed to parse_mntopts(); the flags it yields are OR-ed with
 * MS_BIND | MS_REC and the data string it yields is passed as mount data.
 *
 * Returns -1 when the options cannot be parsed, otherwise the result of
 * mount(2).
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialised so that the free() on the error path is well defined
	 * even if parse_mntopts() bails out before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
473
/*
 * Attach the file @rootfs to the loop device open on @fd.
 *
 * @loinfo is zeroed, flagged LO_FLAGS_AUTOCLEAR and applied with
 * LOOP_SET_STATUS64 after the backing file has been bound with LOOP_SET_FD.
 * If applying the status fails, the binding is undone again so the device
 * is not left attached without the autoclear flag.
 *
 * Returns 0 on success and -1 on failure. The descriptor opened on @rootfs
 * is closed before returning; @fd stays owned by the caller.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* autoclear never took effect, so detach explicitly */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	close(rfd);

	return ret;
}
505
a17b1e65
SG
/*
 * Mount the regular file @rootfs on @target through a loop device.
 *
 * /dev is scanned for entries whose name starts with "loop". The first one
 * that can be opened read-write and for which LOOP_GET_STATUS64 fails with
 * ENXIO is taken as free: @rootfs is attached to it with setup_lodev() and
 * the device is then mounted with mount_unknown_fs(). Only that one device
 * is tried.
 *
 * Returns 0 on success, -1 if /dev cannot be opened, no usable device is
 * found, or attaching/mounting fails.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	/*
	 * readdir() rather than the deprecated readdir_r(): the stream is
	 * private to this call, and a caller-provided struct dirent may be
	 * too small for long names.
	 */
	while ((direntp = readdir(dir))) {

		/* this also skips "." and ".." */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a device that reports a status is already in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
570
a17b1e65
SG
/*
 * Mount the block device @rootfs on @target by delegating to
 * mount_unknown_fs() with the same arguments; its result is returned as is.
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int ret = mount_unknown_fs(rootfs, target, options);

	return ret;
}
576
0c547523
SH
577/*
578 * pin_rootfs
b7ed4bf0
CS
579 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
580 * the duration of the container run, to prevent the container from marking
581 * the underlying fs readonly on shutdown. unlink the file immediately so
582 * no name pollution is happens
0c547523
SH
583 * return -1 on error.
584 * return -2 if nothing needed to be pinned.
585 * return an open fd (>=0) if we pinned it.
586 */
587int pin_rootfs(const char *rootfs)
588{
589 char absrootfs[MAXPATHLEN];
590 char absrootfspin[MAXPATHLEN];
591 struct stat s;
592 int ret, fd;
593
e99ee0de 594 if (rootfs == NULL || strlen(rootfs) == 0)
0d03360a 595 return -2;
e99ee0de 596
00ec333b 597 if (!realpath(rootfs, absrootfs))
9be53773 598 return -2;
0c547523 599
00ec333b 600 if (access(absrootfs, F_OK))
0c547523 601 return -1;
0c547523 602
00ec333b 603 if (stat(absrootfs, &s))
0c547523 604 return -1;
0c547523 605
72f919c4 606 if (!S_ISDIR(s.st_mode))
0c547523
SH
607 return -2;
608
b7ed4bf0 609 ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
00ec333b 610 if (ret >= MAXPATHLEN)
0c547523 611 return -1;
0c547523
SH
612
613 fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
b7ed4bf0
CS
614 if (fd < 0)
615 return fd;
616 (void)unlink(absrootfspin);
0c547523
SH
617 return fd;
618}
619
e2a7e8dc
SH
/*
 * If we are asking to remount something, make sure that any NOEXEC etc.
 * already in force on the filesystem are honored.
 *
 * The filesystem is looked up through @s, or through @d when @s is NULL.
 * Returns @flags unchanged when MS_REMOUNT is not requested, when both
 * paths are NULL, when statvfs() fails, or when built without HAVE_STATVFS.
 * Otherwise returns @flags with MS_NOSUID, MS_NODEV, MS_RDONLY and MS_NOEXEC
 * added for each of those bits reported in f_flag.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long carried[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *where = s ? s : d;
	struct statvfs vfs;
	unsigned long extra = 0;
	size_t k;

	if (!(flags & MS_REMOUNT) || !where)
		return flags;

	if (statvfs(where, &vfs) < 0)
		return flags;

	for (k = 0; k < sizeof(carried) / sizeof(carried[0]); k++) {
		if (vfs.f_flag & carried[k])
			extra |= carried[k];
	}

	return flags | extra;
#else
	return flags;
#endif
}
656
4fb3cba5 657static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
368bbc02 658{
368bbc02 659 int r;
80e80c40 660 int i;
b06b8511
CS
661 static struct {
662 int match_mask;
663 int match_flag;
664 const char *source;
665 const char *destination;
666 const char *fstype;
667 unsigned long flags;
668 const char *options;
669 } default_mounts[] = {
670 /* Read-only bind-mounting... In older kernels, doing that required
671 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
672 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
673 * kernel 2.6.26 onwards. However, this apparently does not work on
674 * kernel 3.8. Unfortunately, on that very same kernel, doing the
675 * same trick as above doesn't seem to work either, there one needs
676 * to ALSO specify MS_BIND for the remount, otherwise the entire
677 * fs is remounted read-only or the mount fails because it's busy...
678 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
679 * 2.6.32...
368bbc02 680 */
f24a52d5 681 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
592fd47a
SH
682 /* proc/tty is used as a temporary placeholder for proc/sys/net which we'll move back in a few steps */
683 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/tty", NULL, MS_BIND, NULL },
f24a52d5
SG
684 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
685 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
592fd47a 686 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/tty", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
f24a52d5
SG
687 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
688 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
689 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
690 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
691 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
692 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
693 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
694 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
695 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
696 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
697 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
698 { 0, 0, NULL, NULL, NULL, 0, NULL }
b06b8511 699 };
368bbc02 700
b06b8511
CS
701 for (i = 0; default_mounts[i].match_mask; i++) {
702 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
703 char *source = NULL;
704 char *destination = NULL;
705 int saved_errno;
e2a7e8dc 706 unsigned long mflags;
b06b8511
CS
707
708 if (default_mounts[i].source) {
709 /* will act like strdup if %r is not present */
8ede5f4c 710 source = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].source);
b06b8511
CS
711 if (!source) {
712 SYSERROR("memory allocation error");
713 return -1;
714 }
715 }
cc4fd506
SH
716 if (!default_mounts[i].destination) {
717 ERROR("BUG: auto mounts destination %d was NULL", i);
718 return -1;
719 }
720 /* will act like strdup if %r is not present */
721 destination = lxc_string_replace("%r", conf->rootfs.path ? conf->rootfs.mount : "", default_mounts[i].destination);
722 if (!destination) {
723 saved_errno = errno;
724 SYSERROR("memory allocation error");
725 free(source);
726 errno = saved_errno;
727 return -1;
b06b8511 728 }
e2a7e8dc
SH
729 mflags = add_required_remount_flags(source, destination,
730 default_mounts[i].flags);
592fd47a 731 r = safe_mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options, conf->rootfs.path ? conf->rootfs.mount : NULL);
b06b8511 732 saved_errno = errno;
b88ff9a0
SG
733 if (r < 0 && errno == ENOENT) {
734 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
735 r = 0;
736 }
737 else if (r < 0)
e2a7e8dc 738 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
f24a52d5 739
b06b8511
CS
740 free(source);
741 free(destination);
742 if (r < 0) {
b06b8511
CS
743 errno = saved_errno;
744 return -1;
745 }
368bbc02 746 }
368bbc02
CS
747 }
748
b06b8511 749 if (flags & LXC_AUTO_CGROUP_MASK) {
0769b82a
CS
750 int cg_flags;
751
752 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
753 /* If the type of cgroup mount was not specified, it depends on the
754 * container's capabilities as to what makes sense: if we have
755 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
756 * anyway, so we may as well default to read-write; then the admin
757 * will not be given a false sense of security. (And if they really
758 * want mixed r/o r/w, then they can explicitly specify :mixed.)
759 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
760 * :mixed, because then the container can't remount it read-write. */
761 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
762 int has_sys_admin = 0;
763 if (!lxc_list_empty(&conf->keepcaps)) {
764 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
765 } else {
766 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
767 }
768 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
769 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
770 } else {
771 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
772 }
773 }
774
8ede5f4c 775 if (!cgroup_mount(conf->rootfs.path ? conf->rootfs.mount : "", handler, cg_flags)) {
368bbc02 776 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 777 return -1;
368bbc02
CS
778 }
779 }
780
368bbc02 781 return 0;
368bbc02
CS
782}
783
a17b1e65 784static int mount_rootfs(const char *rootfs, const char *target, const char *options)
0ad19a3f 785{
b09ef133 786 char absrootfs[MAXPATHLEN];
78ae2fcc 787 struct stat s;
a6afdde9 788 int i;
78ae2fcc 789
a17b1e65 790 typedef int (*rootfs_cb)(const char *, const char *, const char *);
78ae2fcc 791
792 struct rootfs_type {
793 int type;
794 rootfs_cb cb;
795 } rtfs_type[] = {
2656d231
DL
796 { S_IFDIR, mount_rootfs_dir },
797 { S_IFBLK, mount_rootfs_block },
798 { S_IFREG, mount_rootfs_file },
78ae2fcc 799 };
0ad19a3f 800
4c8ab83b 801 if (!realpath(rootfs, absrootfs)) {
36eb9bde 802 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 803 return -1;
804 }
b09ef133 805
b09ef133 806 if (access(absrootfs, F_OK)) {
36eb9bde 807 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 808 return -1;
809 }
810
78ae2fcc 811 if (stat(absrootfs, &s)) {
36eb9bde 812 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 813 return -1;
814 }
815
78ae2fcc 816 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 817
78ae2fcc 818 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
819 continue;
9b0f0477 820
a17b1e65 821 return rtfs_type[i].cb(absrootfs, target, options);
78ae2fcc 822 }
9b0f0477 823
36eb9bde 824 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 825 return -1;
0ad19a3f 826}
827
/*
 * Set the hostname to utsname->nodename.
 *
 * A NULL @utsname means there is nothing to configure and yields 0.
 * Returns 0 on success and -1 when sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (!utsname)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
842
69aa6655
DE
/* One symlink to create under /dev: "/dev/<name>" pointing at <oldpath>. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name below /dev */
};

/* The symlinks created by setup_dev_symlinks(). */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
854
855static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
856{
857 char path[MAXPATHLEN];
858 int ret,i;
09227be2 859 struct stat s;
69aa6655 860
1ec0e8e3
CB
861 /* rootfs struct will be empty when container is created without rootfs. */
862 char *rootfs_path = NULL;
863 if (rootfs && rootfs->path)
864 rootfs_path = rootfs->mount;
865
69aa6655
DE
866
867 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
868 const struct dev_symlinks *d = &dev_symlinks[i];
1ec0e8e3 869 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs_path ? rootfs_path : "", d->name);
69aa6655
DE
870 if (ret < 0 || ret >= MAXPATHLEN)
871 return -1;
09227be2
MW
872
873 /*
874 * Stat the path first. If we don't get an error
875 * accept it as is and don't try to create it
876 */
877 if (!stat(path, &s)) {
878 continue;
879 }
880
69aa6655 881 ret = symlink(d->oldpath, path);
09227be2 882
69aa6655 883 if (ret && errno != EEXIST) {
09227be2
MW
884 if ( errno == EROFS ) {
885 WARN("Warning: Read Only file system while creating %s", path);
886 } else {
887 SYSERROR("Error creating %s", path);
888 return -1;
889 }
69aa6655
DE
890 }
891 }
892 return 0;
893}
894
393903d1
SH
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * When *pp is NULL a new string "container_ttys=<name>" is allocated;
 * otherwise *pp is grown with realloc() and " <name>" is appended.
 * Returns true on success. On allocation failure returns false and leaves
 * *pp as it was (NULL in the first case, the old string in the second).
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	size_t name_len = strlen(name);
	size_t used;
	char *grown;

	if (!*pp) {
		size_t prefix_len = sizeof(prefix) - 1;

		*pp = malloc(name_len + prefix_len + 1);
		if (!*pp)
			return false;
		memcpy(*pp, prefix, prefix_len);
		/* copies the terminating NUL as well */
		memcpy(*pp + prefix_len, name, name_len + 1);
		return true;
	}

	used = strlen(*pp);
	/* one byte for the separating space, one for the NUL */
	grown = realloc(*pp, used + name_len + 2);
	if (!grown)
		return false;

	*pp = grown;
	grown[used] = ' ';
	memcpy(grown + used + 1, name, name_len + 1);
	return true;
}
917
918static int setup_tty(struct lxc_conf *conf)
919{
393903d1
SH
920 const struct lxc_tty_info *tty_info = &conf->tty_info;
921 char *ttydir = conf->ttydir;
7c6ef2a2
SH
922 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
923 int i, ret;
b0a33c1e 924
e8bd4e43 925 if (!conf->rootfs.path)
bc9bd0e3
DL
926 return 0;
927
b0a33c1e 928 for (i = 0; i < tty_info->nbtty; i++) {
929
930 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
931
e8bd4e43 932 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
7c6ef2a2
SH
933 if (ret >= sizeof(path)) {
934 ERROR("pathname too long for ttys");
935 return -1;
936 }
937 if (ttydir) {
938 /* create dev/lxc/tty%d" */
e8bd4e43 939 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
7c6ef2a2
SH
940 if (ret >= sizeof(lxcpath)) {
941 ERROR("pathname too long for ttys");
942 return -1;
943 }
944 ret = creat(lxcpath, 0660);
945 if (ret==-1 && errno != EEXIST) {
959aee9c 946 SYSERROR("error creating %s", lxcpath);
7c6ef2a2
SH
947 return -1;
948 }
4d44e274
SH
949 if (ret >= 0)
950 close(ret);
7c6ef2a2
SH
951 ret = unlink(path);
952 if (ret && errno != ENOENT) {
959aee9c 953 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
954 return -1;
955 }
b0a33c1e 956
7c6ef2a2
SH
957 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
958 WARN("failed to mount '%s'->'%s'",
959 pty_info->name, path);
960 continue;
961 }
13954cce 962
9ba8130c
SH
963 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
964 if (ret >= sizeof(lxcpath)) {
965 ERROR("tty pathname too long");
966 return -1;
967 }
7c6ef2a2
SH
968 ret = symlink(lxcpath, path);
969 if (ret) {
959aee9c 970 SYSERROR("failed to create symlink for tty %d", i+1);
7c6ef2a2
SH
971 return -1;
972 }
973 } else {
c6883f38
SH
974 /* If we populated /dev, then we need to create /dev/ttyN */
975 if (access(path, F_OK)) {
976 ret = creat(path, 0660);
977 if (ret==-1) {
959aee9c 978 SYSERROR("error creating %s", path);
c6883f38 979 /* this isn't fatal, continue */
025ed0f3 980 } else {
c6883f38 981 close(ret);
025ed0f3 982 }
c6883f38 983 }
7c6ef2a2 984 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
e8bd4e43 985 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
7c6ef2a2
SH
986 continue;
987 }
393903d1 988 }
e8bd4e43 989 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
393903d1
SH
990 ERROR("Error setting up container_ttys string");
991 return -1;
b0a33c1e 992 }
993 }
994
cd54d859
DL
995 INFO("%d tty(s) has been setup", tty_info->nbtty);
996
b0a33c1e 997 return 0;
998}
999
bf601689 1000
2d489f9e 1001static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
bf601689 1002{
2d489f9e 1003 int oldroot = -1, newroot = -1;
bf601689 1004
2d489f9e
SH
1005 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1006 if (oldroot < 0) {
1007 SYSERROR("Error opening old-/ for fchdir");
9ba8130c
SH
1008 return -1;
1009 }
2d489f9e
SH
1010 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1011 if (newroot < 0) {
1012 SYSERROR("Error opening new-/ for fchdir");
1013 goto fail;
c08556c6 1014 }
bf601689 1015
cc6f6dd7 1016 /* change into new root fs */
2d489f9e 1017 if (fchdir(newroot)) {
cc6f6dd7 1018 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
2d489f9e 1019 goto fail;
cc6f6dd7
DL
1020 }
1021
cc6f6dd7 1022 /* pivot_root into our new root fs */
2d489f9e 1023 if (pivot_root(".", ".")) {
cc6f6dd7 1024 SYSERROR("pivot_root syscall failed");
2d489f9e 1025 goto fail;
bf601689 1026 }
cc6f6dd7 1027
2d489f9e
SH
1028 /*
1029 * at this point the old-root is mounted on top of our new-root
1030 * To unmounted it we must not be chdir'd into it, so escape back
1031 * to old-root
1032 */
1033 if (fchdir(oldroot) < 0) {
1034 SYSERROR("Error entering oldroot");
1035 goto fail;
1036 }
7981ea46 1037 if (umount2(".", MNT_DETACH) < 0) {
2d489f9e
SH
1038 SYSERROR("Error detaching old root");
1039 goto fail;
cc6f6dd7
DL
1040 }
1041
2d489f9e
SH
1042 if (fchdir(newroot) < 0) {
1043 SYSERROR("Error re-entering newroot");
1044 goto fail;
1045 }
cc6f6dd7 1046
2d489f9e
SH
1047 close(oldroot);
1048 close(newroot);
bf601689 1049
2d489f9e 1050 DEBUG("pivot_root syscall to '%s' successful", rootfs);
bf601689 1051
bf601689 1052 return 0;
2d489f9e
SH
1053
1054fail:
1055 if (oldroot != -1)
1056 close(oldroot);
1057 if (newroot != -1)
1058 close(newroot);
1059 return -1;
bf601689
MH
1060}
1061
bc6928ff 1062/*
87da4ec3
SH
1063 * Just create a path for /dev under $lxcpath/$name and in rootfs
1064 * If we hit an error, log it but don't fail yet.
91c3830e 1065 */
14221cbb 1066static int mount_autodev(const char *name, const struct lxc_rootfs *rootfs, const char *lxcpath)
91c3830e
SH
1067{
1068 int ret;
87da4ec3
SH
1069 size_t clen;
1070 char *path;
91c3830e 1071
1ec0e8e3
CB
1072 /* rootfs struct will be empty when container is created without rootfs. */
1073 char *rootfs_path = NULL;
1074 if (rootfs && rootfs->path)
1075 rootfs_path = rootfs->mount;
1076
14221cbb 1077 INFO("Mounting container /dev");
bc6928ff 1078
14221cbb 1079 /* $(rootfs->mount) + "/dev/pts" + '\0' */
1ec0e8e3 1080 clen = (rootfs_path ? strlen(rootfs_path) : 0) + 9;
87da4ec3 1081 path = alloca(clen);
bc6928ff 1082
1ec0e8e3 1083 ret = snprintf(path, clen, "%s/dev", rootfs_path ? rootfs_path : "");
87da4ec3 1084 if (ret < 0 || ret >= clen)
91c3830e 1085 return -1;
bc6928ff 1086
87da4ec3 1087 if (!dir_exists(path)) {
14221cbb 1088 WARN("No /dev in container.");
87da4ec3
SH
1089 WARN("Proceeding without autodev setup");
1090 return 0;
bc6928ff 1091 }
87da4ec3 1092
1ec0e8e3
CB
1093 ret = safe_mount("none", path, "tmpfs", 0, "size=500000,mode=755",
1094 rootfs_path);
1095 if (ret != 0) {
87da4ec3 1096 SYSERROR("Failed mounting tmpfs onto %s\n", path);
1ec0e8e3 1097 return -1;
91c3830e 1098 }
87da4ec3
SH
1099
1100 INFO("Mounted tmpfs onto %s", path);
1101
1ec0e8e3 1102 ret = snprintf(path, clen, "%s/dev/pts", rootfs_path ? rootfs_path : "");
87da4ec3 1103 if (ret < 0 || ret >= clen)
91c3830e 1104 return -1;
87da4ec3 1105
bc6928ff
MW
1106 /*
1107 * If we are running on a devtmpfs mapping, dev/pts may already exist.
1108 * If not, then create it and exit if that fails...
1109 */
87da4ec3 1110 if (!dir_exists(path)) {
bc6928ff
MW
1111 ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1112 if (ret) {
1113 SYSERROR("Failed to create /dev/pts in container");
1114 return -1;
1115 }
91c3830e
SH
1116 }
1117
14221cbb 1118 INFO("Mounted container /dev");
91c3830e
SH
1119 return 0;
1120}
1121
/* One device node to be created below the container's /dev. */
struct lxc_devs {
	const char *name;	/* node name relative to /dev */
	mode_t mode;		/* file type and permission bits handed to mknod() */
	int maj;		/* device major number */
	int min;		/* device minor number */
};

/* Device nodes that fill_autodev() creates in the container's /dev. */
static const struct lxc_devs lxc_devs[] = {
	{ .name = "null",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",    .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom", .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",     .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console", .mode = S_IFCHR | S_IRUSR | S_IWUSR,           .maj = 5, .min = 1 },
};
1138
14221cbb 1139static int fill_autodev(const struct lxc_rootfs *rootfs)
c6883f38
SH
1140{
1141 int ret;
c6883f38
SH
1142 char path[MAXPATHLEN];
1143 int i;
3a32201c 1144 mode_t cmask;
c6883f38 1145
1ec0e8e3
CB
1146 /* rootfs struct will be empty when container is created without rootfs. */
1147 char *rootfs_path = NULL;
1148 if (rootfs && rootfs->path)
1149 rootfs_path = rootfs->mount;
1150
14221cbb 1151 INFO("Creating initial consoles under container /dev");
91c3830e 1152
1ec0e8e3 1153 ret = snprintf(path, MAXPATHLEN, "%s/dev", rootfs_path ? rootfs_path : "");
91c3830e
SH
1154 if (ret < 0 || ret >= MAXPATHLEN) {
1155 ERROR("Error calculating container /dev location");
c6883f38 1156 return -1;
f7bee6c6 1157 }
91c3830e 1158
9cb4d183
SH
1159 if (!dir_exists(path)) // ignore, just don't try to fill in
1160 return 0;
1161
14221cbb 1162 INFO("Populating container /dev");
3a32201c 1163 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38 1164 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
74a3920a 1165 const struct lxc_devs *d = &lxc_devs[i];
1ec0e8e3 1166 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", rootfs_path ? rootfs_path : "", d->name);
c6883f38
SH
1167 if (ret < 0 || ret >= MAXPATHLEN)
1168 return -1;
1169 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1170 if (ret && errno != EEXIST) {
9cb4d183
SH
1171 char hostpath[MAXPATHLEN];
1172 FILE *pathfile;
1173
1174 // Unprivileged containers cannot create devices, so
1175 // bind mount the device from the host
1176 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1177 if (ret < 0 || ret >= MAXPATHLEN)
1178 return -1;
1179 pathfile = fopen(path, "wb");
1180 if (!pathfile) {
1181 SYSERROR("Failed to create device mount target '%s'", path);
1182 return -1;
1183 }
1184 fclose(pathfile);
592fd47a 1185 if (safe_mount(hostpath, path, 0, MS_BIND, NULL,
1ec0e8e3 1186 rootfs_path ? rootfs_path : NULL) != 0) {
9cb4d183
SH
1187 SYSERROR("Failed bind mounting device %s from host into container",
1188 d->name);
1189 return -1;
1190 }
c6883f38
SH
1191 }
1192 }
3a32201c 1193 umask(cmask);
c6883f38 1194
14221cbb 1195 INFO("Populated container /dev");
c6883f38
SH
1196 return 0;
1197}
1198
cc28d0b0 1199static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1200{
cc28d0b0
SH
1201 const struct lxc_rootfs *rootfs = &conf->rootfs;
1202
a0f379bf
DW
1203 if (!rootfs->path) {
1204 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1205 SYSERROR("Failed to make / rslave");
1206 return -1;
1207 }
c69bd12f 1208 return 0;
a0f379bf 1209 }
0ad19a3f 1210
12297168 1211 if (access(rootfs->mount, F_OK)) {
b1789442 1212 SYSERROR("failed to access to '%s', check it is present",
12297168 1213 rootfs->mount);
b1789442
DL
1214 return -1;
1215 }
1216
9be53773 1217 // First try mounting rootfs using a bdev
76a26f55 1218 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
9be53773 1219 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1220 bdev_put(bdev);
9be53773
SH
1221 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1222 return 0;
1223 }
59d66af2
SH
1224 if (bdev)
1225 bdev_put(bdev);
a17b1e65 1226 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
a6afdde9 1227 ERROR("failed to mount rootfs");
c3f0a28c 1228 return -1;
1229 }
0ad19a3f 1230
12297168 1231 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1232
ac778708
DL
1233 return 0;
1234}
1235
91e93c71
AV
1236int prepare_ramfs_root(char *root)
1237{
1238 char buf[LINELEN], *p;
1239 char nroot[PATH_MAX];
1240 FILE *f;
1241 int i;
1242 char *p2;
1243
1244 if (realpath(root, nroot) == NULL)
1245 return -1;
1246
1247 if (chdir("/") == -1)
1248 return -1;
1249
1250 /*
1251 * We could use here MS_MOVE, but in userns this mount is
1252 * locked and can't be moved.
1253 */
1254 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1255 SYSERROR("Failed to move %s into /", root);
1256 return -1;
1257 }
1258
88322f77 1259 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
91e93c71
AV
1260 SYSERROR("Failed to make . rprivate");
1261 return -1;
1262 }
1263
1264 /*
1265 * The following code cleans up inhereted mounts which are not
1266 * required for CT.
1267 *
1268 * The mountinfo file shows not all mounts, if a few points have been
1269 * unmounted between read operations from the mountinfo. So we need to
1270 * read mountinfo a few times.
1271 *
1272 * This loop can be skipped if a container uses unserns, because all
1273 * inherited mounts are locked and we should live with all this trash.
1274 */
1275 while (1) {
1276 int progress = 0;
1277
1278 f = fopen("./proc/self/mountinfo", "r");
1279 if (!f) {
1280 SYSERROR("Unable to open /proc/self/mountinfo");
1281 return -1;
1282 }
1283 while (fgets(buf, LINELEN, f)) {
1284 for (p = buf, i=0; p && i < 4; i++)
1285 p = strchr(p+1, ' ');
1286 if (!p)
1287 continue;
1288 p2 = strchr(p+1, ' ');
1289 if (!p2)
1290 continue;
1291
1292 *p2 = '\0';
1293 *p = '.';
1294
1295 if (strcmp(p + 1, "/") == 0)
1296 continue;
1297 if (strcmp(p + 1, "/proc") == 0)
1298 continue;
1299
1300 if (umount2(p, MNT_DETACH) == 0)
1301 progress++;
1302 }
1303 fclose(f);
1304 if (!progress)
1305 break;
1306 }
1307
8bea9fae
PR
1308 /* This also can be skipped if a container uses unserns */
1309 umount2("./proc", MNT_DETACH);
91e93c71
AV
1310
1311 /* It is weird, but chdir("..") moves us in a new root */
1312 if (chdir("..") == -1) {
1313 SYSERROR("Unable to change working directory");
1314 return -1;
1315 }
1316
1317 if (chroot(".") == -1) {
1318 SYSERROR("Unable to chroot");
1319 return -1;
1320 }
1321
1322 return 0;
1323}
1324
74a3920a 1325static int setup_pivot_root(const struct lxc_rootfs *rootfs)
ac778708 1326{
ac778708
DL
1327 if (!rootfs->path)
1328 return 0;
1329
91e93c71
AV
1330 if (detect_ramfs_rootfs()) {
1331 if (prepare_ramfs_root(rootfs->mount))
1332 return -1;
1333 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1334 ERROR("failed to setup pivot root");
25368b52 1335 return -1;
c69bd12f
DL
1336 }
1337
25368b52 1338 return 0;
0ad19a3f 1339}
1340
d852c78c 1341static int setup_pts(int pts)
3c26f34e 1342{
77890c6d
SW
1343 char target[PATH_MAX];
1344
d852c78c
DL
1345 if (!pts)
1346 return 0;
3c26f34e 1347
1348 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1349 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1350 return -1;
1351 }
1352
7e40254a
JTLB
1353 if (mkdir("/dev/pts", 0755)) {
1354 if ( errno != EEXIST ) {
1355 SYSERROR("failed to create '/dev/pts'");
1356 return -1;
1357 }
1358 }
1359
a6afdde9 1360 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1361 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1362 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1363 return -1;
1364 }
1365
3c26f34e 1366 if (access("/dev/ptmx", F_OK)) {
1367 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1368 goto out;
36eb9bde 1369 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1370 return -1;
1371 }
1372
77890c6d
SW
1373 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1374 goto out;
1375
3c26f34e 1376 /* fallback here, /dev/pts/ptmx exists just mount bind */
1377 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1378 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1379 return -1;
1380 }
cd54d859
DL
1381
1382 INFO("created new pts instance");
d852c78c 1383
3c26f34e 1384out:
1385 return 0;
1386}
1387
/*
 * Apply the execution domain @persona to the calling process.
 *
 * A value of -1 means "leave unchanged". The whole body is compiled out
 * when HAVE_SYS_PERSONALITY_H is not set, in which case 0 is returned.
 *
 * Returns 0 on success, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
	#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
	#endif

	return 0;
}
1404
7c6ef2a2 1405static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1406 const struct lxc_console *console)
6e590161 1407{
63376d7d
DL
1408 char path[MAXPATHLEN];
1409 struct stat s;
7c6ef2a2 1410 int ret;
52e35957 1411
7c6ef2a2
SH
1412 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1413 if (ret >= sizeof(path)) {
959aee9c 1414 ERROR("console path too long");
7c6ef2a2
SH
1415 return -1;
1416 }
52e35957 1417
63376d7d 1418 if (access(path, F_OK)) {
466978b0 1419 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1420 return 0;
52e35957
DL
1421 }
1422
b5159817
DE
1423 if (console->master < 0) {
1424 INFO("no console");
f78a1f32
DL
1425 return 0;
1426 }
ed502555 1427
63376d7d
DL
1428 if (stat(path, &s)) {
1429 SYSERROR("failed to stat '%s'", path);
1430 return -1;
1431 }
1432
1433 if (chmod(console->name, s.st_mode)) {
1434 SYSERROR("failed to set mode '0%o' to '%s'",
1435 s.st_mode, console->name);
1436 return -1;
1437 }
13954cce 1438
592fd47a 1439 if (safe_mount(console->name, path, "none", MS_BIND, 0, rootfs->mount)) {
63376d7d 1440 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1441 return -1;
1442 }
1443
63376d7d 1444 INFO("console has been setup");
7c6ef2a2
SH
1445 return 0;
1446}
1447
1448static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1449 const struct lxc_console *console,
1450 char *ttydir)
1451{
1452 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1453 int ret;
1454
1455 /* create rootfs/dev/<ttydir> directory */
1456 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1457 ttydir);
1458 if (ret >= sizeof(path))
1459 return -1;
1460 ret = mkdir(path, 0755);
1461 if (ret && errno != EEXIST) {
959aee9c 1462 SYSERROR("failed with errno %d to create %s", errno, path);
7c6ef2a2
SH
1463 return -1;
1464 }
959aee9c 1465 INFO("created %s", path);
7c6ef2a2
SH
1466
1467 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1468 rootfs->mount, ttydir);
1469 if (ret >= sizeof(lxcpath)) {
959aee9c 1470 ERROR("console path too long");
7c6ef2a2
SH
1471 return -1;
1472 }
1473
1474 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1475 ret = unlink(path);
1476 if (ret && errno != ENOENT) {
959aee9c 1477 SYSERROR("error unlinking %s", path);
7c6ef2a2
SH
1478 return -1;
1479 }
1480
1481 ret = creat(lxcpath, 0660);
1482 if (ret==-1 && errno != EEXIST) {
959aee9c 1483 SYSERROR("error %d creating %s", errno, lxcpath);
7c6ef2a2
SH
1484 return -1;
1485 }
4d44e274
SH
1486 if (ret >= 0)
1487 close(ret);
7c6ef2a2 1488
b5159817
DE
1489 if (console->master < 0) {
1490 INFO("no console");
7c6ef2a2
SH
1491 return 0;
1492 }
1493
592fd47a 1494 if (safe_mount(console->name, lxcpath, "none", MS_BIND, 0, rootfs->mount)) {
7c6ef2a2
SH
1495 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1496 return -1;
1497 }
1498
1499 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1500 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1501 if (ret >= sizeof(lxcpath)) {
1502 ERROR("lxc/console path too long");
1503 return -1;
1504 }
7c6ef2a2
SH
1505 ret = symlink(lxcpath, path);
1506 if (ret) {
1507 SYSERROR("failed to create symlink for console");
1508 return -1;
1509 }
1510
1511 INFO("console has been setup on %s", lxcpath);
cd54d859 1512
6e590161 1513 return 0;
1514}
1515
7c6ef2a2
SH
1516static int setup_console(const struct lxc_rootfs *rootfs,
1517 const struct lxc_console *console,
1518 char *ttydir)
1519{
1520 /* We don't have a rootfs, /dev/console will be shared */
1521 if (!rootfs->path)
1522 return 0;
1523 if (!ttydir)
1524 return setup_dev_console(rootfs, console);
1525
1526 return setup_ttydir_console(rootfs, console, ttydir);
1527}
1528
1bd051a6
SH
1529static int setup_kmsg(const struct lxc_rootfs *rootfs,
1530 const struct lxc_console *console)
1531{
1532 char kpath[MAXPATHLEN];
1533 int ret;
1534
222fea5a
DE
1535 if (!rootfs->path)
1536 return 0;
1bd051a6
SH
1537 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1538 if (ret < 0 || ret >= sizeof(kpath))
1539 return -1;
1540
1541 ret = unlink(kpath);
1542 if (ret && errno != ENOENT) {
959aee9c 1543 SYSERROR("error unlinking %s", kpath);
1bd051a6
SH
1544 return -1;
1545 }
1546
1547 ret = symlink("console", kpath);
1548 if (ret) {
1549 SYSERROR("failed to create symlink for kmsg");
1550 return -1;
1551 }
1552
1553 return 0;
1554}
1555
998ac676
RT
1556static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1557{
1558 struct mount_opt *mo;
1559
1560 /* If opt is found in mount_opt, set or clear flags.
1561 * Otherwise append it to data. */
1562
1563 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1564 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1565 if (mo->clear)
1566 *flags &= ~mo->flag;
1567 else
1568 *flags |= mo->flag;
1569 return;
1570 }
1571 }
1572
1573 if (strlen(*data))
1574 strcat(*data, ",");
1575 strcat(*data, opt);
1576}
1577
/*
 * Split the comma-separated option string @mntopts into mount flags and a
 * filesystem data string.
 *
 * *@mntflags receives the accumulated flags. *@mntdata receives a newly
 * allocated string with the remaining options, which the caller must
 * free(), or NULL when there are none. Both outputs are reset before
 * anything else happens, so they are valid even on failure.
 *
 * Returns 0 on success (including @mntopts == NULL), -1 on allocation
 * failure.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy;
	char *extra;
	char *tok;
	char *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy. */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* The data string can never be longer than the option string itself. */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);

	free(copy);

	return 0;
}
1616
/*
 * Terminate @word in place at its first space or tab. A string without
 * either is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1623
/*
 * Skip @nfields whitespace-separated fields in @src.
 *
 * Each step moves past one run of non-blank characters and the single space
 * or tab that follows it. Returns a pointer to the character after the last
 * separator skipped, or to the terminating NUL if the string ends first.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int left = nfields;

	while (left-- > 0) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1641
/*
 * Perform one mount and, for bind or remount requests, a follow-up remount.
 *
 * The first mount is done through safe_mount() with MS_REMOUNT masked out.
 * If MS_REMOUNT or MS_BIND was requested, the mount is then repeated with
 * MS_REMOUNT set. When statvfs() is available, the nosuid/nodev/ro/noexec
 * flags reported for @fsname are added to that remount; a pure bind mount
 * that needs none of these and was not asked to be read-only skips it.
 *
 * When @optional is non-zero a failed mount is only logged and 0 is
 * returned.
 *
 * Returns 0 on success (or on optional failure), -1 on failure.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional, const char *rootfs)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif
	unsigned long rqd_flags = 0;

	if (safe_mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data, rootfs)) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	/* Only bind mounts and explicit remounts need the second pass. */
	if (!(mountflags & (MS_REMOUNT | MS_BIND)))
		goto done;

	DEBUG("remounting %s on %s to respect bind or remount options",
	      fsname ? fsname : "(none)", target ? target : "(none)");

	if (mountflags & MS_RDONLY)
		rqd_flags |= MS_RDONLY;

#ifdef HAVE_STATVFS
	if (statvfs(fsname, &sb) == 0) {
		unsigned long required_flags = rqd_flags;

		/* Carry over the restrictive flags reported for the source. */
		required_flags |= sb.f_flag & (MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC);

		DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

		/*
		 * If this was a bind mount request, and required_flags
		 * does not have any flags which are not already in
		 * mountflags, then skip the remount
		 */
		if (!(mountflags & MS_REMOUNT) &&
		    !(required_flags & ~mountflags) && rqd_flags == 0) {
			DEBUG("mountflags already was %lu, skipping remount",
			      mountflags);
			goto done;
		}

		mountflags |= required_flags;
	}
#endif

	if (mount(fsname, target, fstype, mountflags | MS_REMOUNT, data) < 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s",
		     fsname, target, strerror(errno));
		return 0;
	}

done:
	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1718
/*
 * Strip the lxc-only options 'create=dir', 'create=file' and 'optional'
 * from mntent->mnt_opts, editing the string in place.
 *
 * For each keyword only the first occurrence is handled. If a comma follows
 * it, the rest of the string is shifted down over the keyword; otherwise
 * the string is cut at the keyword.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
	};
	size_t idx;

	for (idx = 0; idx < sizeof(culled) / sizeof(culled[0]); idx++) {
		char *hit = strstr(mntent->mnt_opts, culled[idx]);
		char *comma;

		if (!hit)
			continue;

		comma = strchr(hit, ',');
		if (comma)
			memmove(hit, comma + 1, strlen(comma + 1) + 1);
		else
			/* no more mntopts, so just chop it here */
			*hit = '\0';
	}
}
1743
6e46cc0d 1744static int mount_entry_create_aufs_dirs(const struct mntent *mntent,
0a2dddd4
CB
1745 const struct lxc_rootfs *rootfs,
1746 const char *lxc_name,
1747 const char *lxc_path)
6e46cc0d 1748{
0a2dddd4 1749 char lxcpath[MAXPATHLEN];
1e3ce0da 1750 char *rootfsdir = NULL;
6e46cc0d
CB
1751 char *scratch = NULL;
1752 char *tmp = NULL;
1753 char *upperdir = NULL;
1754 char **opts = NULL;
9e5a2a01 1755 int fret = -1;
0a2dddd4 1756 int ret = 0;
6e46cc0d
CB
1757 size_t arrlen = 0;
1758 size_t i;
1759 size_t len = 0;
1e3ce0da 1760 size_t rootfslen = 0;
6e46cc0d 1761
1ec0e8e3 1762 if (!rootfs || !rootfs->path || !lxc_name || !lxc_path)
1e3ce0da 1763 goto err;
6e46cc0d
CB
1764
1765 opts = lxc_string_split(mntent->mnt_opts, ',');
1766 if (opts)
1767 arrlen = lxc_array_len((void **)opts);
1768 else
1e3ce0da 1769 goto err;
6e46cc0d
CB
1770
1771 for (i = 0; i < arrlen; i++) {
1772 if (strstr(opts[i], "br=") && (strlen(opts[i]) > (len = strlen("br="))))
1773 tmp = opts[i] + len;
1774 }
1e3ce0da
CB
1775 if (!tmp)
1776 goto err;
6e46cc0d
CB
1777
1778 upperdir = strtok_r(tmp, ":=", &scratch);
1e3ce0da
CB
1779 if (!upperdir)
1780 goto err;
6e46cc0d 1781
0a2dddd4 1782 ret = snprintf(lxcpath, MAXPATHLEN, "%s/%s", lxc_path, lxc_name);
1e3ce0da
CB
1783 if (ret < 0 || ret >= MAXPATHLEN)
1784 goto err;
1785
5c484f79 1786 rootfsdir = ovl_get_rootfs(rootfs->path, &rootfslen);
1e3ce0da
CB
1787 if (!rootfsdir)
1788 goto err;
6e46cc0d
CB
1789
1790 /* We neither allow users to create upperdirs outside the containerdir
1791 * nor inside the rootfs. The latter might be debatable. */
1e3ce0da 1792 if ((strncmp(upperdir, lxcpath, strlen(lxcpath)) == 0) && (strncmp(upperdir, rootfsdir, rootfslen) != 0))
6e46cc0d
CB
1793 if (mkdir_p(upperdir, 0755) < 0) {
1794 WARN("Failed to create upperdir");
1795 }
1796
9e5a2a01 1797 fret = 0;
1e3ce0da
CB
1798
1799err:
1800 free(rootfsdir);
1801 lxc_free_array((void **)opts, free);
9e5a2a01 1802 return fret;
6e46cc0d
CB
1803}
1804
0a2dddd4 1805
/*
 * Create whatever a mount entry needs to exist before it can be mounted.
 *
 * For "overlay" and "aufs" entries the filesystem-specific directories are
 * created first. With the "create=dir" option the mount target @path is
 * created as a directory; with "create=file" (and @path not yet existing)
 * the parent directory and an empty file are created.
 *
 * Returns 0 on success, -1 if a required target could not be created or
 * memory could not be allocated.
 */
static int mount_entry_create_dir_file(const struct mntent *mntent,
				       const char* path, const struct lxc_rootfs *rootfs,
				       const char *lxc_name, const char *lxc_path)
{
	char *pathcopy = NULL;
	char *parent;
	int ret = 0;
	FILE *pathfile = NULL;

	if (strncmp(mntent->mnt_type, "overlay", 7) == 0) {
		if (ovl_mkdir(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	} else if (strncmp(mntent->mnt_type, "aufs", 4) == 0) {
		if (mount_entry_create_aufs_dirs(mntent, rootfs, lxc_name, lxc_path) < 0)
			return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		/* dirname() may modify its argument, so work on a copy. */
		pathcopy = strdup(path);
		if (!pathcopy) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		/*
		 * dirname() may return a pointer into pathcopy or a pointer
		 * to static storage (e.g. "." for a path without a slash).
		 * Keep the result separate so that only the strdup()'d
		 * buffer is ever passed to free().
		 */
		parent = dirname(pathcopy);
		if (mkdir_p(parent, 0755) < 0) {
			WARN("Failed to create target directory");
		}

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
			ret = -1;
		} else {
			fclose(pathfile);
		}
	}

	free(pathcopy);
	return ret;
}
1846
db4aba38 1847static inline int mount_entry_on_generic(struct mntent *mntent,
0a2dddd4
CB
1848 const char* path, const struct lxc_rootfs *rootfs,
1849 const char *lxc_name, const char *lxc_path)
4d5b72a1
NC
1850{
1851 unsigned long mntflags;
1852 char *mntdata;
1853 int ret;
1854 bool optional = hasmntopt(mntent, "optional") != NULL;
1855
0a2dddd4 1856 ret = mount_entry_create_dir_file(mntent, path, rootfs, lxc_name, lxc_path);
34cfffb3 1857
608e3567
SH
1858 if (ret < 0)
1859 return optional ? 0 : -1;
1860
4e4ca161
SH
1861 cull_mntent_opt(mntent);
1862
a17b1e65
SG
1863 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1864 free(mntdata);
1865 return -1;
1866 }
1867
1ec0e8e3
CB
1868 /* rootfs struct will be empty when container is created without rootfs. */
1869 char *rootfs_path = NULL;
1870 if (rootfs && rootfs->path)
1871 rootfs_path = rootfs->mount;
1872
6e46cc0d 1873 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type, mntflags,
1ec0e8e3 1874 mntdata, optional, rootfs_path);
68c152ef 1875
911324ef 1876 free(mntdata);
911324ef
DL
1877 return ret;
1878}
1879
db4aba38
NC
1880static inline int mount_entry_on_systemfs(struct mntent *mntent)
1881{
0a2dddd4 1882 return mount_entry_on_generic(mntent, mntent->mnt_dir, NULL, NULL, NULL);
db4aba38
NC
1883}
1884
4e4ca161 1885static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2 1886 const struct lxc_rootfs *rootfs,
0a2dddd4
CB
1887 const char *lxc_name,
1888 const char *lxc_path)
911324ef 1889{
013bd428 1890 char *aux;
59760f5d 1891 char path[MAXPATHLEN];
80a881b2 1892 int r, ret = 0, offset;
67e571de 1893 const char *lxcpath;
0ad19a3f 1894
593e8478 1895 lxcpath = lxc_global_config_value("lxc.lxcpath");
2a59a681
SH
1896 if (!lxcpath) {
1897 ERROR("Out of memory");
1898 return -1;
1899 }
1900
80a881b2 1901 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1902 * use $lxcpath/CN/rootfs as the target prefix */
1903 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1904 if (r < 0 || r >= MAXPATHLEN)
1905 goto skipvarlib;
1906
1907 aux = strstr(mntent->mnt_dir, path);
1908 if (aux) {
1909 offset = strlen(path);
1910 goto skipabs;
1911 }
1912
1913skipvarlib:
013bd428
DL
1914 aux = strstr(mntent->mnt_dir, rootfs->path);
1915 if (!aux) {
1916 WARN("ignoring mount point '%s'", mntent->mnt_dir);
db4aba38 1917 return ret;
013bd428 1918 }
80a881b2
SH
1919 offset = strlen(rootfs->path);
1920
1921skipabs:
013bd428 1922
9ba8130c 1923 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1924 aux + offset);
1925 if (r < 0 || r >= MAXPATHLEN) {
1926 WARN("pathnme too long for '%s'", mntent->mnt_dir);
a17b1e65
SG
1927 return -1;
1928 }
1929
0a2dddd4 1930 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef 1931}
d330fe7b 1932
4e4ca161 1933static int mount_entry_on_relative_rootfs(struct mntent *mntent,
0a2dddd4
CB
1934 const struct lxc_rootfs *rootfs,
1935 const char *lxc_name,
1936 const char *lxc_path)
911324ef
DL
1937{
1938 char path[MAXPATHLEN];
911324ef 1939 int ret;
d330fe7b 1940
34cfffb3 1941 /* relative to root mount point */
6e46cc0d 1942 ret = snprintf(path, sizeof(path), "%s/%s", rootfs->mount, mntent->mnt_dir);
9ba8130c
SH
1943 if (ret >= sizeof(path)) {
1944 ERROR("path name too long");
1945 return -1;
1946 }
911324ef 1947
0a2dddd4 1948 return mount_entry_on_generic(mntent, path, rootfs, lxc_name, lxc_path);
911324ef
DL
1949}
1950
80a881b2 1951static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
0a2dddd4 1952 const char *lxc_name, const char *lxc_path)
911324ef 1953{
aaf901be
AM
1954 struct mntent mntent;
1955 char buf[4096];
911324ef 1956 int ret = -1;
e76b8764 1957
aaf901be 1958 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
e76b8764 1959
911324ef 1960 if (!rootfs->path) {
aaf901be 1961 if (mount_entry_on_systemfs(&mntent))
e76b8764 1962 goto out;
911324ef 1963 continue;
e76b8764
CDC
1964 }
1965
911324ef 1966 /* We have a separate root, mounts are relative to it */
aaf901be 1967 if (mntent.mnt_dir[0] != '/') {
0a2dddd4 1968 if (mount_entry_on_relative_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef
DL
1969 goto out;
1970 continue;
1971 }
cd54d859 1972
0a2dddd4 1973 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name, lxc_path))
911324ef 1974 goto out;
0ad19a3f 1975 }
cd54d859 1976
0ad19a3f 1977 ret = 0;
cd54d859
DL
1978
1979 INFO("mount points have been setup");
0ad19a3f 1980out:
e7938e9e
MN
1981 return ret;
1982}
1983
/*
 * Mount all entries listed in the fstab file @fstab.
 *
 * A NULL @fstab means there is nothing to mount.
 *
 * Returns 0 on success, -1 if the file cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name, const char *lxc_path)
{
	FILE *entries;
	int rc;

	if (!fstab)
		return 0;

	entries = setmntent(fstab, "r");
	if (!entries) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
	endmntent(entries);

	return rc;
}
2004
9fc7f8c0 2005FILE *write_mount_file(struct lxc_list *mount)
e7938e9e
MN
2006{
2007 FILE *file;
2008 struct lxc_list *iterator;
2009 char *mount_entry;
e7938e9e
MN
2010
2011 file = tmpfile();
2012 if (!file) {
2013 ERROR("tmpfile error: %m");
9fc7f8c0 2014 return NULL;
e7938e9e
MN
2015 }
2016
2017 lxc_list_for_each(iterator, mount) {
2018 mount_entry = iterator->elem;
1d6b1976 2019 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
2020 }
2021
2022 rewind(file);
9fc7f8c0
TA
2023 return file;
2024}
2025
/*
 * Mount the entries held in the list @mount by writing them to a temporary
 * fstab-format file and processing that file.
 *
 * Returns 0 on success, -1 on failure.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
			       const char *lxc_name, const char *lxc_path)
{
	int rc;
	FILE *entries = write_mount_file(mount);

	if (!entries)
		return -1;

	rc = mount_file_entries(rootfs, entries, lxc_name, lxc_path);
	fclose(entries);

	return rc;
}
2041
bab88e68
CS
2042static int parse_cap(const char *cap)
2043{
2044 char *ptr = NULL;
2045 int i, capid = -1;
2046
7035407c
DE
2047 if (!strcmp(cap, "none"))
2048 return -2;
2049
bab88e68
CS
2050 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2051
2052 if (strcmp(cap, caps_opt[i].name))
2053 continue;
2054
2055 capid = caps_opt[i].value;
2056 break;
2057 }
2058
2059 if (capid < 0) {
2060 /* try to see if it's numeric, so the user may specify
2061 * capabilities that the running kernel knows about but
2062 * we don't */
2063 errno = 0;
2064 capid = strtol(cap, &ptr, 10);
2065 if (!ptr || *ptr != '\0' || errno != 0)
2066 /* not a valid number */
2067 capid = -1;
2068 else if (capid > lxc_caps_last_cap())
2069 /* we have a number but it's not a valid
2070 * capability */
2071 capid = -1;
2072 }
2073
2074 return capid;
2075}
2076
0769b82a
CS
2077int in_caplist(int cap, struct lxc_list *caps)
2078{
2079 struct lxc_list *iterator;
2080 int capid;
2081
2082 lxc_list_for_each(iterator, caps) {
2083 capid = parse_cap(iterator->elem);
2084 if (capid == cap)
2085 return 1;
2086 }
2087
2088 return 0;
2089}
2090
81810dd1
DL
2091static int setup_caps(struct lxc_list *caps)
2092{
2093 struct lxc_list *iterator;
2094 char *drop_entry;
bab88e68 2095 int capid;
81810dd1
DL
2096
2097 lxc_list_for_each(iterator, caps) {
2098
2099 drop_entry = iterator->elem;
2100
bab88e68 2101 capid = parse_cap(drop_entry);
d55bc1ad 2102
81810dd1 2103 if (capid < 0) {
1e11be34
DL
2104 ERROR("unknown capability %s", drop_entry);
2105 return -1;
81810dd1
DL
2106 }
2107
2108 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2109
2110 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
3ec1648d
SH
2111 SYSERROR("failed to remove %s capability", drop_entry);
2112 return -1;
2113 }
81810dd1
DL
2114
2115 }
2116
1fb86a7c
SH
2117 DEBUG("capabilities have been setup");
2118
2119 return 0;
2120}
2121
2122static int dropcaps_except(struct lxc_list *caps)
2123{
2124 struct lxc_list *iterator;
2125 char *keep_entry;
1fb86a7c
SH
2126 int i, capid;
2127 int numcaps = lxc_caps_last_cap() + 1;
959aee9c 2128 INFO("found %d capabilities", numcaps);
1fb86a7c 2129
2caf9a97
SH
2130 if (numcaps <= 0 || numcaps > 200)
2131 return -1;
2132
1fb86a7c
SH
2133 // caplist[i] is 1 if we keep capability i
2134 int *caplist = alloca(numcaps * sizeof(int));
2135 memset(caplist, 0, numcaps * sizeof(int));
2136
2137 lxc_list_for_each(iterator, caps) {
2138
2139 keep_entry = iterator->elem;
2140
bab88e68 2141 capid = parse_cap(keep_entry);
1fb86a7c 2142
7035407c
DE
2143 if (capid == -2)
2144 continue;
2145
1fb86a7c
SH
2146 if (capid < 0) {
2147 ERROR("unknown capability %s", keep_entry);
2148 return -1;
2149 }
2150
8255688a 2151 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
1fb86a7c
SH
2152
2153 caplist[capid] = 1;
2154 }
2155 for (i=0; i<numcaps; i++) {
2156 if (caplist[i])
2157 continue;
2158 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
3ec1648d
SH
2159 SYSERROR("failed to remove capability %d", i);
2160 return -1;
2161 }
1fb86a7c
SH
2162 }
2163
2164 DEBUG("capabilities have been setup");
81810dd1
DL
2165
2166 return 0;
2167}
2168
0ad19a3f 2169static int setup_hw_addr(char *hwaddr, const char *ifname)
2170{
2171 struct sockaddr sockaddr;
2172 struct ifreq ifr;
2173 int ret, fd;
2174
3cfc0f3a
MN
2175 ret = lxc_convert_mac(hwaddr, &sockaddr);
2176 if (ret) {
2177 ERROR("mac address '%s' conversion failed : %s",
2178 hwaddr, strerror(-ret));
0ad19a3f 2179 return -1;
2180 }
2181
2182 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2183 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2184 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2185
2186 fd = socket(AF_INET, SOCK_DGRAM, 0);
2187 if (fd < 0) {
3ab87b66 2188 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2189 return -1;
2190 }
2191
2192 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2193 close(fd);
2194 if (ret)
3ab87b66 2195 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2196
5da6aa8c 2197 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2198
0ad19a3f 2199 return ret;
2200}
2201
82d5ae15 2202static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2203{
82d5ae15
DL
2204 struct lxc_list *iterator;
2205 struct lxc_inetdev *inetdev;
3cfc0f3a 2206 int err;
0ad19a3f 2207
82d5ae15
DL
2208 lxc_list_for_each(iterator, ip) {
2209
2210 inetdev = iterator->elem;
2211
0093bb8c
DL
2212 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2213 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2214 if (err) {
2215 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2216 ifindex, strerror(-err));
82d5ae15
DL
2217 return -1;
2218 }
2219 }
2220
2221 return 0;
0ad19a3f 2222}
2223
82d5ae15 2224static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2225{
82d5ae15 2226 struct lxc_list *iterator;
7fa9074f 2227 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2228 int err;
0ad19a3f 2229
82d5ae15
DL
2230 lxc_list_for_each(iterator, ip) {
2231
2232 inet6dev = iterator->elem;
2233
b3df193c 2234 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2235 &inet6dev->mcast, &inet6dev->acast,
2236 inet6dev->prefix);
3cfc0f3a
MN
2237 if (err) {
2238 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2239 ifindex, strerror(-err));
82d5ae15 2240 return -1;
3cfc0f3a 2241 }
82d5ae15
DL
2242 }
2243
2244 return 0;
0ad19a3f 2245}
2246
82d5ae15 2247static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2248{
0ad19a3f 2249 char ifname[IFNAMSIZ];
0ad19a3f 2250 char *current_ifname = ifname;
3cfc0f3a 2251 int err;
0ad19a3f 2252
82d5ae15
DL
2253 /* empty network namespace */
2254 if (!netdev->ifindex) {
b0efbac4 2255 if (netdev->flags & IFF_UP) {
d472214b 2256 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2257 if (err) {
2258 ERROR("failed to set the loopback up : %s",
2259 strerror(-err));
82d5ae15
DL
2260 return -1;
2261 }
82d5ae15 2262 }
40790553
SH
2263 if (netdev->type != LXC_NET_VETH)
2264 return 0;
2265 netdev->ifindex = if_nametoindex(netdev->name);
0ad19a3f 2266 }
13954cce 2267
b466dc33 2268 /* get the new ifindex in case of physical netdev */
40790553 2269 if (netdev->type == LXC_NET_PHYS) {
b466dc33
BP
2270 if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
2271 ERROR("failed to get ifindex for %s",
2272 netdev->link);
2273 return -1;
2274 }
40790553 2275 }
b466dc33 2276
82d5ae15
DL
2277 /* retrieve the name of the interface */
2278 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2279 ERROR("no interface corresponding to index '%d'",
82d5ae15 2280 netdev->ifindex);
0ad19a3f 2281 return -1;
2282 }
13954cce 2283
018ef520 2284 /* default: let the system to choose one interface name */
9d083402 2285 if (!netdev->name)
fb6d9b2f
DL
2286 netdev->name = netdev->type == LXC_NET_PHYS ?
2287 netdev->link : "eth%d";
018ef520 2288
82d5ae15 2289 /* rename the interface name */
40790553
SH
2290 if (strcmp(ifname, netdev->name) != 0) {
2291 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2292 if (err) {
2293 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2294 strerror(-err));
2295 return -1;
2296 }
018ef520
DL
2297 }
2298
2299 /* Re-read the name of the interface because its name has changed
2300 * and would be automatically allocated by the system
2301 */
82d5ae15 2302 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2303 ERROR("no interface corresponding to index '%d'",
82d5ae15 2304 netdev->ifindex);
018ef520 2305 return -1;
0ad19a3f 2306 }
2307
82d5ae15
DL
2308 /* set a mac address */
2309 if (netdev->hwaddr) {
2310 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2311 ERROR("failed to setup hw address for '%s'",
82d5ae15 2312 current_ifname);
0ad19a3f 2313 return -1;
2314 }
2315 }
2316
82d5ae15
DL
2317 /* setup ipv4 addresses on the interface */
2318 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2319 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2320 ifname);
2321 return -1;
2322 }
2323
82d5ae15
DL
2324 /* setup ipv6 addresses on the interface */
2325 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2326 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2327 ifname);
2328 return -1;
2329 }
2330
82d5ae15 2331 /* set the network device up */
b0efbac4 2332 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2333 int err;
2334
d472214b 2335 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2336 if (err) {
2337 ERROR("failed to set '%s' up : %s", current_ifname,
2338 strerror(-err));
0ad19a3f 2339 return -1;
2340 }
2341
2342 /* the network is up, make the loopback up too */
d472214b 2343 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2344 if (err) {
2345 ERROR("failed to set the loopback up : %s",
2346 strerror(-err));
0ad19a3f 2347 return -1;
2348 }
2349 }
2350
f8fee0e2
MK
2351 /* We can only set up the default routes after bringing
2352 * up the interface, sine bringing up the interface adds
2353 * the link-local routes and we can't add a default
2354 * route if the gateway is not reachable. */
2355
2356 /* setup ipv4 gateway on the interface */
2357 if (netdev->ipv4_gateway) {
2358 if (!(netdev->flags & IFF_UP)) {
2359 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2360 return -1;
2361 }
2362
2363 if (lxc_list_empty(&netdev->ipv4)) {
2364 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2365 return -1;
2366 }
2367
2368 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2369 if (err) {
fc739df5
SG
2370 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2371 if (err) {
2372 ERROR("failed to add ipv4 dest for '%s': %s",
2373 ifname, strerror(-err));
2374 }
2375
2376 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2377 if (err) {
2378 ERROR("failed to setup ipv4 gateway for '%s': %s",
2379 ifname, strerror(-err));
2380 if (netdev->ipv4_gateway_auto) {
2381 char buf[INET_ADDRSTRLEN];
2382 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2383 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2384 }
2385 return -1;
19a26f82 2386 }
f8fee0e2
MK
2387 }
2388 }
2389
2390 /* setup ipv6 gateway on the interface */
2391 if (netdev->ipv6_gateway) {
2392 if (!(netdev->flags & IFF_UP)) {
2393 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2394 return -1;
2395 }
2396
2397 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2398 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2399 return -1;
2400 }
2401
2402 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2403 if (err) {
fc739df5
SG
2404 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2405 if (err) {
2406 ERROR("failed to add ipv6 dest for '%s': %s",
f8fee0e2 2407 ifname, strerror(-err));
19a26f82 2408 }
fc739df5
SG
2409
2410 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2411 if (err) {
2412 ERROR("failed to setup ipv6 gateway for '%s': %s",
2413 ifname, strerror(-err));
2414 if (netdev->ipv6_gateway_auto) {
2415 char buf[INET6_ADDRSTRLEN];
2416 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2417 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2418 }
2419 return -1;
2420 }
f8fee0e2
MK
2421 }
2422 }
2423
cd54d859
DL
2424 DEBUG("'%s' has been setup", current_ifname);
2425
0ad19a3f 2426 return 0;
2427}
2428
5f4535a3 2429static int setup_network(struct lxc_list *network)
0ad19a3f 2430{
82d5ae15 2431 struct lxc_list *iterator;
82d5ae15 2432 struct lxc_netdev *netdev;
0ad19a3f 2433
5f4535a3 2434 lxc_list_for_each(iterator, network) {
cd54d859 2435
5f4535a3 2436 netdev = iterator->elem;
82d5ae15
DL
2437
2438 if (setup_netdev(netdev)) {
2439 ERROR("failed to setup netdev");
2440 return -1;
2441 }
2442 }
cd54d859 2443
5f4535a3
DL
2444 if (!lxc_list_empty(network))
2445 INFO("network has been setup");
cd54d859
DL
2446
2447 return 0;
0ad19a3f 2448}
2449
2af6bd1b
SH
2450/* try to move physical nics to the init netns */
2451void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2452{
2453 int i, ret, oldfd;
2454 char path[MAXPATHLEN];
4ec31c52 2455 char ifname[IFNAMSIZ];
2af6bd1b
SH
2456
2457 if (netnsfd < 0)
2458 return;
2459
2460 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2461 if (ret < 0 || ret >= MAXPATHLEN) {
2462 WARN("Failed to open monitor netns fd");
2463 return;
2464 }
2465 if ((oldfd = open(path, O_RDONLY)) < 0) {
2466 SYSERROR("Failed to open monitor netns fd");
2467 return;
2468 }
2469 if (setns(netnsfd, 0) != 0) {
2470 SYSERROR("Failed to enter container netns to reset nics");
2471 close(oldfd);
2472 return;
2473 }
2474 for (i=0; i<conf->num_savednics; i++) {
2475 struct saved_nic *s = &conf->saved_nics[i];
f2e206ff 2476 /* retrieve the name of the interface */
2477 if (!if_indextoname(s->ifindex, ifname)) {
2478 WARN("no interface corresponding to index '%d'", s->ifindex);
2479 continue;
2480 }
2481 if (lxc_netdev_move_by_name(ifname, 1, NULL))
2482 WARN("Error moving nic name:%s back to host netns", ifname);
2af6bd1b
SH
2483 }
2484 if (setns(oldfd, 0) != 0)
2485 SYSERROR("Failed to re-enter monitor's netns");
2486 close(oldfd);
2487}
2488
2489void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
7b35f3d6
SH
2490{
2491 int i;
2492
2af6bd1b
SH
2493 if (conf->num_savednics == 0)
2494 return;
2495
7b35f3d6 2496 INFO("running to reset %d nic names", conf->num_savednics);
2af6bd1b 2497 restore_phys_nics_to_netns(netnsfd, conf);
7b35f3d6
SH
2498 for (i=0; i<conf->num_savednics; i++) {
2499 struct saved_nic *s = &conf->saved_nics[i];
959aee9c 2500 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
7b35f3d6
SH
2501 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2502 free(s->orig_name);
2503 }
2504 conf->num_savednics = 0;
7b35f3d6
SH
2505}
2506
ae9242c8
SH
2507static char *default_rootfs_mount = LXCROOTFSMOUNT;
2508
7b379ab3 2509struct lxc_conf *lxc_conf_init(void)
089cd8b8 2510{
7b379ab3 2511 struct lxc_conf *new;
26ddeedd 2512 int i;
7b379ab3
MN
2513
2514 new = malloc(sizeof(*new));
2515 if (!new) {
2516 ERROR("lxc_conf_init : %m");
2517 return NULL;
2518 }
2519 memset(new, 0, sizeof(*new));
2520
b40a606e 2521 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2522 new->personality = -1;
124fa0a8 2523 new->autodev = 1;
596a818d
DE
2524 new->console.log_path = NULL;
2525 new->console.log_fd = -1;
28a4b0e5 2526 new->console.path = NULL;
63376d7d 2527 new->console.peer = -1;
b5159817
DE
2528 new->console.peerpty.busy = -1;
2529 new->console.peerpty.master = -1;
2530 new->console.peerpty.slave = -1;
63376d7d
DL
2531 new->console.master = -1;
2532 new->console.slave = -1;
2533 new->console.name[0] = '\0';
d2e30e99 2534 new->maincmd_fd = -1;
76a26f55 2535 new->nbd_idx = -1;
54c30e29 2536 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2537 if (!new->rootfs.mount) {
2538 ERROR("lxc_conf_init : %m");
2539 free(new);
2540 return NULL;
2541 }
d89de239 2542 new->kmsg = 0;
858377e4 2543 new->logfd = -1;
7b379ab3
MN
2544 lxc_list_init(&new->cgroup);
2545 lxc_list_init(&new->network);
2546 lxc_list_init(&new->mount_list);
81810dd1 2547 lxc_list_init(&new->caps);
1fb86a7c 2548 lxc_list_init(&new->keepcaps);
f6d3e3e4 2549 lxc_list_init(&new->id_map);
f979ac15 2550 lxc_list_init(&new->includes);
4184c3e1 2551 lxc_list_init(&new->aliens);
7c661726 2552 lxc_list_init(&new->environment);
26ddeedd
SH
2553 for (i=0; i<NUM_LXC_HOOKS; i++)
2554 lxc_list_init(&new->hooks[i]);
ee1e7aa0 2555 lxc_list_init(&new->groups);
fe4de9a6
DE
2556 new->lsm_aa_profile = NULL;
2557 new->lsm_se_context = NULL;
5112cd70 2558 new->tmp_umount_proc = 0;
7b379ab3 2559
9f30a190
MM
2560 for (i = 0; i < LXC_NS_MAX; i++)
2561 new->inherit_ns_fd[i] = -1;
2562
72bb04e4
PT
2563 /* if running in a new user namespace, init and COMMAND
2564 * default to running as UID/GID 0 when using lxc-execute */
2565 new->init_uid = 0;
2566 new->init_gid = 0;
2567
7b379ab3 2568 return new;
089cd8b8
DL
2569}
2570
a589434e 2571static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2572{
8634bc19 2573 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2574 char veth2buf[IFNAMSIZ], *veth2;
e54864d3 2575 int err, mtu = 0;
13954cce 2576
8bee8851 2577 if (netdev->priv.veth_attr.pair) {
e892973e 2578 veth1 = netdev->priv.veth_attr.pair;
8bee8851
WB
2579 if (handler->conf->reboot)
2580 lxc_netdev_delete_by_name(veth1);
2581 } else {
9ba8130c
SH
2582 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2583 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2584 ERROR("veth1 name too long");
2585 return -1;
2586 }
a0265685 2587 veth1 = lxc_mkifname(veth1buf);
ad40563e
ÇO
2588 if (!veth1) {
2589 ERROR("failed to allocate a temporary name");
2590 return -1;
2591 }
74a2b586
JK
2592 /* store away for deconf */
2593 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2594 }
82d5ae15 2595
0e391e57 2596 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
a0265685 2597 veth2 = lxc_mkifname(veth2buf);
ad40563e 2598 if (!veth2) {
82d5ae15 2599 ERROR("failed to allocate a temporary name");
ad40563e 2600 goto out_delete;
0ad19a3f 2601 }
2602
3cfc0f3a
MN
2603 err = lxc_veth_create(veth1, veth2);
2604 if (err) {
2e2d6a7b 2605 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
3cfc0f3a 2606 strerror(-err));
ad40563e 2607 goto out_delete;
0ad19a3f 2608 }
13954cce 2609
49684c0b
CS
2610 /* changing the high byte of the mac address to 0xfe, the bridge interface
2611 * will always keep the host's mac address and not take the mac address
2612 * of a container */
2613 err = setup_private_host_hw_addr(veth1);
2614 if (err) {
2e2d6a7b 2615 ERROR("failed to change mac address of host interface '%s': %s",
49684c0b
CS
2616 veth1, strerror(-err));
2617 goto out_delete;
2618 }
2619
af651aa9
SN
2620 netdev->ifindex = if_nametoindex(veth2);
2621 if (!netdev->ifindex) {
2622 ERROR("failed to retrieve the index for %s", veth2);
2623 goto out_delete;
2624 }
2625
82d5ae15 2626 if (netdev->mtu) {
e54864d3
NC
2627 mtu = atoi(netdev->mtu);
2628 } else if (netdev->link) {
af651aa9 2629 mtu = netdev_get_mtu(netdev->ifindex);
e54864d3
NC
2630 }
2631
2632 if (mtu) {
2633 err = lxc_netdev_set_mtu(veth1, mtu);
3cfc0f3a 2634 if (!err)
e54864d3 2635 err = lxc_netdev_set_mtu(veth2, mtu);
3cfc0f3a 2636 if (err) {
e54864d3
NC
2637 ERROR("failed to set mtu '%i' for veth pair (%s and %s): %s",
2638 mtu, veth1, veth2, strerror(-err));
eb14c10a 2639 goto out_delete;
75d09f83
DL
2640 }
2641 }
2642
3cfc0f3a 2643 if (netdev->link) {
c43cbc04 2644 err = lxc_bridge_attach(handler->lxcpath, handler->name, netdev->link, veth1);
3cfc0f3a 2645 if (err) {
2e2d6a7b 2646 ERROR("failed to attach '%s' to the bridge '%s': %s",
3cfc0f3a
MN
2647 veth1, netdev->link, strerror(-err));
2648 goto out_delete;
2649 }
eb14c10a
DL
2650 }
2651
d472214b 2652 err = lxc_netdev_up(veth1);
6e35af2e
DL
2653 if (err) {
2654 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2655 goto out_delete;
0ad19a3f 2656 }
2657
e3b4c4c4 2658 if (netdev->upscript) {
751d9dcd
DL
2659 err = run_script(handler->name, "net", netdev->upscript, "up",
2660 "veth", veth1, (char*) NULL);
2661 if (err)
e3b4c4c4 2662 goto out_delete;
e3b4c4c4
ST
2663 }
2664
a589434e 2665 DEBUG("instantiated veth '%s/%s', index is '%d'",
82d5ae15
DL
2666 veth1, veth2, netdev->ifindex);
2667
6ab9ab6d 2668 return 0;
eb14c10a
DL
2669
2670out_delete:
b84f58b9 2671 lxc_netdev_delete_by_name(veth1);
f10fad2f 2672 if (!netdev->priv.veth_attr.pair)
ad40563e 2673 free(veth1);
f10fad2f 2674 free(veth2);
6ab9ab6d 2675 return -1;
13954cce 2676}
d957ae2d 2677
74a2b586
JK
2678static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2679{
2680 char *veth1;
2681 int err;
2682
2683 if (netdev->priv.veth_attr.pair)
2684 veth1 = netdev->priv.veth_attr.pair;
2685 else
2686 veth1 = netdev->priv.veth_attr.veth1;
2687
2688 if (netdev->downscript) {
2689 err = run_script(handler->name, "net", netdev->downscript,
2690 "down", "veth", veth1, (char*) NULL);
2691 if (err)
2692 return -1;
2693 }
2694 return 0;
2695}
2696
a589434e 2697static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2698{
0e391e57 2699 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2700 int err;
d957ae2d
MT
2701
2702 if (!netdev->link) {
2703 ERROR("no link specified for macvlan netdev");
2704 return -1;
2705 }
13954cce 2706
9ba8130c
SH
2707 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2708 if (err >= sizeof(peerbuf))
2709 return -1;
82d5ae15 2710
a0265685 2711 peer = lxc_mkifname(peerbuf);
ad40563e 2712 if (!peer) {
82d5ae15
DL
2713 ERROR("failed to make a temporary name");
2714 return -1;
0ad19a3f 2715 }
2716
3cfc0f3a
MN
2717 err = lxc_macvlan_create(netdev->link, peer,
2718 netdev->priv.macvlan_attr.mode);
2719 if (err) {
2720 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2721 peer, netdev->link, strerror(-err));
ad40563e 2722 goto out;
0ad19a3f 2723 }
2724
82d5ae15
DL
2725 netdev->ifindex = if_nametoindex(peer);
2726 if (!netdev->ifindex) {
36eb9bde 2727 ERROR("failed to retrieve the index for %s", peer);
ad40563e 2728 goto out;
22ebac19 2729 }
2730
e3b4c4c4 2731 if (netdev->upscript) {
751d9dcd
DL
2732 err = run_script(handler->name, "net", netdev->upscript, "up",
2733 "macvlan", netdev->link, (char*) NULL);
2734 if (err)
ad40563e 2735 goto out;
e3b4c4c4
ST
2736 }
2737
a589434e 2738 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
e892973e 2739 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2740
d957ae2d 2741 return 0;
ad40563e
ÇO
2742out:
2743 lxc_netdev_delete_by_name(peer);
2744 free(peer);
2745 return -1;
0ad19a3f 2746}
2747
74a2b586
JK
2748static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2749{
2750 int err;
2751
2752 if (netdev->downscript) {
2753 err = run_script(handler->name, "net", netdev->downscript,
2754 "down", "macvlan", netdev->link,
2755 (char*) NULL);
2756 if (err)
2757 return -1;
2758 }
2759 return 0;
2760}
2761
a589434e
JN
2762/* XXX: merge with instantiate_macvlan */
2763static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2764{
2765 char peer[IFNAMSIZ];
3cfc0f3a 2766 int err;
82f58d03 2767 static uint16_t vlan_cntr = 0;
26c39028
JHS
2768
2769 if (!netdev->link) {
2770 ERROR("no link specified for vlan netdev");
2771 return -1;
2772 }
2773
82f58d03 2774 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
9ba8130c
SH
2775 if (err >= sizeof(peer)) {
2776 ERROR("peer name too long");
2777 return -1;
2778 }
26c39028 2779
3cfc0f3a
MN
2780 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2781 if (err) {
2782 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2783 peer, netdev->link, strerror(-err));
26c39028
JHS
2784 return -1;
2785 }
2786
2787 netdev->ifindex = if_nametoindex(peer);
2788 if (!netdev->ifindex) {
2789 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2790 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2791 return -1;
2792 }
2793
a589434e 2794 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
e892973e
DL
2795 netdev->ifindex);
2796
26c39028
JHS
2797 return 0;
2798}
2799
74a2b586
JK
/* Nothing is done when a vlan device is shut down; always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2804
a589434e 2805static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2806{
6168e99f
DL
2807 if (!netdev->link) {
2808 ERROR("no link specified for the physical interface");
2809 return -1;
2810 }
2811
9d083402 2812 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2813 if (!netdev->ifindex) {
9d083402 2814 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2815 return -1;
2816 }
2817
e3b4c4c4
ST
2818 if (netdev->upscript) {
2819 int err;
751d9dcd
DL
2820 err = run_script(handler->name, "net", netdev->upscript,
2821 "up", "phys", netdev->link, (char*) NULL);
2822 if (err)
e3b4c4c4 2823 return -1;
e3b4c4c4
ST
2824 }
2825
82d5ae15 2826 return 0;
0ad19a3f 2827}
2828
74a2b586
JK
2829static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2830{
2831 int err;
2832
2833 if (netdev->downscript) {
2834 err = run_script(handler->name, "net", netdev->downscript,
2835 "down", "phys", netdev->link, (char*) NULL);
2836 if (err)
2837 return -1;
2838 }
2839 return 0;
2840}
2841
a589434e 2842static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
26b797f3
SH
2843{
2844 netdev->ifindex = 0;
2845 return 0;
2846}
2847
a589434e 2848static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2849{
82d5ae15 2850 netdev->ifindex = 0;
e3b4c4c4
ST
2851 if (netdev->upscript) {
2852 int err;
751d9dcd
DL
2853 err = run_script(handler->name, "net", netdev->upscript,
2854 "up", "empty", (char*) NULL);
2855 if (err)
e3b4c4c4 2856 return -1;
e3b4c4c4 2857 }
82d5ae15 2858 return 0;
0ad19a3f 2859}
2860
74a2b586
JK
2861static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2862{
2863 int err;
2864
2865 if (netdev->downscript) {
2866 err = run_script(handler->name, "net", netdev->downscript,
2867 "down", "empty", (char*) NULL);
2868 if (err)
2869 return -1;
2870 }
2871 return 0;
2872}
2873
26b797f3
SH
/* Nothing is done when a "none" device is shut down; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2878
2879int lxc_requests_empty_network(struct lxc_handler *handler)
2880{
2881 struct lxc_list *network = &handler->conf->network;
2882 struct lxc_list *iterator;
2883 struct lxc_netdev *netdev;
2884 bool found_none = false, found_nic = false;
2885
2886 if (lxc_list_empty(network))
2887 return 0;
2888
2889 lxc_list_for_each(iterator, network) {
2890
2891 netdev = iterator->elem;
2892
2893 if (netdev->type == LXC_NET_NONE)
2894 found_none = true;
2895 else
2896 found_nic = true;
2897 }
2898 if (found_none && !found_nic)
2899 return 1;
2900 return 0;
2901}
2902
e3b4c4c4 2903int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2904{
e3b4c4c4 2905 struct lxc_list *network = &handler->conf->network;
82d5ae15 2906 struct lxc_list *iterator;
82d5ae15 2907 struct lxc_netdev *netdev;
cbef6c52
SH
2908 int am_root = (getuid() == 0);
2909
2910 if (!am_root)
2911 return 0;
0ad19a3f 2912
5f4535a3 2913 lxc_list_for_each(iterator, network) {
0ad19a3f 2914
5f4535a3 2915 netdev = iterator->elem;
13954cce 2916
24654103 2917 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2918 ERROR("invalid network configuration type '%d'",
5f4535a3 2919 netdev->type);
82d5ae15
DL
2920 return -1;
2921 }
0ad19a3f 2922
e3b4c4c4 2923 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2924 ERROR("failed to create netdev");
2925 return -1;
2926 }
e3b4c4c4 2927
0ad19a3f 2928 }
2929
2930 return 0;
2931}
2932
74a2b586 2933void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2934{
74a2b586 2935 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2936 struct lxc_list *iterator;
2937 struct lxc_netdev *netdev;
2938
2939 lxc_list_for_each(iterator, network) {
2940 netdev = iterator->elem;
d472214b 2941
74a2b586 2942 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2943 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2944 WARN("failed to rename to the initial name the " \
2945 "netdev '%s'", netdev->link);
d472214b 2946 continue;
d8f8e352 2947 }
d472214b 2948
74a2b586
JK
2949 if (netdev_deconf[netdev->type](handler, netdev)) {
2950 WARN("failed to destroy netdev");
2951 }
2952
d8f8e352
DL
2953 /* Recent kernel remove the virtual interfaces when the network
2954 * namespace is destroyed but in case we did not moved the
2955 * interface to the network namespace, we have to destroy it
2956 */
74a2b586
JK
2957 if (netdev->ifindex != 0 &&
2958 lxc_netdev_delete_by_index(netdev->ifindex))
685062d6
SH
2959 WARN("failed to remove interface %d '%s'",
2960 netdev->ifindex,
2961 netdev->name ? netdev->name : "(null)");
7fef7a06
DL
2962 }
2963}
2964
45e854dc
SG
2965#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2966
fe1f672f
ÇO
2967/* lxc-user-nic returns "interface_name:interface_name\n" */
2968#define MAX_BUFFER_SIZE IFNAMSIZ*2 + 2
c43cbc04
SH
2969static int unpriv_assign_nic(const char *lxcpath, char *lxcname,
2970 struct lxc_netdev *netdev, pid_t pid)
cbef6c52
SH
2971{
2972 pid_t child;
a7242d9a
ÇO
2973 int bytes, pipefd[2];
2974 char *token, *saveptr = NULL;
fe1f672f 2975 char buffer[MAX_BUFFER_SIZE];
cff7b5eb 2976 char netdev_link[IFNAMSIZ+1];
cbef6c52
SH
2977
2978 if (netdev->type != LXC_NET_VETH) {
2979 ERROR("nic type %d not support for unprivileged use",
2980 netdev->type);
2981 return -1;
2982 }
2983
a7242d9a
ÇO
2984 if(pipe(pipefd) < 0) {
2985 SYSERROR("pipe failed");
2986 return -1;
2987 }
2988
cbef6c52
SH
2989 if ((child = fork()) < 0) {
2990 SYSERROR("fork");
a7242d9a
ÇO
2991 close(pipefd[0]);
2992 close(pipefd[1]);
2993 return -1;
2994 }
2995
2996 if (child == 0) { // child
2997 /* close the read-end of the pipe */
2998 close(pipefd[0]);
2999 /* redirect the stdout to write-end of the pipe */
3000 dup2(pipefd[1], STDOUT_FILENO);
3001 /* close the write-end of the pipe */
fe1f672f 3002 close(pipefd[1]);
a7242d9a
ÇO
3003
3004 // Call lxc-user-nic pid type bridge
3005 char pidstr[20];
cff7b5eb
FN
3006 if (netdev->link) {
3007 strncpy(netdev_link, netdev->link, IFNAMSIZ);
3008 } else {
3009 strncpy(netdev_link, "none", IFNAMSIZ);
3010 }
a7242d9a
ÇO
3011 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3012 pidstr[19] = '\0';
c43cbc04
SH
3013 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, lxcpath, lxcname,
3014 pidstr, "veth", netdev_link, netdev->name, NULL);
a7242d9a
ÇO
3015 SYSERROR("execvp lxc-user-nic");
3016 exit(1);
3017 }
3018
3019 /* close the write-end of the pipe */
3020 close(pipefd[1]);
3021
fe1f672f 3022 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
a7242d9a
ÇO
3023 if (bytes < 0) {
3024 SYSERROR("read failed");
3025 }
3026 buffer[bytes - 1] = '\0';
3027
3028 if (wait_for_pid(child) != 0) {
3029 close(pipefd[0]);
cbef6c52
SH
3030 return -1;
3031 }
3032
a7242d9a
ÇO
3033 /* close the read-end of the pipe */
3034 close(pipefd[0]);
cbef6c52 3035
a7242d9a
ÇO
3036 /* fill netdev->name field */
3037 token = strtok_r(buffer, ":", &saveptr);
3038 if (!token)
3039 return -1;
658979c5
SH
3040 netdev->name = malloc(IFNAMSIZ+1);
3041 if (!netdev->name) {
3042 ERROR("Out of memory");
3043 return -1;
3044 }
3045 memset(netdev->name, 0, IFNAMSIZ+1);
3046 strncpy(netdev->name, token, IFNAMSIZ);
a7242d9a
ÇO
3047
3048 /* fill netdev->veth_attr.pair field */
3049 token = strtok_r(NULL, ":", &saveptr);
3050 if (!token)
3051 return -1;
3052 netdev->priv.veth_attr.pair = strdup(token);
658979c5
SH
3053 if (!netdev->priv.veth_attr.pair) {
3054 ERROR("Out of memory");
3055 return -1;
3056 }
45e854dc 3057
a7242d9a 3058 return 0;
cbef6c52
SH
3059}
3060
c43cbc04
SH
3061int lxc_assign_network(const char *lxcpath, char *lxcname,
3062 struct lxc_list *network, pid_t pid)
0ad19a3f 3063{
82d5ae15 3064 struct lxc_list *iterator;
82d5ae15 3065 struct lxc_netdev *netdev;
f2e206ff 3066 char ifname[IFNAMSIZ];
cbef6c52 3067 int am_root = (getuid() == 0);
3cfc0f3a 3068 int err;
0ad19a3f 3069
5f4535a3 3070 lxc_list_for_each(iterator, network) {
82d5ae15 3071
5f4535a3 3072 netdev = iterator->elem;
82d5ae15 3073
fbb16259 3074 if (netdev->type == LXC_NET_VETH && !am_root) {
c43cbc04 3075 if (unpriv_assign_nic(lxcpath, lxcname, netdev, pid))
cbef6c52 3076 return -1;
658979c5
SH
3077 // lxc-user-nic has moved the nic to the new ns.
3078 // unpriv_assign_nic() fills in netdev->name.
3079 // netdev->ifindex will be filed in at setup_netdev.
cbef6c52
SH
3080 continue;
3081 }
236087a6 3082
fbb16259
SH
3083 /* empty network namespace, nothing to move */
3084 if (!netdev->ifindex)
3085 continue;
3086
f2e206ff 3087 /* retrieve the name of the interface */
3088 if (!if_indextoname(netdev->ifindex, ifname)) {
3089 ERROR("no interface corresponding to index '%d'", netdev->ifindex);
3090 return -1;
3091 }
3092
3093 err = lxc_netdev_move_by_name(ifname, pid, NULL);
3cfc0f3a
MN
3094 if (err) {
3095 ERROR("failed to move '%s' to the container : %s",
3096 netdev->link, strerror(-err));
82d5ae15
DL
3097 return -1;
3098 }
3099
c1c75c04 3100 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 3101 }
3102
3103 return 0;
3104}
3105
251d0d2a
DE
3106static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3107 size_t buf_size)
f6d3e3e4
SH
3108{
3109 char path[PATH_MAX];
e4ccd113 3110 int ret, closeret;
f6d3e3e4
SH
3111 FILE *f;
3112
3113 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3114 if (ret < 0 || ret >= PATH_MAX) {
03fadd16 3115 fprintf(stderr, "%s: path name too long\n", __func__);
f6d3e3e4
SH
3116 return -E2BIG;
3117 }
3118 f = fopen(path, "w");
3119 if (!f) {
3120 perror("open");
3121 return -EINVAL;
3122 }
251d0d2a 3123 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 3124 if (ret < 0)
e4ccd113
SH
3125 SYSERROR("writing id mapping");
3126 closeret = fclose(f);
3127 if (closeret)
3128 SYSERROR("writing id mapping");
3129 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
3130}
3131
3132int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3133{
3134 struct lxc_list *iterator;
3135 struct id_map *map;
8afb3e61 3136 int ret = 0, use_shadow = 0;
251d0d2a 3137 enum idtype type;
8afb3e61
SG
3138 char *buf = NULL, *pos, *cmdpath = NULL;
3139
22038de5
SH
3140 /*
3141 * If newuidmap exists, that is, if shadow is handing out subuid
3142 * ranges, then insist that root also reserve ranges in subuid. This
3143 * will protected it by preventing another user from being handed the
3144 * range by shadow.
3145 */
9d9c111c 3146 cmdpath = on_path("newuidmap", NULL);
8afb3e61
SG
3147 if (cmdpath) {
3148 use_shadow = 1;
3149 free(cmdpath);
3150 }
3151
0e6e3a41
SG
3152 if (!use_shadow && geteuid()) {
3153 ERROR("Missing newuidmap/newgidmap");
3154 return -1;
3155 }
251d0d2a
DE
3156
3157 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 3158 int left, fill;
cf3ef16d
SH
3159 int had_entry = 0;
3160 if (!buf) {
3161 buf = pos = malloc(4096);
4f7521b4
SH
3162 if (!buf)
3163 return -ENOMEM;
cf3ef16d
SH
3164 }
3165 pos = buf;
0e6e3a41 3166 if (use_shadow)
d1838f34 3167 pos += sprintf(buf, "new%cidmap %d",
cf3ef16d
SH
3168 type == ID_TYPE_UID ? 'u' : 'g',
3169 pid);
4f7521b4 3170
cf3ef16d
SH
3171 lxc_list_for_each(iterator, idmap) {
3172 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 3173 map = iterator->elem;
cf3ef16d
SH
3174 if (map->idtype != type)
3175 continue;
3176
3177 had_entry = 1;
3178 left = 4096 - (pos - buf);
d1838f34 3179 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
0e6e3a41 3180 use_shadow ? " " : "",
d1838f34 3181 map->nsid, map->hostid, map->range,
0e6e3a41 3182 use_shadow ? "" : "\n");
cf3ef16d
SH
3183 if (fill <= 0 || fill >= left)
3184 SYSERROR("snprintf failed, too many mappings");
3185 pos += fill;
251d0d2a 3186 }
cf3ef16d 3187 if (!had_entry)
4f7521b4 3188 continue;
cf3ef16d 3189
0e6e3a41 3190 if (!use_shadow) {
cf3ef16d 3191 ret = write_id_mapping(type, pid, buf, pos-buf);
d1838f34
MS
3192 } else {
3193 left = 4096 - (pos - buf);
3194 fill = snprintf(pos, left, "\n");
3195 if (fill <= 0 || fill >= left)
3196 SYSERROR("snprintf failed, too many mappings");
3197 pos += fill;
cf3ef16d 3198 ret = system(buf);
d1838f34 3199 }
cf3ef16d 3200
f6d3e3e4
SH
3201 if (ret)
3202 break;
3203 }
251d0d2a 3204
f10fad2f 3205 free(buf);
f6d3e3e4
SH
3206 return ret;
3207}
3208
cf3ef16d 3209/*
7b50c609
TS
3210 * return the host uid/gid to which the container root is mapped in
3211 * *val.
0b3a6504 3212 * Return true if id was found, false otherwise.
cf3ef16d 3213 */
2a9a80cb 3214bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3ec1648d 3215 unsigned long *val)
cf3ef16d
SH
3216{
3217 struct lxc_list *it;
3218 struct id_map *map;
3219
3220 lxc_list_for_each(it, &conf->id_map) {
3221 map = it->elem;
7b50c609 3222 if (map->idtype != idtype)
cf3ef16d
SH
3223 continue;
3224 if (map->nsid != 0)
3225 continue;
2a9a80cb
SH
3226 *val = map->hostid;
3227 return true;
cf3ef16d 3228 }
2a9a80cb 3229 return false;
cf3ef16d
SH
3230}
3231
2133f58c 3232int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3233{
3234 struct lxc_list *it;
3235 struct id_map *map;
3236 lxc_list_for_each(it, &conf->id_map) {
3237 map = it->elem;
2133f58c 3238 if (map->idtype != idtype)
cf3ef16d
SH
3239 continue;
3240 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 3241 return (id - map->hostid) + map->nsid;
cf3ef16d 3242 }
57d116ab 3243 return -1;
cf3ef16d
SH
3244}
3245
2133f58c 3246int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
cf3ef16d
SH
3247{
3248 struct lxc_list *it;
3249 struct id_map *map;
2133f58c 3250 unsigned int freeid = 0;
cf3ef16d
SH
3251again:
3252 lxc_list_for_each(it, &conf->id_map) {
3253 map = it->elem;
2133f58c 3254 if (map->idtype != idtype)
cf3ef16d
SH
3255 continue;
3256 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3257 freeid = map->nsid + map->range;
3258 goto again;
3259 }
3260 }
3261 return freeid;
3262}
3263
19a26f82
MK
3264int lxc_find_gateway_addresses(struct lxc_handler *handler)
3265{
3266 struct lxc_list *network = &handler->conf->network;
3267 struct lxc_list *iterator;
3268 struct lxc_netdev *netdev;
3269 int link_index;
3270
3271 lxc_list_for_each(iterator, network) {
3272 netdev = iterator->elem;
3273
3274 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3275 continue;
3276
3277 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3278 ERROR("gateway = auto only supported for "
3279 "veth and macvlan");
3280 return -1;
3281 }
3282
3283 if (!netdev->link) {
3284 ERROR("gateway = auto needs a link interface");
3285 return -1;
3286 }
3287
3288 link_index = if_nametoindex(netdev->link);
3289 if (!link_index)
3290 return -EINVAL;
3291
3292 if (netdev->ipv4_gateway_auto) {
3293 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3294 ERROR("failed to automatically find ipv4 gateway "
3295 "address from link interface '%s'", netdev->link);
3296 return -1;
3297 }
3298 }
3299
3300 if (netdev->ipv6_gateway_auto) {
3301 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3302 ERROR("failed to automatically find ipv6 gateway "
3303 "address from link interface '%s'", netdev->link);
3304 return -1;
3305 }
3306 }
3307 }
3308
3309 return 0;
3310}
3311
5e4a62bf 3312int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 3313{
5e4a62bf 3314 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 3315 int i, ret;
b0a33c1e 3316
5e4a62bf
DL
3317 /* no tty in the configuration */
3318 if (!conf->tty)
b0a33c1e 3319 return 0;
3320
13954cce 3321 tty_info->pty_info =
e4e7d59d 3322 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3323 if (!tty_info->pty_info) {
36eb9bde 3324 SYSERROR("failed to allocate pty_info");
985d15b1 3325 return -1;
b0a33c1e 3326 }
3327
985d15b1 3328 for (i = 0; i < conf->tty; i++) {
13954cce 3329
b0a33c1e 3330 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3331
025ed0f3
SH
3332 process_lock();
3333 ret = openpty(&pty_info->master, &pty_info->slave,
3334 pty_info->name, NULL, NULL);
3335 process_unlock();
3336 if (ret) {
36eb9bde 3337 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3338 tty_info->nbtty = i;
3339 lxc_delete_tty(tty_info);
3340 return -1;
b0a33c1e 3341 }
3342
5332bb84
DL
3343 DEBUG("allocated pty '%s' (%d/%d)",
3344 pty_info->name, pty_info->master, pty_info->slave);
3345
3ec1648d 3346 /* Prevent leaking the file descriptors to the container */
b035ad62
MS
3347 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3348 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3349
b0a33c1e 3350 pty_info->busy = 0;
3351 }
3352
985d15b1 3353 tty_info->nbtty = conf->tty;
1ac470c0
DL
3354
3355 INFO("tty's configured");
3356
985d15b1 3357 return 0;
b0a33c1e 3358}
3359
3360void lxc_delete_tty(struct lxc_tty_info *tty_info)
3361{
3362 int i;
3363
3364 for (i = 0; i < tty_info->nbtty; i++) {
3365 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3366
3367 close(pty_info->master);
3368 close(pty_info->slave);
3369 }
3370
3371 free(tty_info->pty_info);
3372 tty_info->nbtty = 0;
3373}
3374
f6d3e3e4 3375/*
7b50c609
TS
3376 * chown_mapped_root: for an unprivileged user with uid/gid X to
3377 * chown a dir to subuid/subgid Y, he needs to run chown as root
3378 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3379 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3380 * root is privileged with respect to hostuid/hostgid X, allowing
3381 * him to do the chown.
f6d3e3e4 3382 */
c4d10a05 3383int chown_mapped_root(char *path, struct lxc_conf *conf)
f6d3e3e4 3384{
7b50c609
TS
3385 uid_t rootuid;
3386 gid_t rootgid;
c4d10a05 3387 pid_t pid;
2a9a80cb 3388 unsigned long val;
a7ef8753 3389 char *chownpath = path;
f6d3e3e4 3390
2a9a80cb 3391 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
c4d10a05
SH
3392 ERROR("No mapping for container root");
3393 return -1;
f6d3e3e4 3394 }
7b50c609
TS
3395 rootuid = (uid_t) val;
3396 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3397 ERROR("No mapping for container root");
3398 return -1;
3399 }
3400 rootgid = (gid_t) val;
2a9a80cb 3401
a7ef8753
SH
3402 /*
3403 * In case of overlay, we want only the writeable layer
3404 * to be chowned
3405 */
1f92162d 3406 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
a7ef8753
SH
3407 chownpath = strchr(path, ':');
3408 if (!chownpath) {
3409 ERROR("Bad overlay path: %s", path);
3410 return -1;
3411 }
3412 chownpath = strchr(chownpath+1, ':');
3413 if (!chownpath) {
3414 ERROR("Bad overlay path: %s", path);
3415 return -1;
3416 }
3417 chownpath++;
3418 }
3419 path = chownpath;
c4d10a05 3420 if (geteuid() == 0) {
7b50c609 3421 if (chown(path, rootuid, rootgid) < 0) {
c4d10a05
SH
3422 ERROR("Error chowning %s", path);
3423 return -1;
3424 }
3425 return 0;
3426 }
f3d7e4ca 3427
7b50c609 3428 if (rootuid == geteuid()) {
f3d7e4ca
SH
3429 // nothing to do
3430 INFO("%s: container root is our uid; no need to chown" ,__func__);
3431 return 0;
3432 }
3433
c4d10a05
SH
3434 pid = fork();
3435 if (pid < 0) {
3436 SYSERROR("Failed forking");
f6d3e3e4
SH
3437 return -1;
3438 }
c4d10a05 3439 if (!pid) {
7b50c609
TS
3440 int hostuid = geteuid(), hostgid = getegid(), ret;
3441 struct stat sb;
3442 char map1[100], map2[100], map3[100], map4[100], map5[100];
3443 char ugid[100];
3444 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3445 "-m", map3, "-m", map5,
3446 "--", "chown", ugid, path, NULL };
3447 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3448 "-m", map3, "-m", map4, "-m", map5,
3449 "--", "chown", ugid, path, NULL };
3450
3451 // save the current gid of "path"
3452 if (stat(path, &sb) < 0) {
3453 ERROR("Error stat %s", path);
3454 return -1;
3455 }
f6d3e3e4 3456
9a7c2aba
SH
3457 /*
3458 * A file has to be group-owned by a gid mapped into the
3459 * container, or the container won't be privileged over it.
3460 */
3461 if (sb.st_uid == geteuid() &&
3462 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3463 chown(path, -1, hostgid) < 0) {
3464 ERROR("Failed chgrping %s", path);
7b50c609
TS
3465 return -1;
3466 }
3467
3468 // "u:0:rootuid:1"
3469 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
c4d10a05
SH
3470 if (ret < 0 || ret >= 100) {
3471 ERROR("Error uid printing map string");
f6d3e3e4
SH
3472 return -1;
3473 }
c4d10a05 3474
98e5ba51
SH
3475 // "u:hostuid:hostuid:1"
3476 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3477 if (ret < 0 || ret >= 100) {
3478 ERROR("Error uid printing map string");
3479 return -1;
3480 }
3481
7b50c609
TS
3482 // "g:0:rootgid:1"
3483 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
c4d10a05 3484 if (ret < 0 || ret >= 100) {
7b50c609 3485 ERROR("Error gid printing map string");
c4d10a05
SH
3486 return -1;
3487 }
3488
7b50c609 3489 // "g:pathgid:rootgid+pathgid:1"
b4c1e35d
SG
3490 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3491 rootgid + (gid_t)sb.st_gid);
7b50c609
TS
3492 if (ret < 0 || ret >= 100) {
3493 ERROR("Error gid printing map string");
3494 return -1;
3495 }
3496
3497 // "g:hostgid:hostgid:1"
3498 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3499 if (ret < 0 || ret >= 100) {
3500 ERROR("Error gid printing map string");
3501 return -1;
3502 }
3503
3504 // "0:pathgid" (chown)
b4c1e35d 3505 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
7b50c609
TS
3506 if (ret < 0 || ret >= 100) {
3507 ERROR("Error owner printing format string for chown");
3508 return -1;
3509 }
3510
3511 if (hostgid == sb.st_gid)
3512 ret = execvp("lxc-usernsexec", args1);
3513 else
3514 ret = execvp("lxc-usernsexec", args2);
c4d10a05
SH
3515 SYSERROR("Failed executing usernsexec");
3516 exit(1);
f6d3e3e4 3517 }
c4d10a05 3518 return wait_for_pid(pid);
f6d3e3e4
SH
3519}
3520
c4d10a05 3521int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3522{
c4d10a05 3523 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3524 return 0;
c4d10a05 3525
29b10e4f 3526 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
c4d10a05
SH
3527 ERROR("Failed to chown %s", c->console.name);
3528 return -1;
3529 }
3530
f6d3e3e4
SH
3531 return 0;
3532}
3533
f267d666 3534/* NOTE: not to be called from inside the container namespace! */
5112cd70
SH
3535int tmp_proc_mount(struct lxc_conf *lxc_conf)
3536{
3537 int mounted;
3538
01958b1f 3539 mounted = mount_proc_if_needed(lxc_conf->rootfs.path ? lxc_conf->rootfs.mount : "");
5112cd70
SH
3540 if (mounted == -1) {
3541 SYSERROR("failed to mount /proc in the container.");
01958b1f
DW
3542 /* continue only if there is no rootfs */
3543 if (lxc_conf->rootfs.path)
3544 return -1;
5112cd70
SH
3545 } else if (mounted == 1) {
3546 lxc_conf->tmp_umount_proc = 1;
3547 }
3548 return 0;
3549}
3550
3551void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3552{
3553 if (lxc_conf->tmp_umount_proc == 1) {
3554 umount("/proc");
3555 lxc_conf->tmp_umount_proc = 0;
3556 }
3557}
3558
/*
 * Walk /proc/self/mountinfo and remount every entry whose optional fields
 * contain "shared" with MS_SLAVE.
 *
 * Best effort: failures are logged and container startup continues.
 */
void remount_all_slave(void)
{
	FILE *mountinfo = fopen("/proc/self/mountinfo", "r");
	char *line = NULL;
	size_t len = 0;

	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&line, &len, mountinfo) != -1) {
		/* field 4 is the mount point; options are two fields on */
		char *mntpoint = get_field(line, 4);
		char *flags = mntpoint ? get_field(mntpoint, 2) : NULL;

		if (!flags)
			continue;

		null_endofword(flags);
		if (!strstr(flags, "shared"))
			continue;

		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL)) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(line);
}
3592
2322903b
SH
3593void lxc_execute_bind_init(struct lxc_conf *conf)
3594{
3595 int ret;
9d9c111c
SH
3596 char path[PATH_MAX], destpath[PATH_MAX], *p;
3597
3598 /* If init exists in the container, don't bind mount a static one */
3599 p = choose_init(conf->rootfs.mount);
3600 if (p) {
3601 free(p);
3602 return;
3603 }
2322903b
SH
3604
3605 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3606 if (ret < 0 || ret >= PATH_MAX) {
3607 WARN("Path name too long searching for lxc.init.static");
3608 return;
3609 }
3610
3611 if (!file_exists(path)) {
3612 INFO("%s does not exist on host", path);
3613 return;
3614 }
3615
3616 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3617 if (ret < 0 || ret >= PATH_MAX) {
3618 WARN("Path name too long for container's lxc.init.static");
3619 return;
3620 }
3621
3622 if (!file_exists(destpath)) {
3623 FILE * pathfile = fopen(destpath, "wb");
3624 if (!pathfile) {
3625 SYSERROR("Failed to create mount target '%s'", destpath);
3626 return;
3627 }
3628 fclose(pathfile);
3629 }
3630
592fd47a 3631 ret = safe_mount(path, destpath, "none", MS_BIND, NULL, conf->rootfs.mount);
2322903b
SH
3632 if (ret < 0)
3633 SYSERROR("Failed to bind lxc.init.static into container");
3634 INFO("lxc.init.static bound into container at %s", path);
3635}
3636
35120d9c
SH
3637/*
3638 * This does the work of remounting / if it is shared, calling the
3639 * container pre-mount hooks, and mounting the rootfs.
3640 */
3641int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
0ad19a3f 3642{
35120d9c
SH
3643 if (conf->rootfs_setup) {
3644 /*
3645 * rootfs was set up in another namespace. bind-mount it
3646 * to give us a mount in our own ns so we can pivot_root to it
3647 */
3648 const char *path = conf->rootfs.mount;
3649 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3650 ERROR("Failed to bind-mount container / onto itself");
145832ba 3651 return -1;
35120d9c 3652 }
145832ba 3653 return 0;
35120d9c 3654 }
d4ef7c50 3655
e995d7a2
SH
3656 remount_all_slave();
3657
35120d9c
SH
3658 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3659 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3660 return -1;
3661 }
3662
3663 if (setup_rootfs(conf)) {
3664 ERROR("failed to setup rootfs for '%s'", name);
3665 return -1;
3666 }
3667
3668 conf->rootfs_setup = true;
3669 return 0;
3670}
3671
1c1c7051
SH
3672static bool verify_start_hooks(struct lxc_conf *conf)
3673{
3674 struct lxc_list *it;
3675 char path[MAXPATHLEN];
3676 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3677 char *hookname = it->elem;
3678 struct stat st;
3679 int ret;
3680
3681 ret = snprintf(path, MAXPATHLEN, "%s%s",
7b6753e7 3682 conf->rootfs.path ? conf->rootfs.mount : "", hookname);
1c1c7051
SH
3683 if (ret < 0 || ret >= MAXPATHLEN)
3684 return false;
3685 ret = stat(path, &st);
3686 if (ret) {
7b6753e7 3687 SYSERROR("Start hook %s not found in container",
1c1c7051
SH
3688 hookname);
3689 return false;
3690 }
6a0c909a 3691 return true;
1c1c7051
SH
3692 }
3693
3694 return true;
3695}
3696
e8bd4e43
SH
3697static int send_fd(int sock, int fd)
3698{
3699 int ret = lxc_abstract_unix_send_fd(sock, fd, NULL, 0);
3700
3701
3702 if (ret < 0) {
3703 SYSERROR("Error sending tty fd to parent");
3704 return -1;
3705 }
3706
3707 return 0;
3708}
3709
3710static int send_ttys_to_parent(struct lxc_handler *handler)
3711{
3712 struct lxc_conf *conf = handler->conf;
3713 const struct lxc_tty_info *tty_info = &conf->tty_info;
3714 int i;
3715 int sock = handler->ttysock[0];
3716
3717 for (i = 0; i < tty_info->nbtty; i++) {
3718 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3719 if (send_fd(sock, pty_info->slave) < 0)
3720 goto bad;
3721 close(pty_info->slave);
3722 pty_info->slave = -1;
3723 if (send_fd(sock, pty_info->master) < 0)
3724 goto bad;
3725 close(pty_info->master);
3726 pty_info->master = -1;
3727 }
3728
3729 close(handler->ttysock[0]);
3730 close(handler->ttysock[1]);
3731
3732 return 0;
3733
3734bad:
3735 ERROR("Error writing tty fd to parent");
3736 return -1;
3737}
3738
35120d9c
SH
3739int lxc_setup(struct lxc_handler *handler)
3740{
3741 const char *name = handler->name;
3742 struct lxc_conf *lxc_conf = handler->conf;
3743 const char *lxcpath = handler->lxcpath;
35120d9c
SH
3744
3745 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3746 ERROR("Error setting up rootfs mount after spawn");
3747 return -1;
3748 }
3749
6c544cb3
MM
3750 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3751 if (setup_utsname(lxc_conf->utsname)) {
3752 ERROR("failed to setup the utsname for '%s'", name);
3753 return -1;
3754 }
0ad19a3f 3755 }
3756
5f4535a3 3757 if (setup_network(&lxc_conf->network)) {
36eb9bde 3758 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3759 return -1;
0ad19a3f 3760 }
3761
bc6928ff 3762 if (lxc_conf->autodev > 0) {
14221cbb 3763 if (mount_autodev(name, &lxc_conf->rootfs, lxcpath)) {
91c3830e 3764 ERROR("failed to mount /dev in the container");
c6883f38
SH
3765 return -1;
3766 }
3767 }
3768
368bbc02
CS
3769 /* do automatic mounts (mainly /proc and /sys), but exclude
3770 * those that need to wait until other stuff has finished
3771 */
4fb3cba5 3772 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3773 ERROR("failed to setup the automatic mounts for '%s'", name);
3774 return -1;
3775 }
3776
0a2dddd4 3777 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name, lxcpath)) {
36eb9bde 3778 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3779 return -1;
576f946d 3780 }
3781
0a2dddd4 3782 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name, lxcpath)) {
e7938e9e
MN
3783 ERROR("failed to setup the mount entries for '%s'", name);
3784 return -1;
3785 }
3786
7b6753e7 3787 /* Make sure any start hooks are in the container */
1c1c7051
SH
3788 if (!verify_start_hooks(lxc_conf))
3789 return -1;
3790
2322903b
SH
3791 if (lxc_conf->is_execute)
3792 lxc_execute_bind_init(lxc_conf);
3793
368bbc02
CS
3794 /* now mount only cgroup, if wanted;
3795 * before, /sys could not have been mounted
3796 * (is either mounted automatically or via fstab entries)
3797 */
4fb3cba5 3798 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
368bbc02
CS
3799 ERROR("failed to setup the automatic mounts for '%s'", name);
3800 return -1;
3801 }
3802
283678ed 3803 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3804 ERROR("failed to run mount hooks for container '%s'.", name);
3805 return -1;
3806 }
3807
bc6928ff 3808 if (lxc_conf->autodev > 0) {
283678ed 3809 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3810 ERROR("failed to run autodev hooks for container '%s'.", name);
3811 return -1;
3812 }
14221cbb 3813 if (fill_autodev(&lxc_conf->rootfs)) {
91c3830e
SH
3814 ERROR("failed to populate /dev in the container");
3815 return -1;
3816 }
3817 }
368bbc02 3818
37903589 3819 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3820 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3821 return -1;
6e590161 3822 }
3823
7e0e1d94
AV
3824 if (lxc_conf->kmsg) {
3825 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3826 ERROR("failed to setup kmsg for '%s'", name);
3827 }
1bd051a6 3828
69aa6655
DE
3829 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3830 ERROR("failed to setup /dev symlinks for '%s'", name);
3831 return -1;
3832 }
3833
5112cd70
SH
3834 /* mount /proc if it's not already there */
3835 if (tmp_proc_mount(lxc_conf) < 0) {
fe4de9a6 3836 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3837 return -1;
e075f5d9 3838 }
e075f5d9 3839
ac778708 3840 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3841 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3842 return -1;
ed502555 3843 }
3844
571e6ec8 3845 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3846 ERROR("failed to setup the new pts instance");
95b5ffaf 3847 return -1;
3c26f34e 3848 }
3849
e8bd4e43
SH
3850 if (lxc_create_tty(name, lxc_conf)) {
3851 ERROR("failed to create the ttys");
3852 return -1;
3853 }
3854
3855 if (send_ttys_to_parent(handler) < 0) {
3856 ERROR("failure sending console info to parent");
3857 return -1;
3858 }
3859
3860
3861 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3862 ERROR("failed to setup the ttys for '%s'", name);
3863 return -1;
3864 }
3865
3866 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3867 SYSERROR("failed to set environment variable for container ptys");
3868
3869
cccc74b5
DL
3870 if (setup_personality(lxc_conf->personality)) {
3871 ERROR("failed to setup personality");
3872 return -1;
3873 }
3874
97a8f74f
SG
3875 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3876 if (!lxc_list_empty(&lxc_conf->caps)) {
3877 ERROR("Simultaneously requested dropping and keeping caps");
f6d3e3e4
SH
3878 return -1;
3879 }
97a8f74f
SG
3880 if (dropcaps_except(&lxc_conf->keepcaps)) {
3881 ERROR("failed to keep requested caps");
3882 return -1;
3883 }
3884 } else if (setup_caps(&lxc_conf->caps)) {
3885 ERROR("failed to drop capabilities");
3886 return -1;
81810dd1
DL
3887 }
3888
cd54d859
DL
3889 NOTICE("'%s' is setup.", name);
3890
0ad19a3f 3891 return 0;
3892}
26ddeedd 3893
283678ed
SH
3894int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3895 const char *lxcpath, char *argv[])
26ddeedd
SH
3896{
3897 int which = -1;
3898 struct lxc_list *it;
3899
3900 if (strcmp(hook, "pre-start") == 0)
3901 which = LXCHOOK_PRESTART;
5ea6163a
SH
3902 else if (strcmp(hook, "pre-mount") == 0)
3903 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3904 else if (strcmp(hook, "mount") == 0)
3905 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3906 else if (strcmp(hook, "autodev") == 0)
3907 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3908 else if (strcmp(hook, "start") == 0)
3909 which = LXCHOOK_START;
52492063
WB
3910 else if (strcmp(hook, "stop") == 0)
3911 which = LXCHOOK_STOP;
26ddeedd
SH
3912 else if (strcmp(hook, "post-stop") == 0)
3913 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3914 else if (strcmp(hook, "clone") == 0)
3915 which = LXCHOOK_CLONE;
37cf711b
SY
3916 else if (strcmp(hook, "destroy") == 0)
3917 which = LXCHOOK_DESTROY;
26ddeedd
SH
3918 else
3919 return -1;
3920 lxc_list_for_each(it, &conf->hooks[which]) {
3921 int ret;
3922 char *hookname = it->elem;
283678ed 3923 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3924 if (ret)
3925 return ret;
3926 }
3927 return 0;
3928}
72d0e1cb 3929
427b3a21 3930static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3931{
3932 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3933 struct lxc_list *it2,*next;
72d0e1cb
SG
3934
3935 lxc_list_del(it);
3936
f10fad2f
ME
3937 free(netdev->link);
3938 free(netdev->name);
3939 if (netdev->type == LXC_NET_VETH)
c9bb9a85 3940 free(netdev->priv.veth_attr.pair);
f10fad2f
ME
3941 free(netdev->upscript);
3942 free(netdev->hwaddr);
3943 free(netdev->mtu);
3944 free(netdev->ipv4_gateway);
3945 free(netdev->ipv6_gateway);
9ebb03ad 3946 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3947 lxc_list_del(it2);
3948 free(it2->elem);
3949 free(it2);
3950 }
9ebb03ad 3951 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3952 lxc_list_del(it2);
3953 free(it2->elem);
3954 free(it2);
3955 }
d95db067 3956 free(netdev);
72d0e1cb
SG
3957 free(it);
3958}
3959
3960/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3961int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3962{
3963 char *p1;
3964 int ret, idx, i;
3965 struct lxc_list *it;
3966 struct lxc_netdev *netdev;
3967
46cd2845 3968 p1 = strchr(key, '.');
72d0e1cb
SG
3969 if (!p1 || *(p1+1) == '\0')
3970 p1 = NULL;
3971
3972 ret = sscanf(key, "%d", &idx);
3973 if (ret != 1) return -1;
3974 if (idx < 0)
3975 return -1;
3976
3977 i = 0;
3978 lxc_list_for_each(it, &c->network) {
3979 if (i == idx)
3980 break;
3981 i++;
3982 }
3983 if (i < idx) // we don't have that many nics defined
3984 return -1;
3985
3986 if (!it || !it->elem)
3987 return -1;
3988
3989 netdev = it->elem;
3990
3991 if (!p1) {
3992 lxc_remove_nic(it);
52d21d40 3993 } else if (strcmp(p1, ".ipv4") == 0) {
9ebb03ad
DE
3994 struct lxc_list *it2,*next;
3995 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3996 lxc_list_del(it2);
3997 free(it2->elem);
3998 free(it2);
3999 }
52d21d40 4000 } else if (strcmp(p1, ".ipv6") == 0) {
9ebb03ad
DE
4001 struct lxc_list *it2,*next;
4002 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
4003 lxc_list_del(it2);
4004 free(it2->elem);
4005 free(it2);
4006 }
72d0e1cb
SG
4007 }
4008 else return -1;
4009
4010 return 0;
4011}
4012
4013int lxc_clear_config_network(struct lxc_conf *c)
4014{
9ebb03ad
DE
4015 struct lxc_list *it,*next;
4016 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
4017 lxc_remove_nic(it);
4018 }
4019 return 0;
4020}
4021
4022int lxc_clear_config_caps(struct lxc_conf *c)
4023{
9ebb03ad 4024 struct lxc_list *it,*next;
72d0e1cb 4025
9ebb03ad 4026 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
4027 lxc_list_del(it);
4028 free(it->elem);
4029 free(it);
4030 }
4031 return 0;
4032}
4033
74a3920a 4034static int lxc_free_idmap(struct lxc_list *id_map) {
27c27d73
SH
4035 struct lxc_list *it, *next;
4036
4355ab5f 4037 lxc_list_for_each_safe(it, id_map, next) {
27c27d73
SH
4038 lxc_list_del(it);
4039 free(it->elem);
4040 free(it);
4041 }
4042 return 0;
4043}
4044
4355ab5f
SH
4045int lxc_clear_idmaps(struct lxc_conf *c)
4046{
4047 return lxc_free_idmap(&c->id_map);
4048}
4049
1fb86a7c
SH
4050int lxc_clear_config_keepcaps(struct lxc_conf *c)
4051{
4052 struct lxc_list *it,*next;
4053
4054 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4055 lxc_list_del(it);
4056 free(it->elem);
4057 free(it);
4058 }
4059 return 0;
4060}
4061
12a50cc6 4062int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 4063{
9ebb03ad 4064 struct lxc_list *it,*next;
72d0e1cb 4065 bool all = false;
12a50cc6 4066 const char *k = key + 11;
72d0e1cb
SG
4067
4068 if (strcmp(key, "lxc.cgroup") == 0)
4069 all = true;
4070
9ebb03ad 4071 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
4072 struct lxc_cgroup *cg = it->elem;
4073 if (!all && strcmp(cg->subsystem, k) != 0)
4074 continue;
4075 lxc_list_del(it);
4076 free(cg->subsystem);
4077 free(cg->value);
4078 free(cg);
4079 free(it);
4080 }
4081 return 0;
4082}
4083
ee1e7aa0
SG
4084int lxc_clear_groups(struct lxc_conf *c)
4085{
4086 struct lxc_list *it,*next;
4087
4088 lxc_list_for_each_safe(it, &c->groups, next) {
4089 lxc_list_del(it);
4090 free(it->elem);
4091 free(it);
4092 }
4093 return 0;
4094}
4095
ab799c0b
SG
4096int lxc_clear_environment(struct lxc_conf *c)
4097{
4098 struct lxc_list *it,*next;
4099
4100 lxc_list_for_each_safe(it, &c->environment, next) {
4101 lxc_list_del(it);
4102 free(it->elem);
4103 free(it);
4104 }
4105 return 0;
4106}
4107
4108
72d0e1cb
SG
4109int lxc_clear_mount_entries(struct lxc_conf *c)
4110{
9ebb03ad 4111 struct lxc_list *it,*next;
72d0e1cb 4112
9ebb03ad 4113 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
4114 lxc_list_del(it);
4115 free(it->elem);
4116 free(it);
4117 }
4118 return 0;
4119}
4120
b099e9e9
SH
4121int lxc_clear_automounts(struct lxc_conf *c)
4122{
4123 c->auto_mounts = 0;
4124 return 0;
4125}
4126
12a50cc6 4127int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 4128{
9ebb03ad 4129 struct lxc_list *it,*next;
17ed13a3 4130 bool all = false, done = false;
12a50cc6 4131 const char *k = key + 9;
72d0e1cb
SG
4132 int i;
4133
17ed13a3
SH
4134 if (strcmp(key, "lxc.hook") == 0)
4135 all = true;
4136
72d0e1cb 4137 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 4138 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 4139 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
4140 lxc_list_del(it);
4141 free(it->elem);
4142 free(it);
4143 }
4144 done = true;
72d0e1cb
SG
4145 }
4146 }
17ed13a3
SH
4147
4148 if (!done) {
4149 ERROR("Invalid hook key: %s", key);
4150 return -1;
4151 }
72d0e1cb
SG
4152 return 0;
4153}
8eb5694b 4154
74a3920a 4155static void lxc_clear_saved_nics(struct lxc_conf *conf)
7b35f3d6
SH
4156{
4157 int i;
4158
0cf45501 4159 if (!conf->saved_nics)
7b35f3d6
SH
4160 return;
4161 for (i=0; i < conf->num_savednics; i++)
4162 free(conf->saved_nics[i].orig_name);
7b35f3d6
SH
4163 free(conf->saved_nics);
4164}
4165
4184c3e1
SH
4166static inline void lxc_clear_aliens(struct lxc_conf *conf)
4167{
4168 struct lxc_list *it,*next;
4169
4170 lxc_list_for_each_safe(it, &conf->aliens, next) {
4171 lxc_list_del(it);
4172 free(it->elem);
4173 free(it);
4174 }
4175}
4176
f979ac15
SH
4177static inline void lxc_clear_includes(struct lxc_conf *conf)
4178{
4179 struct lxc_list *it,*next;
4180
4181 lxc_list_for_each_safe(it, &conf->includes, next) {
4182 lxc_list_del(it);
4183 free(it->elem);
4184 free(it);
4185 }
4186}
4187
8eb5694b
SH
4188void lxc_conf_free(struct lxc_conf *conf)
4189{
4190 if (!conf)
4191 return;
858377e4
SH
4192 if (current_config == conf)
4193 current_config = NULL;
f10fad2f
ME
4194 free(conf->console.log_path);
4195 free(conf->console.path);
4196 free(conf->rootfs.mount);
4197 free(conf->rootfs.options);
4198 free(conf->rootfs.path);
4199 free(conf->rootfs.pivot);
4200 free(conf->logfile);
858377e4
SH
4201 if (conf->logfd != -1)
4202 close(conf->logfd);
f10fad2f
ME
4203 free(conf->utsname);
4204 free(conf->ttydir);
4205 free(conf->fstab);
4206 free(conf->rcfile);
4207 free(conf->init_cmd);
6b0d5538 4208 free(conf->unexpanded_config);
393903d1 4209 free(conf->pty_names);
8eb5694b 4210 lxc_clear_config_network(conf);
f10fad2f
ME
4211 free(conf->lsm_aa_profile);
4212 free(conf->lsm_se_context);
769872f9 4213 lxc_seccomp_free(conf);
8eb5694b 4214 lxc_clear_config_caps(conf);
1fb86a7c 4215 lxc_clear_config_keepcaps(conf);
8eb5694b 4216 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 4217 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 4218 lxc_clear_mount_entries(conf);
7b35f3d6 4219 lxc_clear_saved_nics(conf);
27c27d73 4220 lxc_clear_idmaps(conf);
ee1e7aa0 4221 lxc_clear_groups(conf);
f979ac15 4222 lxc_clear_includes(conf);
761d81ca 4223 lxc_clear_aliens(conf);
ab799c0b 4224 lxc_clear_environment(conf);
8eb5694b
SH
4225 free(conf);
4226}
4355ab5f
SH
4227
/*
 * Argument bundle handed to run_userns_fn() through the clone callback.
 */
struct userns_fn_data {
	int (*fn)(void *);	/* function to run once the start byte has arrived */
	void *arg;		/* opaque argument forwarded to fn */
	int p[2];		/* pipe: p[0] is read here, p[1] is the writer's end */
};

/*
 * Clone entry point used by userns_exec_1().
 *
 * Closes the write end of the pipe, blocks until one byte arrives on the
 * read end and then calls d->fn(d->arg).
 *
 * Returns the result of fn, or -1 when the byte could not be read (for
 * example because the writer closed the pipe without writing). The read
 * end is closed on both paths so no descriptor is left open on failure.
 */
static int run_userns_fn(void *data)
{
	struct userns_fn_data *d = data;
	char c;
	ssize_t nread;

	/* we're not sharing with the parent any more, if it was a thread */
	close(d->p[1]);

	nread = read(d->p[0], &c, 1);
	close(d->p[0]);
	if (nread != 1)
		return -1;

	return d->fn(d->arg);
}
4246
4247/*
8b227008
TS
4248 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4249 * if they are not already there.
4355ab5f 4250 */
8b227008
TS
4251static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4252 uid_t uid, gid_t gid)
4355ab5f 4253{
8b227008
TS
4254 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4255 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4355ab5f
SH
4256 struct lxc_list *new = NULL, *tmp, *it, *next;
4257 struct id_map *entry;
4258
3ec1648d
SH
4259 new = malloc(sizeof(*new));
4260 if (!new) {
4261 ERROR("Out of memory building id map");
4262 return NULL;
4263 }
4264 lxc_list_init(new);
4265
8b227008
TS
4266 if (hostuid_mapped < 0) {
4267 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4268 if (hostuid_mapped < 0)
3ec1648d
SH
4269 goto err;
4270 tmp = malloc(sizeof(*tmp));
4271 if (!tmp)
4272 goto err;
4355ab5f
SH
4273 entry = malloc(sizeof(*entry));
4274 if (!entry) {
3ec1648d
SH
4275 free(tmp);
4276 goto err;
4355ab5f 4277 }
3ec1648d 4278 tmp->elem = entry;
4355ab5f 4279 entry->idtype = ID_TYPE_UID;
8b227008
TS
4280 entry->nsid = hostuid_mapped;
4281 entry->hostid = (unsigned long) uid;
4282 entry->range = 1;
4283 lxc_list_add_tail(new, tmp);
4284 }
4285 if (hostgid_mapped < 0) {
4286 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4287 if (hostgid_mapped < 0)
4288 goto err;
4289 tmp = malloc(sizeof(*tmp));
4290 if (!tmp)
4291 goto err;
4292 entry = malloc(sizeof(*entry));
4293 if (!entry) {
4294 free(tmp);
4295 goto err;
4296 }
4297 tmp->elem = entry;
4298 entry->idtype = ID_TYPE_GID;
4299 entry->nsid = hostgid_mapped;
4300 entry->hostid = (unsigned long) gid;
4355ab5f 4301 entry->range = 1;
3ec1648d 4302 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4303 }
4304 lxc_list_for_each_safe(it, &conf->id_map, next) {
4305 tmp = malloc(sizeof(*tmp));
4306 if (!tmp)
4307 goto err;
4308 entry = malloc(sizeof(*entry));
4309 if (!entry) {
4310 free(tmp);
4311 goto err;
4312 }
4313 memset(entry, 0, sizeof(*entry));
4314 memcpy(entry, it->elem, sizeof(*entry));
4315 tmp->elem = entry;
3ec1648d 4316 lxc_list_add_tail(new, tmp);
4355ab5f
SH
4317 }
4318
4319 return new;
4320
4321err:
8b227008 4322 ERROR("Out of memory building a new uid/gid map");
908fde6a
SH
4323 if (new)
4324 lxc_free_idmap(new);
c30ac545 4325 free(new);
4355ab5f
SH
4326 return NULL;
4327}
4328
4329/*
4330 * Run a function in a new user namespace.
8b227008 4331 * The caller's euid/egid will be mapped in if it is not already.
4355ab5f
SH
4332 */
4333int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4334{
4335 int ret, pid;
4336 struct userns_fn_data d;
4337 char c = '1';
4338 int p[2];
4339 struct lxc_list *idmap;
4340
4355ab5f 4341 ret = pipe(p);
4355ab5f
SH
4342 if (ret < 0) {
4343 SYSERROR("opening pipe");
4344 return -1;
4345 }
4346 d.fn = fn;
4347 d.arg = data;
4348 d.p[0] = p[0];
4349 d.p[1] = p[1];
4350 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4351 if (pid < 0)
4352 goto err;
4355ab5f 4353 close(p[0]);
4355ab5f
SH
4354 p[0] = -1;
4355
8b227008
TS
4356 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4357 ERROR("Error adding self to container uid/gid map");
4355ab5f
SH
4358 goto err;
4359 }
4360
4361 ret = lxc_map_ids(idmap, pid);
4362 lxc_free_idmap(idmap);
88dd66fc 4363 free(idmap);
565e571c 4364 if (ret) {
4355ab5f
SH
4365 ERROR("Error setting up child mappings");
4366 goto err;
4367 }
4368
4369 // kick the child
4370 if (write(p[1], &c, 1) != 1) {
4371 SYSERROR("writing to pipe to child");
4372 goto err;
4373 }
4374
3139aead
SG
4375 ret = wait_for_pid(pid);
4376
4377 close(p[1]);
4378 return ret;
4379
4355ab5f 4380err:
4355ab5f
SH
4381 if (p[0] != -1)
4382 close(p[0]);
4383 close(p[1]);
4355ab5f
SH
4384 return -1;
4385}
97e9cfa0 4386
/*
 * Look up the name of the effective user.
 *
 * Returns a newly allocated copy of the passwd entry's pw_name for
 * geteuid(), or NULL when there is no such entry (or the copy could not
 * be allocated). The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4398
/*
 * Look up the name of the effective group.
 *
 * Returns a newly allocated copy of the group entry's gr_name for
 * getegid(), or NULL when there is no such entry (or the copy could not
 * be allocated). The caller frees the result.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4410
a96a8e8c 4411/* not thread-safe, do not use from api without first forking */
97e9cfa0
SH
4412void suggest_default_idmap(void)
4413{
4414 FILE *f;
4415 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4416 char *line = NULL;
4417 char *uname, *gname;
4418 size_t len = 0;
4419
4420 if (!(uname = getuname()))
4421 return;
4422
4423 if (!(gname = getgname())) {
4424 free(uname);
4425 return;
4426 }
4427
4428 f = fopen(subuidfile, "r");
4429 if (!f) {
4430 ERROR("Your system is not configured with subuids");
4431 free(gname);
4432 free(uname);
4433 return;
4434 }
4435 while (getline(&line, &len, f) != -1) {
4436 char *p = strchr(line, ':'), *p2;
4437 if (*line == '#')
4438 continue;
4439 if (!p)
4440 continue;
4441 *p = '\0';
4442 p++;
4443 if (strcmp(line, uname))
4444 continue;
4445 p2 = strchr(p, ':');
4446 if (!p2)
4447 continue;
4448 *p2 = '\0';
4449 p2++;
4450 if (!*p2)
4451 continue;
4452 uid = atoi(p);
4453 urange = atoi(p2);
4454 }
4455 fclose(f);
4456
4457 f = fopen(subuidfile, "r");
4458 if (!f) {
4459 ERROR("Your system is not configured with subgids");
4460 free(gname);
4461 free(uname);
4462 return;
4463 }
4464 while (getline(&line, &len, f) != -1) {
4465 char *p = strchr(line, ':'), *p2;
4466 if (*line == '#')
4467 continue;
4468 if (!p)
4469 continue;
4470 *p = '\0';
4471 p++;
4472 if (strcmp(line, uname))
4473 continue;
4474 p2 = strchr(p, ':');
4475 if (!p2)
4476 continue;
4477 *p2 = '\0';
4478 p2++;
4479 if (!*p2)
4480 continue;
4481 gid = atoi(p);
4482 grange = atoi(p2);
4483 }
4484 fclose(f);
4485
f10fad2f 4486 free(line);
97e9cfa0
SH
4487
4488 if (!urange || !grange) {
4489 ERROR("You do not have subuids or subgids allocated");
4490 ERROR("Unprivileged containers require subuids and subgids");
4491 return;
4492 }
4493
4494 ERROR("You must either run as root, or define uid mappings");
4495 ERROR("To pass uid mappings to lxc-create, you could create");
4496 ERROR("~/.config/lxc/default.conf:");
4497 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4498 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4499 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4500
4501 free(gname);
4502 free(uname);
4503}
aaf26830 4504
a7307747
SH
4505static void free_cgroup_settings(struct lxc_list *result)
4506{
4507 struct lxc_list *iterator, *next;
4508
4509 lxc_list_for_each_safe(iterator, result, next) {
4510 lxc_list_del(iterator);
4511 free(iterator);
4512 }
4513 free(result);
4514}
4515
aaf26830
KT
4516/*
4517 * Return the list of cgroup_settings sorted according to the following rules
4518 * 1. Put memory.limit_in_bytes before memory.memsw.limit_in_bytes
4519 */
4520struct lxc_list *sort_cgroup_settings(struct lxc_list* cgroup_settings)
4521{
4522 struct lxc_list *result;
4523 struct lxc_list *memsw_limit = NULL;
4524 struct lxc_list *it = NULL;
4525 struct lxc_cgroup *cg = NULL;
4526 struct lxc_list *item = NULL;
4527
4528 result = malloc(sizeof(*result));
fac7c663
KT
4529 if (!result) {
4530 ERROR("failed to allocate memory to sort cgroup settings");
4531 return NULL;
4532 }
aaf26830
KT
4533 lxc_list_init(result);
4534
4535 /*Iterate over the cgroup settings and copy them to the output list*/
4536 lxc_list_for_each(it, cgroup_settings) {
4537 item = malloc(sizeof(*item));
fac7c663
KT
4538 if (!item) {
4539 ERROR("failed to allocate memory to sort cgroup settings");
a7307747 4540 free_cgroup_settings(result);
fac7c663
KT
4541 return NULL;
4542 }
aaf26830
KT
4543 item->elem = it->elem;
4544 cg = it->elem;
4545 if (strcmp(cg->subsystem, "memory.memsw.limit_in_bytes") == 0) {
4546 /* Store the memsw_limit location */
4547 memsw_limit = item;
4548 } else if (strcmp(cg->subsystem, "memory.limit_in_bytes") == 0 && memsw_limit != NULL) {
4d5b72a1 4549 /* lxc.cgroup.memory.memsw.limit_in_bytes is found before
aaf26830
KT
4550 * lxc.cgroup.memory.limit_in_bytes, swap these two items */
4551 item->elem = memsw_limit->elem;
4552 memsw_limit->elem = it->elem;
4553 }
4554 lxc_list_add_tail(result, item);
4555 }
4556
4557 return result;
a7307747 4558}