]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/conf.c
coverity 1126129: don't try to print c->name when c is NULL
[mirror_lxc.git] / src / lxc / conf.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
e3b4c4c4 27#include <stdarg.h>
0ad19a3f 28#include <errno.h>
29#include <string.h>
30#include <dirent.h>
0ad19a3f 31#include <unistd.h>
e3b4c4c4 32#include <sys/wait.h>
2d76d1d7 33#include <sys/syscall.h>
4a0ba80d 34#include <time.h>
e827ff7e 35
4ba0d9af
SG
36#if HAVE_IFADDRS_H
37#include <ifaddrs.h>
38#else
39#include <../include/ifaddrs.h>
40#endif
41
e827ff7e 42#if HAVE_PTY_H
b0a33c1e 43#include <pty.h>
e827ff7e
SG
44#else
45#include <../include/openpty.h>
46#endif
0ad19a3f 47
b3ecde1e
DL
48#include <linux/loop.h>
49
0ad19a3f 50#include <sys/types.h>
51#include <sys/utsname.h>
52#include <sys/param.h>
53#include <sys/stat.h>
54#include <sys/socket.h>
55#include <sys/mount.h>
56#include <sys/mman.h>
81810dd1 57#include <sys/prctl.h>
0ad19a3f 58
59#include <arpa/inet.h>
60#include <fcntl.h>
61#include <netinet/in.h>
62#include <net/if.h>
6f4a3756 63#include <libgen.h>
0ad19a3f 64
e5bda9ee 65#include "network.h"
66#include "error.h"
b2718c72 67#include "parse.h"
881450bb 68#include "config.h"
1b09f2c0
DL
69#include "utils.h"
70#include "conf.h"
71#include "log.h"
72#include "lxc.h" /* for lxc_cgroup_set() */
d55bc1ad 73#include "caps.h" /* for lxc_caps_last_cap() */
9be53773 74#include "bdev.h"
368bbc02 75#include "cgroup.h"
025ed0f3 76#include "lxclock.h"
fe4de9a6 77#include "lsm/lsm.h"
d0a36f2c 78
495d2046
SG
79#if HAVE_SYS_CAPABILITY_H
80#include <sys/capability.h>
81#endif
82
6ff05e18
SG
83#if HAVE_SYS_PERSONALITY_H
84#include <sys/personality.h>
85#endif
86
edaf8b1b
SG
87#if IS_BIONIC
88#include <../include/lxcmntent.h>
89#else
90#include <mntent.h>
91#endif
92
769872f9
SH
93#include "lxcseccomp.h"
94
36eb9bde 95lxc_log_define(lxc_conf, lxc);
e5bda9ee 96
0ad19a3f 97#define MAXHWLEN 18
98#define MAXINDEXLEN 20
442cbbe6 99#define MAXMTULEN 16
0ad19a3f 100#define MAXLINELEN 128
101
495d2046 102#if HAVE_SYS_CAPABILITY_H
b09094da
MN
103#ifndef CAP_SETFCAP
104#define CAP_SETFCAP 31
105#endif
106
107#ifndef CAP_MAC_OVERRIDE
108#define CAP_MAC_OVERRIDE 32
109#endif
110
111#ifndef CAP_MAC_ADMIN
112#define CAP_MAC_ADMIN 33
113#endif
495d2046 114#endif
b09094da
MN
115
116#ifndef PR_CAPBSET_DROP
117#define PR_CAPBSET_DROP 24
118#endif
119
9818cae4
SG
120#ifndef LO_FLAGS_AUTOCLEAR
121#define LO_FLAGS_AUTOCLEAR 4
122#endif
123
2d76d1d7
SG
/*
 * pivot_root() fallback.
 *
 * When the C library has a wrapper (HAVE_PIVOT_ROOT) only a prototype is
 * needed.  Otherwise issue the raw system call, or fail with ENOSYS when the
 * syscall number is not known at build time.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char * new_root, const char * put_old);
#else
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
#endif
138
/*
 * sethostname() fallback for C libraries without a wrapper: issue the raw
 * system call, or fail with ENOSYS when the syscall number is not known at
 * build time.
 */
#ifndef HAVE_SETHOSTNAME
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
#endif
151
72f919c4
SG
152/* Define __S_ISTYPE if missing from the C library */
153#ifndef __S_ISTYPE
154#define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
155#endif
156
72d0e1cb 157char *lxchook_names[NUM_LXC_HOOKS] = {
148e91f5 158 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
72d0e1cb 159
/* Signature of the per-network-type callbacks stored in the netdev tables. */
typedef int (*instanciate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
0ad19a3f 161
998ac676
RT
/* One row of the mount option table: a keyword and the flag bits it names. */
struct mount_opt {
	char *name;	/* option keyword, e.g. "ro" */
	int clear;	/* presumably: non-zero when the keyword clears 'flag'
			 * instead of setting it -- confirm against the parser */
	int flag;	/* MS_* bit(s) associated with the keyword */
};
167
81810dd1
DL
/* One row of the capability table: a capability name and its number. */
struct caps_opt {
	char *name;	/* capability name without the "CAP_" prefix, lower case */
	int value;	/* matching CAP_* constant */
};
172
e3b4c4c4
ST
173static int instanciate_veth(struct lxc_handler *, struct lxc_netdev *);
174static int instanciate_macvlan(struct lxc_handler *, struct lxc_netdev *);
175static int instanciate_vlan(struct lxc_handler *, struct lxc_netdev *);
176static int instanciate_phys(struct lxc_handler *, struct lxc_netdev *);
177static int instanciate_empty(struct lxc_handler *, struct lxc_netdev *);
82d5ae15 178
24654103
DL
179static instanciate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
180 [LXC_NET_VETH] = instanciate_veth,
181 [LXC_NET_MACVLAN] = instanciate_macvlan,
182 [LXC_NET_VLAN] = instanciate_vlan,
183 [LXC_NET_PHYS] = instanciate_phys,
184 [LXC_NET_EMPTY] = instanciate_empty,
0ad19a3f 185};
186
74a2b586
JK
187static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
188static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
189static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
190static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
191static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
192
193static instanciate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
194 [LXC_NET_VETH] = shutdown_veth,
195 [LXC_NET_MACVLAN] = shutdown_macvlan,
196 [LXC_NET_VLAN] = shutdown_vlan,
197 [LXC_NET_PHYS] = shutdown_phys,
198 [LXC_NET_EMPTY] = shutdown_empty,
199};
200
998ac676 201static struct mount_opt mount_opt[] = {
88d413d5
SW
202 { "defaults", 0, 0 },
203 { "ro", 0, MS_RDONLY },
204 { "rw", 1, MS_RDONLY },
205 { "suid", 1, MS_NOSUID },
206 { "nosuid", 0, MS_NOSUID },
207 { "dev", 1, MS_NODEV },
208 { "nodev", 0, MS_NODEV },
209 { "exec", 1, MS_NOEXEC },
210 { "noexec", 0, MS_NOEXEC },
211 { "sync", 0, MS_SYNCHRONOUS },
212 { "async", 1, MS_SYNCHRONOUS },
213 { "dirsync", 0, MS_DIRSYNC },
214 { "remount", 0, MS_REMOUNT },
215 { "mand", 0, MS_MANDLOCK },
216 { "nomand", 1, MS_MANDLOCK },
217 { "atime", 1, MS_NOATIME },
218 { "noatime", 0, MS_NOATIME },
219 { "diratime", 1, MS_NODIRATIME },
220 { "nodiratime", 0, MS_NODIRATIME },
221 { "bind", 0, MS_BIND },
222 { "rbind", 0, MS_BIND|MS_REC },
223 { "relatime", 0, MS_RELATIME },
224 { "norelatime", 1, MS_RELATIME },
225 { "strictatime", 0, MS_STRICTATIME },
226 { "nostrictatime", 1, MS_STRICTATIME },
227 { NULL, 0, 0 },
998ac676
RT
228};
229
495d2046 230#if HAVE_SYS_CAPABILITY_H
81810dd1 231static struct caps_opt caps_opt[] = {
a6afdde9 232 { "chown", CAP_CHOWN },
1e11be34
DL
233 { "dac_override", CAP_DAC_OVERRIDE },
234 { "dac_read_search", CAP_DAC_READ_SEARCH },
235 { "fowner", CAP_FOWNER },
236 { "fsetid", CAP_FSETID },
81810dd1
DL
237 { "kill", CAP_KILL },
238 { "setgid", CAP_SETGID },
239 { "setuid", CAP_SETUID },
240 { "setpcap", CAP_SETPCAP },
241 { "linux_immutable", CAP_LINUX_IMMUTABLE },
242 { "net_bind_service", CAP_NET_BIND_SERVICE },
243 { "net_broadcast", CAP_NET_BROADCAST },
244 { "net_admin", CAP_NET_ADMIN },
245 { "net_raw", CAP_NET_RAW },
246 { "ipc_lock", CAP_IPC_LOCK },
247 { "ipc_owner", CAP_IPC_OWNER },
248 { "sys_module", CAP_SYS_MODULE },
249 { "sys_rawio", CAP_SYS_RAWIO },
250 { "sys_chroot", CAP_SYS_CHROOT },
251 { "sys_ptrace", CAP_SYS_PTRACE },
252 { "sys_pacct", CAP_SYS_PACCT },
253 { "sys_admin", CAP_SYS_ADMIN },
254 { "sys_boot", CAP_SYS_BOOT },
255 { "sys_nice", CAP_SYS_NICE },
256 { "sys_resource", CAP_SYS_RESOURCE },
257 { "sys_time", CAP_SYS_TIME },
258 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
259 { "mknod", CAP_MKNOD },
260 { "lease", CAP_LEASE },
9527e566 261#ifdef CAP_AUDIT_WRITE
81810dd1 262 { "audit_write", CAP_AUDIT_WRITE },
9527e566
FW
263#endif
264#ifdef CAP_AUDIT_CONTROL
81810dd1 265 { "audit_control", CAP_AUDIT_CONTROL },
9527e566 266#endif
81810dd1
DL
267 { "setfcap", CAP_SETFCAP },
268 { "mac_override", CAP_MAC_OVERRIDE },
269 { "mac_admin", CAP_MAC_ADMIN },
5170c716
CS
270#ifdef CAP_SYSLOG
271 { "syslog", CAP_SYSLOG },
272#endif
273#ifdef CAP_WAKE_ALARM
274 { "wake_alarm", CAP_WAKE_ALARM },
275#endif
81810dd1 276};
495d2046
SG
277#else
278static struct caps_opt caps_opt[] = {};
279#endif
81810dd1 280
4a0ba80d
SG
/* Alphabet used to replace the 'X' placeholders of an interface name template. */
static char padchar[] = "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
283
284static char *mkifname(char *template)
285{
286 char *name = NULL;
287 int i = 0;
288 FILE *urandom;
289 unsigned int seed;
4a0ba80d
SG
290 struct ifaddrs *ifaddr, *ifa;
291 int ifexists = 0;
292
293 /* Get all the network interfaces */
294 getifaddrs(&ifaddr);
295
296 /* Initialize the random number generator */
025ed0f3 297 process_lock();
4a0ba80d 298 urandom = fopen ("/dev/urandom", "r");
025ed0f3 299 process_unlock();
4a0ba80d
SG
300 if (urandom != NULL) {
301 if (fread (&seed, sizeof(seed), 1, urandom) <= 0)
302 seed = time(0);
025ed0f3 303 process_lock();
4a0ba80d 304 fclose(urandom);
025ed0f3 305 process_unlock();
4a0ba80d
SG
306 }
307 else
308 seed = time(0);
7f3e12f3
SG
309
310#ifndef HAVE_RAND_R
311 srand(seed);
312#endif
4a0ba80d
SG
313
314 /* Generate random names until we find one that doesn't exist */
315 while(1) {
316 ifexists = 0;
317 name = strdup(template);
318
319 if (name == NULL)
320 return NULL;
321
322 for (i = 0; i < strlen(name); i++) {
323 if (name[i] == 'X') {
7f3e12f3
SG
324#ifdef HAVE_RAND_R
325 name[i] = padchar[rand_r(&seed) % (strlen(padchar) - 1)];
326#else
327 name[i] = padchar[rand() % (strlen(padchar) - 1)];
328#endif
4a0ba80d
SG
329 }
330 }
331
332 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
333 if (strcmp(ifa->ifa_name, name) == 0) {
334 ifexists = 1;
335 break;
336 }
337 }
338
339 if (ifexists == 0)
340 break;
341
342 free(name);
343 }
344
345 freeifaddrs(ifaddr);
346 return name;
347}
348
91c3830e
SH
349static int run_buffer(char *buffer)
350{
351 FILE *f;
352 char *output;
8e7da691 353 int ret;
91c3830e 354
025ed0f3 355 process_lock();
91c3830e 356 f = popen(buffer, "r");
025ed0f3 357 process_unlock();
91c3830e
SH
358 if (!f) {
359 SYSERROR("popen failed");
360 return -1;
361 }
362
363 output = malloc(LXC_LOG_BUFFER_SIZE);
364 if (!output) {
365 ERROR("failed to allocate memory for script output");
025ed0f3 366 process_lock();
00b6be44 367 pclose(f);
025ed0f3 368 process_unlock();
91c3830e
SH
369 return -1;
370 }
371
372 while(fgets(output, LXC_LOG_BUFFER_SIZE, f))
373 DEBUG("script output: %s", output);
374
375 free(output);
376
025ed0f3 377 process_lock();
8e7da691 378 ret = pclose(f);
025ed0f3 379 process_unlock();
8e7da691 380 if (ret == -1) {
91c3830e
SH
381 SYSERROR("Script exited on error");
382 return -1;
8e7da691
DE
383 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
384 ERROR("Script exited with status %d", WEXITSTATUS(ret));
385 return -1;
386 } else if (WIFSIGNALED(ret)) {
387 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
388 strsignal(WTERMSIG(ret)));
389 return -1;
91c3830e
SH
390 }
391
392 return 0;
393}
394
/*
 * run_script_argv - run a hook script with an argument vector
 * @name:    container name, passed as the first script argument
 * @section: configuration section, passed as the second argument
 * @script:  command to run
 * @hook:    hook name, passed as the third argument
 * @lxcpath: not used by this function
 * @argsin:  optional NULL-terminated array of extra arguments, may be NULL
 *
 * Builds "<script> <name> <section> <hook> [args...]" and hands it to
 * run_buffer().  The command line lives on the heap: its length depends on
 * the caller's strings, so a stack allocation could overflow the stack.
 *
 * NOTE(review): the pieces are joined with plain spaces and run_buffer()
 * passes the result to popen(), so shell metacharacters in any argument are
 * interpreted by the shell.
 *
 * Returns the result of run_buffer(), or -1 when the command line cannot be
 * built.
 */
static int run_script_argv(const char *name, const char *section,
		      const char *script, const char *hook, const char *lxcpath,
		      char **argsin)
{
	int ret, i;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	/* snprintf() reports lengths as int, so keep the total within range */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		size_t len = size - (size_t)ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", argsin[i]);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			free(buffer);
			return -1;
		}
		ret += rc;
	}

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
444
751d9dcd
DL
/*
 * run_script - run a script with a variable number of string arguments
 * @name:    container name, passed as the first script argument
 * @section: configuration section, passed as the second argument
 * @script:  command to run
 * @...:     extra arguments (char *), terminated by a NULL pointer
 *
 * Builds "<script> <name> <section> [args...]" and hands it to run_buffer().
 * The command line lives on the heap because its length depends on the
 * caller's strings, and every va_start() is paired with va_end() on all
 * paths, including the error returns.
 *
 * NOTE(review): the pieces are joined with plain spaces and run_buffer()
 * passes the result to popen(), so shell metacharacters in any argument are
 * interpreted by the shell.
 *
 * Returns the result of run_buffer(), or -1 when the command line cannot be
 * built.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	size += 3;

	/* snprintf() reports lengths as int, so keep the total within range */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	ret = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (ret < 0 || (size_t)ret >= size) {
		ERROR("Script name too long");
		free(buffer);
		return -1;
	}

	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		size_t len = size - (size_t)ret;
		int rc;

		rc = snprintf(buffer + ret, len, " %s", p);
		if (rc < 0 || (size_t)rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			free(buffer);
			return -1;
		}
		ret += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);
	free(buffer);
	return ret;
}
496
/*
 * find_fstype_cb - per-line callback: try to mount with the fs type on a line
 * @buffer: one line of a filesystem list file; trimmed in place
 * @data:   points to the { rootfs, target, mntopt } triple built by
 *          mount_unknow_fs()
 *
 * Lines containing "nodev" are skipped.
 *
 * Returns 1 once a mount succeeded, 0 when this line did not produce a mount.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		int mntopt;
	} *args = data;
	char *type;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	/* strip the surrounding characters reported by the *_gc helpers */
	type = buffer + lxc_char_left_gc(buffer, strlen(buffer));
	type[lxc_char_right_gc(type, strlen(type))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      args->rootfs, args->target, type);

	if (mount(args->rootfs, args->target, type, args->mntopt, NULL) != 0) {
		DEBUG("mount failed with error: %s", strerror(errno));
		return 0;
	}

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     args->rootfs, args->target, type);

	return 1;
}
528
2656d231 529static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt)
78ae2fcc 530{
a6afdde9 531 int i;
78ae2fcc 532
533 struct cbarg {
534 const char *rootfs;
a6afdde9 535 const char *target;
78ae2fcc 536 int mntopt;
537 } cbarg = {
538 .rootfs = rootfs,
a6afdde9 539 .target = target,
78ae2fcc 540 .mntopt = mntopt,
541 };
542
a6afdde9
DL
543 /*
544 * find the filesystem type with brute force:
545 * first we check with /etc/filesystems, in case the modules
78ae2fcc 546 * are auto-loaded and fall back to the supported kernel fs
547 */
548 char *fsfile[] = {
549 "/etc/filesystems",
550 "/proc/filesystems",
551 };
552
a6afdde9
DL
553 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
554
555 int ret;
556
557 if (access(fsfile[i], F_OK))
558 continue;
559
560 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
561 if (ret < 0) {
562 ERROR("failed to parse '%s'", fsfile[i]);
563 return -1;
564 }
565
566 if (ret)
567 return 0;
78ae2fcc 568 }
569
a6afdde9
DL
570 ERROR("failed to determine fs type for '%s'", rootfs);
571 return -1;
572}
573
/*
 * mount_rootfs_dir - recursively bind mount a directory rootfs on the target
 *
 * Returns the result of mount(2).
 */
static int mount_rootfs_dir(const char *rootfs, const char *target)
{
	const unsigned long bindflags = MS_BIND | MS_REC;

	return mount(rootfs, target, "none", bindflags, NULL);
}
578
/*
 * setup_lodev - attach a file to an already opened loop device
 * @rootfs: path of the backing file
 * @fd:     open descriptor of the loop device
 * @loinfo: scratch status structure; zeroed here, then given the autoclear
 *          flag before being passed to LOOP_SET_STATUS64
 *
 * If setting the status fails after the file has been attached, the file is
 * detached again: the autoclear flag was never applied, so closing the
 * descriptor alone would leave the device bound.
 *
 * Returns 0 on success, -1 on error.
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	process_lock();
	rfd = open(rootfs, O_RDWR);
	process_unlock();
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD, nothing else will release the device */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	process_lock();
	close(rfd);
	process_unlock();

	return ret;
}
614
/*
 * mount_rootfs_file - mount a regular file rootfs through a free loop device
 * @rootfs: path of the image file
 * @target: mount point
 *
 * Scans /dev for entries named "loop*", picks the first one whose
 * LOOP_GET_STATUS64 fails with ENXIO (taken to mean "free"), attaches the
 * image to it and mounts it by probing the filesystem type.
 *
 * Returns 0 on success, -1 on error or when no usable loop device was found.
 */
static int mount_rootfs_file(const char *rootfs, const char *target)
{
	struct dirent entry, *found;
	struct loop_info64 loinfo;
	char devpath[MAXPATHLEN];
	DIR *devdir;
	int result = -1, lofd = -1, len;

	process_lock();
	devdir = opendir("/dev");
	process_unlock();
	if (devdir == NULL) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	for (;;) {
		int in_use;

		if (readdir_r(devdir, &entry, &found) != 0 || found == NULL)
			break;

		if (strcmp(found->d_name, ".") == 0 ||
		    strcmp(found->d_name, "..") == 0)
			continue;

		if (strncmp(found->d_name, "loop", 4) != 0)
			continue;

		len = snprintf(devpath, MAXPATHLEN, "/dev/%s", found->d_name);
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		process_lock();
		lofd = open(devpath, O_RDWR);
		process_unlock();
		if (lofd < 0)
			continue;

		/* a successful status query means the device is already bound */
		in_use = (ioctl(lofd, LOOP_GET_STATUS64, &loinfo) == 0);
		if (in_use || errno != ENXIO) {
			if (!in_use)
				WARN("unexpected error for ioctl on '%s': %m",
				     found->d_name);
			process_lock();
			close(lofd);
			process_unlock();
			continue;
		}

		DEBUG("found '%s' free lodev", devpath);

		result = setup_lodev(rootfs, lofd, &loinfo);
		if (result == 0)
			result = mount_unknow_fs(devpath, target, 0);
		process_lock();
		close(lofd);
		process_unlock();

		/* only the first free device is ever tried */
		break;
	}

	process_lock();
	if (closedir(devdir) != 0)
		WARN("failed to close directory");
	process_unlock();

	return result;
}
690
/*
 * mount_rootfs_block - mount a block device rootfs on the target, probing the
 * filesystem type and using no extra mount flags.
 */
static int mount_rootfs_block(const char *rootfs, const char *target)
{
	const int no_flags = 0;

	return mount_unknow_fs(rootfs, target, no_flags);
}
695
0c547523
SH
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * that no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned.
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	/* no rootfs configured: nothing to pin */
	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	/* a path that cannot be resolved is treated as "nothing to pin" */
	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	/* only directory backed root filesystems get pinned */
	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	/* reject both an output error and a truncated path */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	process_lock();
	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	process_unlock();
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the pin alive after the name is gone */
	(void)unlink(absrootfspin);
	return fd;
}
740
368bbc02
CS
741static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct cgroup_process_info *cgroup_info)
742{
368bbc02 743 int r;
b06b8511
CS
744 size_t i;
745 static struct {
746 int match_mask;
747 int match_flag;
748 const char *source;
749 const char *destination;
750 const char *fstype;
751 unsigned long flags;
752 const char *options;
753 } default_mounts[] = {
754 /* Read-only bind-mounting... In older kernels, doing that required
755 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
756 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
757 * kernel 2.6.26 onwards. However, this apparently does not work on
758 * kernel 3.8. Unfortunately, on that very same kernel, doing the
759 * same trick as above doesn't seem to work either, there one needs
760 * to ALSO specify MS_BIND for the remount, otherwise the entire
761 * fs is remounted read-only or the mount fails because it's busy...
762 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
763 * 2.6.32...
368bbc02 764 */
b06b8511
CS
765 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
766 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
767 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
768 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
769 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
770 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
771 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
772 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
773 { 0, 0, NULL, NULL, NULL, 0, NULL }
774 };
368bbc02 775
b06b8511
CS
776 for (i = 0; default_mounts[i].match_mask; i++) {
777 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
778 char *source = NULL;
779 char *destination = NULL;
780 int saved_errno;
781
782 if (default_mounts[i].source) {
783 /* will act like strdup if %r is not present */
784 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
785 if (!source) {
786 SYSERROR("memory allocation error");
787 return -1;
788 }
789 }
790 if (default_mounts[i].destination) {
791 /* will act like strdup if %r is not present */
792 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
793 if (!destination) {
794 saved_errno = errno;
795 SYSERROR("memory allocation error");
796 free(source);
797 errno = saved_errno;
798 return -1;
799 }
800 }
801 r = mount(source, destination, default_mounts[i].fstype, default_mounts[i].flags, default_mounts[i].options);
802 saved_errno = errno;
c414be25
DE
803 if (r < 0)
804 SYSERROR("error mounting %s on %s", source, destination);
b06b8511
CS
805 free(source);
806 free(destination);
807 if (r < 0) {
b06b8511
CS
808 errno = saved_errno;
809 return -1;
810 }
368bbc02 811 }
368bbc02
CS
812 }
813
b06b8511 814 if (flags & LXC_AUTO_CGROUP_MASK) {
7997d7da 815 r = lxc_setup_mount_cgroup(conf->rootfs.mount, cgroup_info, flags & LXC_AUTO_CGROUP_MASK);
368bbc02
CS
816 if (r < 0) {
817 SYSERROR("error mounting /sys/fs/cgroup");
b06b8511 818 return -1;
368bbc02
CS
819 }
820 }
821
368bbc02 822 return 0;
368bbc02
CS
823}
824
2656d231 825static int mount_rootfs(const char *rootfs, const char *target)
0ad19a3f 826{
b09ef133 827 char absrootfs[MAXPATHLEN];
78ae2fcc 828 struct stat s;
a6afdde9 829 int i;
78ae2fcc 830
a6afdde9 831 typedef int (*rootfs_cb)(const char *, const char *);
78ae2fcc 832
833 struct rootfs_type {
834 int type;
835 rootfs_cb cb;
836 } rtfs_type[] = {
2656d231
DL
837 { S_IFDIR, mount_rootfs_dir },
838 { S_IFBLK, mount_rootfs_block },
839 { S_IFREG, mount_rootfs_file },
78ae2fcc 840 };
0ad19a3f 841
4c8ab83b 842 if (!realpath(rootfs, absrootfs)) {
36eb9bde 843 SYSERROR("failed to get real path for '%s'", rootfs);
4c8ab83b 844 return -1;
845 }
b09ef133 846
b09ef133 847 if (access(absrootfs, F_OK)) {
36eb9bde 848 SYSERROR("'%s' is not accessible", absrootfs);
b09ef133 849 return -1;
850 }
851
78ae2fcc 852 if (stat(absrootfs, &s)) {
36eb9bde 853 SYSERROR("failed to stat '%s'", absrootfs);
9b0f0477 854 return -1;
855 }
856
78ae2fcc 857 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
9b0f0477 858
78ae2fcc 859 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
860 continue;
9b0f0477 861
a6afdde9 862 return rtfs_type[i].cb(absrootfs, target);
78ae2fcc 863 }
9b0f0477 864
36eb9bde 865 ERROR("unsupported rootfs type for '%s'", absrootfs);
78ae2fcc 866 return -1;
0ad19a3f 867}
868
/*
 * setup_utsname - set the hostname from the configured utsname
 * @utsname: configured names; NULL means there is nothing to do
 *
 * Returns 0 on success or when nothing had to be done, -1 when
 * sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *host;

	if (utsname == NULL)
		return 0;

	host = utsname->nodename;

	if (sethostname(host, strlen(host)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", utsname->nodename);
		return -1;
	}

	INFO("'%s' hostname has been setup", utsname->nodename);

	return 0;
}
883
33fcb7a0 884static int setup_tty(const struct lxc_rootfs *rootfs,
7c6ef2a2 885 const struct lxc_tty_info *tty_info, char *ttydir)
b0a33c1e 886{
7c6ef2a2
SH
887 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
888 int i, ret;
b0a33c1e 889
bc9bd0e3
DL
890 if (!rootfs->path)
891 return 0;
892
b0a33c1e 893 for (i = 0; i < tty_info->nbtty; i++) {
894
895 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
896
7c6ef2a2 897 ret = snprintf(path, sizeof(path), "%s/dev/tty%d",
12297168 898 rootfs->mount, i + 1);
7c6ef2a2
SH
899 if (ret >= sizeof(path)) {
900 ERROR("pathname too long for ttys");
901 return -1;
902 }
903 if (ttydir) {
904 /* create dev/lxc/tty%d" */
9ba8130c 905 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/tty%d",
7c6ef2a2
SH
906 rootfs->mount, ttydir, i + 1);
907 if (ret >= sizeof(lxcpath)) {
908 ERROR("pathname too long for ttys");
909 return -1;
910 }
025ed0f3 911 process_lock();
7c6ef2a2 912 ret = creat(lxcpath, 0660);
025ed0f3 913 process_unlock();
7c6ef2a2
SH
914 if (ret==-1 && errno != EEXIST) {
915 SYSERROR("error creating %s\n", lxcpath);
916 return -1;
917 }
025ed0f3 918 process_lock();
4d44e274
SH
919 if (ret >= 0)
920 close(ret);
025ed0f3 921 process_unlock();
7c6ef2a2
SH
922 ret = unlink(path);
923 if (ret && errno != ENOENT) {
924 SYSERROR("error unlinking %s\n", path);
925 return -1;
926 }
b0a33c1e 927
7c6ef2a2
SH
928 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
929 WARN("failed to mount '%s'->'%s'",
930 pty_info->name, path);
931 continue;
932 }
13954cce 933
9ba8130c
SH
934 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
935 if (ret >= sizeof(lxcpath)) {
936 ERROR("tty pathname too long");
937 return -1;
938 }
7c6ef2a2
SH
939 ret = symlink(lxcpath, path);
940 if (ret) {
941 SYSERROR("failed to create symlink for tty %d\n", i+1);
942 return -1;
943 }
944 } else {
c6883f38
SH
945 /* If we populated /dev, then we need to create /dev/ttyN */
946 if (access(path, F_OK)) {
025ed0f3 947 process_lock();
c6883f38 948 ret = creat(path, 0660);
025ed0f3 949 process_unlock();
c6883f38
SH
950 if (ret==-1) {
951 SYSERROR("error creating %s\n", path);
952 /* this isn't fatal, continue */
025ed0f3
SH
953 } else {
954 process_lock();
c6883f38 955 close(ret);
025ed0f3
SH
956 process_unlock();
957 }
c6883f38 958 }
7c6ef2a2
SH
959 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
960 WARN("failed to mount '%s'->'%s'",
961 pty_info->name, path);
962 continue;
963 }
b0a33c1e 964 }
965 }
966
cd54d859
DL
967 INFO("%d tty(s) has been setup", tty_info->nbtty);
968
b0a33c1e 969 return 0;
970}
971
7a7ff0c6 972static int setup_rootfs_pivot_root_cb(char *buffer, void *data)
bf601689
MH
973{
974 struct lxc_list *mountlist, *listentry, *iterator;
2c7d90ac 975 char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL;
bf601689
MH
976 int found;
977 void **cbparm;
978
979 mountentry = buffer;
980 cbparm = (void **)data;
981
982 mountlist = cbparm[0];
983 pivotdir = cbparm[1];
984
985 /* parse entry, first field is mountname, ignore */
2796cf79 986 mountpoint = strtok_r(mountentry, " ", &saveptr);
bf601689
MH
987 if (!mountpoint)
988 return -1;
989
990 /* second field is mountpoint */
2796cf79 991 mountpoint = strtok_r(NULL, " ", &saveptr);
bf601689
MH
992 if (!mountpoint)
993 return -1;
994
995 /* only consider mountpoints below old root fs */
996 if (strncmp(mountpoint, pivotdir, strlen(pivotdir)))
997 return 0;
998
999 /* filter duplicate mountpoints */
1000 found = 0;
1001 lxc_list_for_each(iterator, mountlist) {
1002 if (!strcmp(iterator->elem, mountpoint)) {
1003 found = 1;
1004 break;
1005 }
1006 }
1007 if (found)
1008 return 0;
1009
1010 /* add entry to list */
1011 listentry = malloc(sizeof(*listentry));
1012 if (!listentry) {
1013 SYSERROR("malloc for mountpoint listentry failed");
1014 return -1;
1015 }
1016
1017 listentry->elem = strdup(mountpoint);
1018 if (!listentry->elem) {
1019 SYSERROR("strdup failed");
00b6be44 1020 free(listentry);
bf601689
MH
1021 return -1;
1022 }
1023 lxc_list_add_tail(mountlist, listentry);
1024
1025 return 0;
1026}
1027
cc6f6dd7 1028static int umount_oldrootfs(const char *oldrootfs)
bf601689 1029{
2382ecff 1030 char path[MAXPATHLEN];
bf601689 1031 void *cbparm[2];
9ebb03ad 1032 struct lxc_list mountlist, *iterator, *next;
bf601689 1033 int ok, still_mounted, last_still_mounted;
9ba8130c 1034 int rc;
bf601689
MH
1035
1036 /* read and parse /proc/mounts in old root fs */
1037 lxc_list_init(&mountlist);
1038
cc6f6dd7 1039 /* oldrootfs is on the top tree directory now */
9ba8130c
SH
1040 rc = snprintf(path, sizeof(path), "/%s", oldrootfs);
1041 if (rc >= sizeof(path)) {
1042 ERROR("rootfs name too long");
1043 return -1;
1044 }
bf601689 1045 cbparm[0] = &mountlist;
bf601689 1046
cc6f6dd7 1047 cbparm[1] = strdup(path);
bf601689
MH
1048 if (!cbparm[1]) {
1049 SYSERROR("strdup failed");
1050 return -1;
1051 }
1052
9ba8130c
SH
1053 rc = snprintf(path, sizeof(path), "%s/proc/mounts", oldrootfs);
1054 if (rc >= sizeof(path)) {
1055 ERROR("container proc/mounts name too long");
1056 return -1;
1057 }
cc6f6dd7
DL
1058
1059 ok = lxc_file_for_each_line(path,
1060 setup_rootfs_pivot_root_cb, &cbparm);
bf601689
MH
1061 if (ok < 0) {
1062 SYSERROR("failed to read or parse mount list '%s'", path);
1063 return -1;
1064 }
1065
1066 /* umount filesystems until none left or list no longer shrinks */
1067 still_mounted = 0;
1068 do {
1069 last_still_mounted = still_mounted;
1070 still_mounted = 0;
1071
9ebb03ad 1072 lxc_list_for_each_safe(iterator, &mountlist, next) {
bf601689 1073
c08556c6 1074 /* umount normally */
bf601689
MH
1075 if (!umount(iterator->elem)) {
1076 DEBUG("umounted '%s'", (char *)iterator->elem);
1077 lxc_list_del(iterator);
1078 continue;
1079 }
1080
bf601689
MH
1081 still_mounted++;
1082 }
7df119ee 1083
bf601689
MH
1084 } while (still_mounted > 0 && still_mounted != last_still_mounted);
1085
7df119ee 1086
c08556c6
DL
1087 lxc_list_for_each(iterator, &mountlist) {
1088
1089 /* let's try a lazy umount */
1090 if (!umount2(iterator->elem, MNT_DETACH)) {
1091 INFO("lazy unmount of '%s'", (char *)iterator->elem);
1092 continue;
1093 }
1094
1095 /* be more brutal (nfs) */
1096 if (!umount2(iterator->elem, MNT_FORCE)) {
1097 INFO("forced unmount of '%s'", (char *)iterator->elem);
1098 continue;
1099 }
1100
7df119ee 1101 WARN("failed to unmount '%s'", (char *)iterator->elem);
c08556c6 1102 }
bf601689 1103
cc6f6dd7
DL
1104 return 0;
1105}
1106
/*
 * Switch the root filesystem to 'rootfs' with pivot_root() and clean up
 * the previous root.
 *
 * rootfs:   directory that becomes the new "/".
 * pivotdir: directory below rootfs that receives the old root; when NULL
 *           "lxc_putold" is used. It is created if missing and, in that
 *           case, removed again once the old root has been unmounted.
 *
 * Returns 0 on success, -1 on failure. Failing to remove the temporary
 * mount point only produces a warning.
 */
static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
{
	char putold[MAXPATHLEN];
	int created = 0;
	int len;

	/* pivot_root(".", ...) below is relative to the new root fs */
	if (chdir(rootfs) != 0) {
		SYSERROR("can't chdir to new rootfs '%s'", rootfs);
		return -1;
	}

	if (pivotdir == NULL)
		pivotdir = "lxc_putold";

	/* full path of the put-old directory as seen before the pivot */
	len = snprintf(putold, sizeof(putold), "%s/%s", rootfs, pivotdir);
	if ((size_t)len >= sizeof(putold)) {
		ERROR("pivot dir name too long");
		return -1;
	}

	if (access(putold, F_OK) != 0) {
		if (mkdir_p(putold, 0755) != 0) {
			SYSERROR("failed to create pivotdir '%s'", putold);
			return -1;
		}

		/* remember to clean up what we created ourselves */
		created = 1;
		DEBUG("created '%s' directory", putold);
	}

	DEBUG("mountpoint for old rootfs is '%s'", putold);

	/* pivot_root into our new root fs */
	if (pivot_root(".", putold) != 0) {
		SYSERROR("pivot_root syscall failed");
		return -1;
	}

	if (chdir("/") != 0) {
		SYSERROR("can't chdir to / after pivot_root");
		return -1;
	}

	DEBUG("pivot_root syscall to '%s' successful", rootfs);

	/* from here on the old root is addressed by its relative name */
	if (umount_oldrootfs(pivotdir) != 0)
		return -1;

	/* removing the temporary mount point is best effort, not fatal */
	if (created && rmdir(pivotdir) != 0)
		WARN("can't remove mountpoint '%s': %m", pivotdir);

	return 0;
}
1166
91c3830e
SH
/*
 * Mount a tmpfs on "<root>/dev" and create "<root>/dev/pts" inside it.
 *
 * root: path of the container root the /dev tree is built under.
 *
 * Returns 0 on success, -1 if a path does not fit in MAXPATHLEN, the tmpfs
 * cannot be mounted or the pts directory cannot be created.
 *
 * Do we want to add options for max size of /dev and a file to
 * specify which devices to create?
 */
static int mount_autodev(char *root)
{
	int ret;
	char path[MAXPATHLEN];

	INFO("Mounting /dev under %s\n", root);
	ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
	/* ret == MAXPATHLEN already means the path was truncated */
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	ret = mount("none", path, "tmpfs", 0, "size=100000");
	if (ret) {
		SYSERROR("Failed to mount /dev at %s\n", root);
		return -1;
	}
	ret = snprintf(path, MAXPATHLEN, "%s/dev/pts", root);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	/* mode 0755 */
	ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
	if (ret) {
		SYSERROR("Failed to create /dev/pts in container");
		return -1;
	}

	INFO("Mounted /dev under %s\n", root);
	return 0;
}
1197
c6883f38
SH
/* Description of one device node created by setup_autodev(). */
struct lxc_devs {
	char *name;	/* node name below <root>/dev */
	mode_t mode;	/* file type and permission bits handed to mknod() */
	int maj;	/* device major number */
	int min;	/* device minor number */
};

/* Device nodes populated into an automatically created /dev. */
struct lxc_devs lxc_devs[] = {
	{ .name = "null",
	  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 3 },
	{ .name = "zero",
	  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 5 },
	{ .name = "full",
	  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 7 },
	{ .name = "urandom",
	  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 9 },
	{ .name = "random",
	  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 1, .min = 8 },
	{ .name = "tty",
	  .mode = S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, .maj = 5, .min = 0 },
	{ .name = "console",
	  .mode = S_IFCHR | S_IRUSR | S_IWUSR, .maj = 5, .min = 1 },
};
1214
c6883f38
SH
1215static int setup_autodev(char *root)
1216{
1217 int ret;
1218 struct lxc_devs *d;
1219 char path[MAXPATHLEN];
1220 int i;
3a32201c 1221 mode_t cmask;
c6883f38 1222
91c3830e
SH
1223 INFO("Creating initial consoles under %s/dev\n", root);
1224
c6883f38 1225 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
91c3830e
SH
1226 if (ret < 0 || ret >= MAXPATHLEN) {
1227 ERROR("Error calculating container /dev location");
c6883f38 1228 return -1;
f7bee6c6 1229 }
91c3830e
SH
1230
1231 INFO("Populating /dev under %s\n", root);
3a32201c 1232 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
c6883f38
SH
1233 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
1234 d = &lxc_devs[i];
1235 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1236 if (ret < 0 || ret >= MAXPATHLEN)
1237 return -1;
1238 ret = mknod(path, d->mode, makedev(d->maj, d->min));
91c3830e 1239 if (ret && errno != EEXIST) {
c6883f38
SH
1240 SYSERROR("Error creating %s\n", d->name);
1241 return -1;
1242 }
1243 }
3a32201c 1244 umask(cmask);
c6883f38
SH
1245
1246 INFO("Populated /dev under %s\n", root);
1247 return 0;
1248}
1249
cc28d0b0
SH
/*
 * Detect whether / is mounted MS_SHARED. The only way I know of to
 * check that is through /proc/self/mountinfo.
 * Only '/' is checked. If the container rootfs or mount location is
 * MS_SHARED but is not '/', this is not noticed - figuring that out would
 * be too much work to be worth it.
 *
 * Returns 1 when an entry whose fifth space-separated field is "/" carries
 * "shared:" after its sixth field, 0 otherwise (including when the file
 * cannot be opened).
 */
#define LINELEN 4096
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	FILE *mountinfo;
	int shared = 0;

	process_lock();
	mountinfo = fopen("/proc/self/mountinfo", "r");
	process_unlock();
	if (mountinfo == NULL)
		return 0;

	/* stop reading as soon as a shared '/' has been seen */
	while (!shared && fgets(line, LINELEN, mountinfo) != NULL) {
		char *field = line;
		char *field_end, *rest;
		int skipped;

		/* step over four separators to reach the fifth field */
		for (skipped = 0; field != NULL && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (field == NULL)
			continue;

		field_end = strchr(field + 1, ' ');
		if (field_end == NULL)
			continue;
		*field_end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* this is '/'. is it shared? */
		rest = strchr(field_end + 1, ' ');
		if (rest != NULL && strstr(rest, "shared:") != NULL)
			shared = 1;
	}

	process_lock();
	fclose(mountinfo);
	process_unlock();

	return shared;
}
1295
1296/*
1297 * I'll forgive you for asking whether all of this is needed :) The
1298 * answer is yes.
1299 * pivot_root will fail if the new root, the put_old dir, or the parent
1300 * of current->fs->root are MS_SHARED. (parent of current->fs_root may
1301 * or may not be current->fs_root - if we assumed it always was, we could
1302 * just mount --make-rslave /). So,
1303 * 1. mount a tiny tmpfs to be parent of current->fs->root.
1304 * 2. make that MS_SLAVE
1305 * 3. make a 'root' directory under that
1306 * 4. mount --rbind / under the $tinyroot/root.
1307 * 5. make that rslave
1308 * 6. chdir and chroot into $tinyroot/root
1309 * 7. $tinyroot will be unmounted by our parent in start.c
1310 */
1311static int chroot_into_slave(struct lxc_conf *conf)
1312{
1313 char path[MAXPATHLEN];
1314 const char *destpath = conf->rootfs.mount;
1315 int ret;
1316
1317 if (mount(destpath, destpath, NULL, MS_BIND, 0)) {
1318 SYSERROR("failed to mount %s bind", destpath);
1319 return -1;
1320 }
1321 if (mount("", destpath, NULL, MS_SLAVE, 0)) {
1322 SYSERROR("failed to make %s slave", destpath);
1323 return -1;
1324 }
1325 if (mount("none", destpath, "tmpfs", 0, "size=10000")) {
1326 SYSERROR("Failed to mount tmpfs / at %s", destpath);
1327 return -1;
1328 }
1329 ret = snprintf(path, MAXPATHLEN, "%s/root", destpath);
1330 if (ret < 0 || ret >= MAXPATHLEN) {
1331 ERROR("out of memory making root path");
1332 return -1;
1333 }
1334 if (mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
1335 SYSERROR("Failed to create /dev/pts in container");
1336 return -1;
1337 }
1338 if (mount("/", path, NULL, MS_BIND|MS_REC, 0)) {
1339 SYSERROR("Failed to rbind mount / to %s", path);
1340 return -1;
1341 }
1342 if (mount("", destpath, NULL, MS_SLAVE|MS_REC, 0)) {
1343 SYSERROR("Failed to make tmp-/ at %s rslave", path);
1344 return -1;
1345 }
1346 if (chdir(path)) {
1347 SYSERROR("Failed to chdir into tmp-/");
1348 return -1;
1349 }
1350 if (chroot(path)) {
1351 SYSERROR("Failed to chroot into tmp-/");
1352 return -1;
1353 }
1354 INFO("Chrooted into tmp-/ at %s\n", path);
1355 return 0;
1356}
1357
1358static int setup_rootfs(struct lxc_conf *conf)
0ad19a3f 1359{
cc28d0b0
SH
1360 const struct lxc_rootfs *rootfs = &conf->rootfs;
1361
a0f379bf
DW
1362 if (!rootfs->path) {
1363 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1364 SYSERROR("Failed to make / rslave");
1365 return -1;
1366 }
c69bd12f 1367 return 0;
a0f379bf 1368 }
0ad19a3f 1369
12297168 1370 if (access(rootfs->mount, F_OK)) {
b1789442 1371 SYSERROR("failed to access to '%s', check it is present",
12297168 1372 rootfs->mount);
b1789442
DL
1373 return -1;
1374 }
1375
cc28d0b0
SH
1376 if (detect_shared_rootfs()) {
1377 if (chroot_into_slave(conf)) {
1378 ERROR("Failed to chroot into slave /");
1379 return -1;
1380 }
1381 }
1382
9be53773
SH
1383 // First try mounting rootfs using a bdev
1384 struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL);
1385 if (bdev && bdev->ops->mount(bdev) == 0) {
59d66af2 1386 bdev_put(bdev);
9be53773
SH
1387 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1388 return 0;
1389 }
59d66af2
SH
1390 if (bdev)
1391 bdev_put(bdev);
2656d231 1392 if (mount_rootfs(rootfs->path, rootfs->mount)) {
a6afdde9 1393 ERROR("failed to mount rootfs");
c3f0a28c 1394 return -1;
1395 }
0ad19a3f 1396
12297168 1397 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
c69bd12f 1398
ac778708
DL
1399 return 0;
1400}
1401
1402int setup_pivot_root(const struct lxc_rootfs *rootfs)
1403{
ac778708
DL
1404 if (!rootfs->path)
1405 return 0;
1406
12297168 1407 if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
cc6f6dd7 1408 ERROR("failed to setup pivot root");
25368b52 1409 return -1;
c69bd12f
DL
1410 }
1411
25368b52 1412 return 0;
0ad19a3f 1413}
1414
d852c78c 1415static int setup_pts(int pts)
3c26f34e 1416{
77890c6d
SW
1417 char target[PATH_MAX];
1418
d852c78c
DL
1419 if (!pts)
1420 return 0;
3c26f34e 1421
1422 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
36eb9bde 1423 SYSERROR("failed to umount 'dev/pts'");
3c26f34e 1424 return -1;
1425 }
1426
a6afdde9 1427 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
67e5a20a 1428 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
36eb9bde 1429 SYSERROR("failed to mount a new instance of '/dev/pts'");
3c26f34e 1430 return -1;
1431 }
1432
3c26f34e 1433 if (access("/dev/ptmx", F_OK)) {
1434 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1435 goto out;
36eb9bde 1436 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1437 return -1;
1438 }
1439
77890c6d
SW
1440 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1441 goto out;
1442
3c26f34e 1443 /* fallback here, /dev/pts/ptmx exists just mount bind */
1444 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
36eb9bde 1445 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
3c26f34e 1446 return -1;
1447 }
cd54d859
DL
1448
1449 INFO("created new pts instance");
d852c78c 1450
3c26f34e 1451out:
1452 return 0;
1453}
1454
cccc74b5
DL
/*
 * Apply the requested execution domain to the calling process.
 *
 * persona: value passed to personality(); -1 means "leave unchanged".
 *
 * When HAVE_SYS_PERSONALITY_H is not set this is a no-op.
 *
 * Returns 0 on success or when nothing is done, -1 if personality() fails.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1471
7c6ef2a2 1472static int setup_dev_console(const struct lxc_rootfs *rootfs,
33fcb7a0 1473 const struct lxc_console *console)
6e590161 1474{
63376d7d
DL
1475 char path[MAXPATHLEN];
1476 struct stat s;
7c6ef2a2 1477 int ret;
52e35957 1478
7c6ef2a2
SH
1479 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1480 if (ret >= sizeof(path)) {
1481 ERROR("console path too long\n");
1482 return -1;
1483 }
52e35957 1484
63376d7d 1485 if (access(path, F_OK)) {
466978b0 1486 WARN("rootfs specified but no console found at '%s'", path);
63376d7d 1487 return 0;
52e35957
DL
1488 }
1489
b5159817
DE
1490 if (console->master < 0) {
1491 INFO("no console");
f78a1f32
DL
1492 return 0;
1493 }
ed502555 1494
63376d7d
DL
1495 if (stat(path, &s)) {
1496 SYSERROR("failed to stat '%s'", path);
1497 return -1;
1498 }
1499
1500 if (chmod(console->name, s.st_mode)) {
1501 SYSERROR("failed to set mode '0%o' to '%s'",
1502 s.st_mode, console->name);
1503 return -1;
1504 }
13954cce 1505
63376d7d
DL
1506 if (mount(console->name, path, "none", MS_BIND, 0)) {
1507 ERROR("failed to mount '%s' on '%s'", console->name, path);
6e590161 1508 return -1;
1509 }
1510
63376d7d 1511 INFO("console has been setup");
7c6ef2a2
SH
1512 return 0;
1513}
1514
1515static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1516 const struct lxc_console *console,
1517 char *ttydir)
1518{
1519 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1520 int ret;
1521
1522 /* create rootfs/dev/<ttydir> directory */
1523 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1524 ttydir);
1525 if (ret >= sizeof(path))
1526 return -1;
1527 ret = mkdir(path, 0755);
1528 if (ret && errno != EEXIST) {
1529 SYSERROR("failed with errno %d to create %s\n", errno, path);
1530 return -1;
1531 }
1532 INFO("created %s\n", path);
1533
1534 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1535 rootfs->mount, ttydir);
1536 if (ret >= sizeof(lxcpath)) {
1537 ERROR("console path too long\n");
1538 return -1;
1539 }
1540
1541 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1542 ret = unlink(path);
1543 if (ret && errno != ENOENT) {
1544 SYSERROR("error unlinking %s\n", path);
1545 return -1;
1546 }
1547
025ed0f3 1548 process_lock();
7c6ef2a2 1549 ret = creat(lxcpath, 0660);
025ed0f3 1550 process_unlock();
7c6ef2a2
SH
1551 if (ret==-1 && errno != EEXIST) {
1552 SYSERROR("error %d creating %s\n", errno, lxcpath);
1553 return -1;
1554 }
025ed0f3 1555 process_lock();
4d44e274
SH
1556 if (ret >= 0)
1557 close(ret);
025ed0f3 1558 process_unlock();
7c6ef2a2 1559
b5159817
DE
1560 if (console->master < 0) {
1561 INFO("no console");
7c6ef2a2
SH
1562 return 0;
1563 }
1564
1565 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1566 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1567 return -1;
1568 }
1569
1570 /* create symlink from rootfs/dev/console to 'lxc/console' */
9ba8130c
SH
1571 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1572 if (ret >= sizeof(lxcpath)) {
1573 ERROR("lxc/console path too long");
1574 return -1;
1575 }
7c6ef2a2
SH
1576 ret = symlink(lxcpath, path);
1577 if (ret) {
1578 SYSERROR("failed to create symlink for console");
1579 return -1;
1580 }
1581
1582 INFO("console has been setup on %s", lxcpath);
cd54d859 1583
6e590161 1584 return 0;
1585}
1586
7c6ef2a2
SH
1587static int setup_console(const struct lxc_rootfs *rootfs,
1588 const struct lxc_console *console,
1589 char *ttydir)
1590{
1591 /* We don't have a rootfs, /dev/console will be shared */
1592 if (!rootfs->path)
1593 return 0;
1594 if (!ttydir)
1595 return setup_dev_console(rootfs, console);
1596
1597 return setup_ttydir_console(rootfs, console, ttydir);
1598}
1599
1bd051a6
SH
1600static int setup_kmsg(const struct lxc_rootfs *rootfs,
1601 const struct lxc_console *console)
1602{
1603 char kpath[MAXPATHLEN];
1604 int ret;
1605
222fea5a
DE
1606 if (!rootfs->path)
1607 return 0;
1bd051a6
SH
1608 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1609 if (ret < 0 || ret >= sizeof(kpath))
1610 return -1;
1611
1612 ret = unlink(kpath);
1613 if (ret && errno != ENOENT) {
1614 SYSERROR("error unlinking %s\n", kpath);
1615 return -1;
1616 }
1617
1618 ret = symlink("console", kpath);
1619 if (ret) {
1620 SYSERROR("failed to create symlink for kmsg");
1621 return -1;
1622 }
1623
1624 return 0;
1625}
1626
998ac676
RT
1627static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1628{
1629 struct mount_opt *mo;
1630
1631 /* If opt is found in mount_opt, set or clear flags.
1632 * Otherwise append it to data. */
1633
1634 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1635 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1636 if (mo->clear)
1637 *flags &= ~mo->flag;
1638 else
1639 *flags |= mo->flag;
1640 return;
1641 }
1642 }
1643
1644 if (strlen(*data))
1645 strcat(*data, ",");
1646 strcat(*data, opt);
1647}
1648
/*
 * Split a comma separated mount option string into mount flags and a
 * data string.
 *
 * mntopts:  option string, may be NULL.
 * mntflags: receives the flags; always reset to 0 first.
 * mntdata:  receives a newly allocated string with the options that are
 *           not flags, or NULL when there are none. The caller frees it.
 *
 * Returns 0 on success, -1 if memory cannot be allocated.
 */
static int parse_mntopts(const char *mntopts, unsigned long *mntflags,
			 char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (mntopts == NULL)
		return 0;

	/* strtok_r modifies its input, so work on a private copy */
	copy = strdup(mntopts);
	if (copy == NULL) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* the data string can never get longer than the input */
	extra = malloc(strlen(copy) + 1);
	if (extra == NULL) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1687
911324ef
DL
/*
 * Perform one mount.
 *
 * fsname, target, fstype, data: passed to mount() unchanged.
 * mountflags: mount flags; MS_REMOUNT is stripped for the first mount()
 *             call.
 *
 * When MS_REMOUNT or MS_BIND is requested a second mount() with
 * MS_REMOUNT added follows the first one.
 *
 * Returns 0 on success, -1 if either mount() call fails.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data)
{
	const unsigned long initial = mountflags & ~MS_REMOUNT;

	if (mount(fsname, target, fstype, initial, data) != 0) {
		SYSERROR("failed to mount '%s' on '%s'", fsname, target);
		return -1;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname, target);

		if (mount(fsname, target, fstype,
			  mountflags | MS_REMOUNT, data) != 0) {
			SYSERROR("failed to mount '%s' on '%s'",
				 fsname, target);
			return -1;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1714
/*
 * Mount an fstab entry exactly where it says (no container rootfs).
 *
 * mntent: the entry to mount.
 *
 * Returns the result of the mount, forced to 0 when the entry carries the
 * "optional" option; -1 if the mount options cannot be parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long flags;
	char *data;
	int rc;

	if (parse_mntopts(mntent->mnt_opts, &flags, &data) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	rc = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			 mntent->mnt_type, flags, data);

	/* failures of optional entries are not reported to the caller */
	if (hasmntopt(mntent, "optional"))
		rc = 0;

	free(data);

	return rc;
}
1736
1737static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
80a881b2
SH
1738 const struct lxc_rootfs *rootfs,
1739 const char *lxc_name)
911324ef 1740{
013bd428 1741 char *aux;
59760f5d 1742 char path[MAXPATHLEN];
911324ef
DL
1743 unsigned long mntflags;
1744 char *mntdata;
80a881b2 1745 int r, ret = 0, offset;
67e571de 1746 const char *lxcpath;
0ad19a3f 1747
911324ef
DL
1748 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1749 ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
1750 return -1;
1751 }
1bc60a65 1752
2a59a681
SH
1753 lxcpath = default_lxc_path();
1754 if (!lxcpath) {
1755 ERROR("Out of memory");
1756 return -1;
1757 }
1758
80a881b2 1759 /* if rootfs->path is a blockdev path, allow container fstab to
2a59a681
SH
1760 * use $lxcpath/CN/rootfs as the target prefix */
1761 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
80a881b2
SH
1762 if (r < 0 || r >= MAXPATHLEN)
1763 goto skipvarlib;
1764
1765 aux = strstr(mntent->mnt_dir, path);
1766 if (aux) {
1767 offset = strlen(path);
1768 goto skipabs;
1769 }
1770
1771skipvarlib:
013bd428
DL
1772 aux = strstr(mntent->mnt_dir, rootfs->path);
1773 if (!aux) {
1774 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1775 goto out;
1776 }
80a881b2
SH
1777 offset = strlen(rootfs->path);
1778
1779skipabs:
013bd428 1780
9ba8130c 1781 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
80a881b2
SH
1782 aux + offset);
1783 if (r < 0 || r >= MAXPATHLEN) {
1784 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1785 ret = -1;
1786 goto out;
1787 }
1788
d330fe7b 1789
013bd428 1790 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
911324ef 1791 mntflags, mntdata);
0ad19a3f 1792
68c152ef
SH
1793 if (hasmntopt(mntent, "optional") != NULL)
1794 ret = 0;
1795
013bd428 1796out:
911324ef
DL
1797 free(mntdata);
1798 return ret;
1799}
d330fe7b 1800
911324ef
DL
/*
 * Mount an fstab entry whose target is relative to the container rootfs.
 *
 * mntent: the entry to mount.
 * rootfs: mount point of the container root; the entry's target is
 *         appended to it.
 *
 * Returns the result of the mount, forced to 0 when the entry carries the
 * "optional" option; -1 if the options cannot be parsed or the resulting
 * path does not fit in MAXPATHLEN.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		ERROR("failed to parse mount option '%s'", mntent->mnt_opts);
		return -1;
	}

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		/* mntdata was allocated by parse_mntopts(); don't leak it */
		ret = -1;
		goto out;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata);

	/* failures of optional entries are not reported to the caller */
	if (hasmntopt(mntent, "optional") != NULL)
		ret = 0;

out:
	free(mntdata);

	return ret;
}
1831
80a881b2
SH
1832static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1833 const char *lxc_name)
911324ef
DL
1834{
1835 struct mntent *mntent;
1836 int ret = -1;
e76b8764 1837
911324ef 1838 while ((mntent = getmntent(file))) {
e76b8764 1839
911324ef
DL
1840 if (!rootfs->path) {
1841 if (mount_entry_on_systemfs(mntent))
e76b8764 1842 goto out;
911324ef 1843 continue;
e76b8764
CDC
1844 }
1845
911324ef
DL
1846 /* We have a separate root, mounts are relative to it */
1847 if (mntent->mnt_dir[0] != '/') {
1848 if (mount_entry_on_relative_rootfs(mntent,
1849 rootfs->mount))
1850 goto out;
1851 continue;
1852 }
cd54d859 1853
80a881b2 1854 if (mount_entry_on_absolute_rootfs(mntent, rootfs, lxc_name))
911324ef 1855 goto out;
0ad19a3f 1856 }
cd54d859 1857
0ad19a3f 1858 ret = 0;
cd54d859
DL
1859
1860 INFO("mount points have been setup");
0ad19a3f 1861out:
e7938e9e
MN
1862 return ret;
1863}
1864
80a881b2
SH
/*
 * Mount the entries of the container fstab file.
 *
 * rootfs:   root filesystem description.
 * fstab:    path of the fstab file, or NULL when there is none.
 * lxc_name: container name.
 *
 * Returns 0 when there is no fstab or all entries were mounted, -1 if the
 * file cannot be opened or an entry fails.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *entries;
	int rc;

	if (fstab == NULL)
		return 0;

	process_lock();
	entries = setmntent(fstab, "r");
	process_unlock();
	if (entries == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, entries, lxc_name);

	process_lock();
	endmntent(entries);
	process_unlock();

	return rc;
}
1889
80a881b2
SH
1890static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
1891 const char *lxc_name)
e7938e9e
MN
1892{
1893 FILE *file;
1894 struct lxc_list *iterator;
1895 char *mount_entry;
1896 int ret;
1897
025ed0f3 1898 process_lock();
e7938e9e 1899 file = tmpfile();
025ed0f3 1900 process_unlock();
e7938e9e
MN
1901 if (!file) {
1902 ERROR("tmpfile error: %m");
1903 return -1;
1904 }
1905
1906 lxc_list_for_each(iterator, mount) {
1907 mount_entry = iterator->elem;
1d6b1976 1908 fprintf(file, "%s\n", mount_entry);
e7938e9e
MN
1909 }
1910
1911 rewind(file);
1912
80a881b2 1913 ret = mount_file_entries(rootfs, file, lxc_name);
e7938e9e 1914
025ed0f3 1915 process_lock();
e7938e9e 1916 fclose(file);
025ed0f3 1917 process_unlock();
e7938e9e
MN
1918 return ret;
1919}
1920
81810dd1
DL
1921static int setup_caps(struct lxc_list *caps)
1922{
1923 struct lxc_list *iterator;
1924 char *drop_entry;
d55bc1ad 1925 char *ptr;
81810dd1
DL
1926 int i, capid;
1927
1928 lxc_list_for_each(iterator, caps) {
1929
1930 drop_entry = iterator->elem;
1931
1932 capid = -1;
1933
1934 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
1935
1936 if (strcmp(drop_entry, caps_opt[i].name))
1937 continue;
1938
1939 capid = caps_opt[i].value;
1940 break;
1941 }
1942
d55bc1ad
CS
1943 if (capid < 0) {
1944 /* try to see if it's numeric, so the user may specify
1945 * capabilities that the running kernel knows about but
1946 * we don't */
09bbd745 1947 errno = 0;
d55bc1ad 1948 capid = strtol(drop_entry, &ptr, 10);
09bbd745 1949 if (!ptr || *ptr != '\0' || errno != 0)
d55bc1ad
CS
1950 /* not a valid number */
1951 capid = -1;
1952 else if (capid > lxc_caps_last_cap())
1953 /* we have a number but it's not a valid
1954 * capability */
1955 capid = -1;
1956 }
1957
81810dd1 1958 if (capid < 0) {
1e11be34
DL
1959 ERROR("unknown capability %s", drop_entry);
1960 return -1;
81810dd1
DL
1961 }
1962
1963 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
1964
1965 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
1966 SYSERROR("failed to remove %s capability", drop_entry);
1967 return -1;
1968 }
1969
1970 }
1971
1fb86a7c
SH
1972 DEBUG("capabilities have been setup");
1973
1974 return 0;
1975}
1976
1977static int dropcaps_except(struct lxc_list *caps)
1978{
1979 struct lxc_list *iterator;
1980 char *keep_entry;
1981 char *ptr;
1982 int i, capid;
1983 int numcaps = lxc_caps_last_cap() + 1;
1984 INFO("found %d capabilities\n", numcaps);
1985
2caf9a97
SH
1986 if (numcaps <= 0 || numcaps > 200)
1987 return -1;
1988
1fb86a7c
SH
1989 // caplist[i] is 1 if we keep capability i
1990 int *caplist = alloca(numcaps * sizeof(int));
1991 memset(caplist, 0, numcaps * sizeof(int));
1992
1993 lxc_list_for_each(iterator, caps) {
1994
1995 keep_entry = iterator->elem;
1996
1997 capid = -1;
1998
1999 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2000
2001 if (strcmp(keep_entry, caps_opt[i].name))
2002 continue;
2003
2004 capid = caps_opt[i].value;
2005 break;
2006 }
2007
2008 if (capid < 0) {
2009 /* try to see if it's numeric, so the user may specify
2010 * capabilities that the running kernel knows about but
2011 * we don't */
2012 capid = strtol(keep_entry, &ptr, 10);
2013 if (!ptr || *ptr != '\0' ||
f371aca9 2014 capid == INT_MIN || capid == INT_MAX)
1fb86a7c
SH
2015 /* not a valid number */
2016 capid = -1;
2017 else if (capid > lxc_caps_last_cap())
2018 /* we have a number but it's not a valid
2019 * capability */
2020 capid = -1;
2021 }
2022
2023 if (capid < 0) {
2024 ERROR("unknown capability %s", keep_entry);
2025 return -1;
2026 }
2027
2028 DEBUG("drop capability '%s' (%d)", keep_entry, capid);
2029
2030 caplist[capid] = 1;
2031 }
2032 for (i=0; i<numcaps; i++) {
2033 if (caplist[i])
2034 continue;
2035 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
2036 SYSERROR("failed to remove capability %d", i);
2037 return -1;
2038 }
2039 }
2040
2041 DEBUG("capabilities have been setup");
81810dd1
DL
2042
2043 return 0;
2044}
2045
0ad19a3f 2046static int setup_hw_addr(char *hwaddr, const char *ifname)
2047{
2048 struct sockaddr sockaddr;
2049 struct ifreq ifr;
2050 int ret, fd;
2051
3cfc0f3a
MN
2052 ret = lxc_convert_mac(hwaddr, &sockaddr);
2053 if (ret) {
2054 ERROR("mac address '%s' conversion failed : %s",
2055 hwaddr, strerror(-ret));
0ad19a3f 2056 return -1;
2057 }
2058
2059 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
5da6aa8c 2060 ifr.ifr_name[IFNAMSIZ-1] = '\0';
0ad19a3f 2061 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2062
025ed0f3 2063 process_lock();
0ad19a3f 2064 fd = socket(AF_INET, SOCK_DGRAM, 0);
025ed0f3 2065 process_unlock();
0ad19a3f 2066 if (fd < 0) {
3ab87b66 2067 ERROR("socket failure : %s", strerror(errno));
0ad19a3f 2068 return -1;
2069 }
2070
2071 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
025ed0f3 2072 process_lock();
0ad19a3f 2073 close(fd);
025ed0f3 2074 process_unlock();
0ad19a3f 2075 if (ret)
3ab87b66 2076 ERROR("ioctl failure : %s", strerror(errno));
0ad19a3f 2077
5da6aa8c 2078 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
cd54d859 2079
0ad19a3f 2080 return ret;
2081}
2082
82d5ae15 2083static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2084{
82d5ae15
DL
2085 struct lxc_list *iterator;
2086 struct lxc_inetdev *inetdev;
3cfc0f3a 2087 int err;
0ad19a3f 2088
82d5ae15
DL
2089 lxc_list_for_each(iterator, ip) {
2090
2091 inetdev = iterator->elem;
2092
0093bb8c
DL
2093 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2094 &inetdev->bcast, inetdev->prefix);
3cfc0f3a
MN
2095 if (err) {
2096 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2097 ifindex, strerror(-err));
82d5ae15
DL
2098 return -1;
2099 }
2100 }
2101
2102 return 0;
0ad19a3f 2103}
2104
82d5ae15 2105static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
0ad19a3f 2106{
82d5ae15 2107 struct lxc_list *iterator;
7fa9074f 2108 struct lxc_inet6dev *inet6dev;
3cfc0f3a 2109 int err;
0ad19a3f 2110
82d5ae15
DL
2111 lxc_list_for_each(iterator, ip) {
2112
2113 inet6dev = iterator->elem;
2114
b3df193c 2115 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
0093bb8c
DL
2116 &inet6dev->mcast, &inet6dev->acast,
2117 inet6dev->prefix);
3cfc0f3a
MN
2118 if (err) {
2119 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2120 ifindex, strerror(-err));
82d5ae15 2121 return -1;
3cfc0f3a 2122 }
82d5ae15
DL
2123 }
2124
2125 return 0;
0ad19a3f 2126}
2127
82d5ae15 2128static int setup_netdev(struct lxc_netdev *netdev)
0ad19a3f 2129{
0ad19a3f 2130 char ifname[IFNAMSIZ];
0ad19a3f 2131 char *current_ifname = ifname;
3cfc0f3a 2132 int err;
0ad19a3f 2133
82d5ae15
DL
2134 /* empty network namespace */
2135 if (!netdev->ifindex) {
b0efbac4 2136 if (netdev->flags & IFF_UP) {
d472214b 2137 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2138 if (err) {
2139 ERROR("failed to set the loopback up : %s",
2140 strerror(-err));
82d5ae15
DL
2141 return -1;
2142 }
82d5ae15 2143 }
7b57e8b6 2144 return 0;
0ad19a3f 2145 }
13954cce 2146
82d5ae15
DL
2147 /* retrieve the name of the interface */
2148 if (!if_indextoname(netdev->ifindex, current_ifname)) {
36eb9bde 2149 ERROR("no interface corresponding to index '%d'",
82d5ae15 2150 netdev->ifindex);
0ad19a3f 2151 return -1;
2152 }
13954cce 2153
018ef520 2154 /* default: let the system to choose one interface name */
9d083402 2155 if (!netdev->name)
fb6d9b2f
DL
2156 netdev->name = netdev->type == LXC_NET_PHYS ?
2157 netdev->link : "eth%d";
018ef520 2158
82d5ae15 2159 /* rename the interface name */
b84f58b9 2160 err = lxc_netdev_rename_by_name(ifname, netdev->name);
3cfc0f3a
MN
2161 if (err) {
2162 ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
2163 strerror(-err));
018ef520
DL
2164 return -1;
2165 }
2166
2167 /* Re-read the name of the interface because its name has changed
2168 * and would be automatically allocated by the system
2169 */
82d5ae15 2170 if (!if_indextoname(netdev->ifindex, current_ifname)) {
018ef520 2171 ERROR("no interface corresponding to index '%d'",
82d5ae15 2172 netdev->ifindex);
018ef520 2173 return -1;
0ad19a3f 2174 }
2175
82d5ae15
DL
2176 /* set a mac address */
2177 if (netdev->hwaddr) {
2178 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
36eb9bde 2179 ERROR("failed to setup hw address for '%s'",
82d5ae15 2180 current_ifname);
0ad19a3f 2181 return -1;
2182 }
2183 }
2184
82d5ae15
DL
2185 /* setup ipv4 addresses on the interface */
2186 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
36eb9bde 2187 ERROR("failed to setup ip addresses for '%s'",
0ad19a3f 2188 ifname);
2189 return -1;
2190 }
2191
82d5ae15
DL
2192 /* setup ipv6 addresses on the interface */
2193 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
36eb9bde 2194 ERROR("failed to setup ipv6 addresses for '%s'",
0ad19a3f 2195 ifname);
2196 return -1;
2197 }
2198
82d5ae15 2199 /* set the network device up */
b0efbac4 2200 if (netdev->flags & IFF_UP) {
3cfc0f3a
MN
2201 int err;
2202
d472214b 2203 err = lxc_netdev_up(current_ifname);
3cfc0f3a
MN
2204 if (err) {
2205 ERROR("failed to set '%s' up : %s", current_ifname,
2206 strerror(-err));
0ad19a3f 2207 return -1;
2208 }
2209
2210 /* the network is up, make the loopback up too */
d472214b 2211 err = lxc_netdev_up("lo");
3cfc0f3a
MN
2212 if (err) {
2213 ERROR("failed to set the loopback up : %s",
2214 strerror(-err));
0ad19a3f 2215 return -1;
2216 }
2217 }
2218
f8fee0e2
MK
2219 /* We can only set up the default routes after bringing
2220 * up the interface, sine bringing up the interface adds
2221 * the link-local routes and we can't add a default
2222 * route if the gateway is not reachable. */
2223
2224 /* setup ipv4 gateway on the interface */
2225 if (netdev->ipv4_gateway) {
2226 if (!(netdev->flags & IFF_UP)) {
2227 ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
2228 return -1;
2229 }
2230
2231 if (lxc_list_empty(&netdev->ipv4)) {
2232 ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
2233 return -1;
2234 }
2235
2236 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2237 if (err) {
2238 ERROR("failed to setup ipv4 gateway for '%s': %s",
2239 ifname, strerror(-err));
19a26f82
MK
2240 if (netdev->ipv4_gateway_auto) {
2241 char buf[INET_ADDRSTRLEN];
2242 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2243 ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
2244 }
f8fee0e2
MK
2245 return -1;
2246 }
2247 }
2248
2249 /* setup ipv6 gateway on the interface */
2250 if (netdev->ipv6_gateway) {
2251 if (!(netdev->flags & IFF_UP)) {
2252 ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
2253 return -1;
2254 }
2255
2256 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2257 ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
2258 return -1;
2259 }
2260
2261 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2262 if (err) {
2263 ERROR("failed to setup ipv6 gateway for '%s': %s",
2264 ifname, strerror(-err));
19a26f82
MK
2265 if (netdev->ipv6_gateway_auto) {
2266 char buf[INET6_ADDRSTRLEN];
72d0e1cb 2267 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
19a26f82
MK
2268 ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
2269 }
f8fee0e2
MK
2270 return -1;
2271 }
2272 }
2273
cd54d859
DL
2274 DEBUG("'%s' has been setup", current_ifname);
2275
0ad19a3f 2276 return 0;
2277}
2278
5f4535a3 2279static int setup_network(struct lxc_list *network)
0ad19a3f 2280{
82d5ae15 2281 struct lxc_list *iterator;
82d5ae15 2282 struct lxc_netdev *netdev;
0ad19a3f 2283
5f4535a3 2284 lxc_list_for_each(iterator, network) {
cd54d859 2285
5f4535a3 2286 netdev = iterator->elem;
82d5ae15
DL
2287
2288 if (setup_netdev(netdev)) {
2289 ERROR("failed to setup netdev");
2290 return -1;
2291 }
2292 }
cd54d859 2293
5f4535a3
DL
2294 if (!lxc_list_empty(network))
2295 INFO("network has been setup");
cd54d859
DL
2296
2297 return 0;
0ad19a3f 2298}
2299
7b35f3d6
SH
2300void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf)
2301{
2302 int i;
2303
2304 INFO("running to reset %d nic names", conf->num_savednics);
2305 for (i=0; i<conf->num_savednics; i++) {
2306 struct saved_nic *s = &conf->saved_nics[i];
2307 INFO("resetting nic %d to %s\n", s->ifindex, s->orig_name);
2308 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2309 free(s->orig_name);
2310 }
2311 conf->num_savednics = 0;
2312 free(conf->saved_nics);
2313}
2314
49684c0b
CS
2315static int setup_private_host_hw_addr(char *veth1)
2316{
2317 struct ifreq ifr;
2318 int err;
2319 int sockfd;
2320
025ed0f3 2321 process_lock();
49684c0b 2322 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
025ed0f3 2323 process_unlock();
49684c0b
CS
2324 if (sockfd < 0)
2325 return -errno;
2326
2327 snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2328 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2329 if (err < 0) {
025ed0f3 2330 process_lock();
49684c0b 2331 close(sockfd);
025ed0f3 2332 process_unlock();
49684c0b
CS
2333 return -errno;
2334 }
2335
2336 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2337 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
025ed0f3 2338 process_lock();
49684c0b 2339 close(sockfd);
025ed0f3 2340 process_unlock();
49684c0b
CS
2341 if (err < 0)
2342 return -errno;
2343
7ad84da7
DL
2344 DEBUG("mac address of host interface '%s' changed to private "
2345 "%02x:%02x:%02x:%02x:%02x:%02x", veth1,
2346 ifr.ifr_hwaddr.sa_data[0] & 0xff,
2347 ifr.ifr_hwaddr.sa_data[1] & 0xff,
2348 ifr.ifr_hwaddr.sa_data[2] & 0xff,
2349 ifr.ifr_hwaddr.sa_data[3] & 0xff,
2350 ifr.ifr_hwaddr.sa_data[4] & 0xff,
2351 ifr.ifr_hwaddr.sa_data[5] & 0xff);
49684c0b
CS
2352
2353 return 0;
2354}
2355
ae9242c8
SH
2356static char *default_rootfs_mount = LXCROOTFSMOUNT;
2357
7b379ab3 2358struct lxc_conf *lxc_conf_init(void)
089cd8b8 2359{
7b379ab3 2360 struct lxc_conf *new;
26ddeedd 2361 int i;
7b379ab3
MN
2362
2363 new = malloc(sizeof(*new));
2364 if (!new) {
2365 ERROR("lxc_conf_init : %m");
2366 return NULL;
2367 }
2368 memset(new, 0, sizeof(*new));
2369
b40a606e 2370 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
cccc74b5 2371 new->personality = -1;
596a818d
DE
2372 new->console.log_path = NULL;
2373 new->console.log_fd = -1;
28a4b0e5 2374 new->console.path = NULL;
63376d7d 2375 new->console.peer = -1;
b5159817
DE
2376 new->console.peerpty.busy = -1;
2377 new->console.peerpty.master = -1;
2378 new->console.peerpty.slave = -1;
63376d7d
DL
2379 new->console.master = -1;
2380 new->console.slave = -1;
2381 new->console.name[0] = '\0';
d2e30e99 2382 new->maincmd_fd = -1;
54c30e29 2383 new->rootfs.mount = strdup(default_rootfs_mount);
53f3f048
SH
2384 if (!new->rootfs.mount) {
2385 ERROR("lxc_conf_init : %m");
2386 free(new);
2387 return NULL;
2388 }
2f3f41d0 2389 new->kmsg = 1;
7b379ab3
MN
2390 lxc_list_init(&new->cgroup);
2391 lxc_list_init(&new->network);
2392 lxc_list_init(&new->mount_list);
81810dd1 2393 lxc_list_init(&new->caps);
1fb86a7c 2394 lxc_list_init(&new->keepcaps);
f6d3e3e4 2395 lxc_list_init(&new->id_map);
26ddeedd
SH
2396 for (i=0; i<NUM_LXC_HOOKS; i++)
2397 lxc_list_init(&new->hooks[i]);
fe4de9a6
DE
2398 new->lsm_aa_profile = NULL;
2399 new->lsm_se_context = NULL;
e075f5d9 2400 new->lsm_umount_proc = 0;
7b379ab3
MN
2401
2402 return new;
089cd8b8
DL
2403}
2404
e3b4c4c4 2405static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2406{
8634bc19 2407 char veth1buf[IFNAMSIZ], *veth1;
0e391e57 2408 char veth2buf[IFNAMSIZ], *veth2;
3cfc0f3a 2409 int err;
13954cce 2410
e892973e
DL
2411 if (netdev->priv.veth_attr.pair)
2412 veth1 = netdev->priv.veth_attr.pair;
8634bc19 2413 else {
9ba8130c
SH
2414 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2415 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2416 ERROR("veth1 name too long");
2417 return -1;
2418 }
4a0ba80d 2419 veth1 = mkifname(veth1buf);
74a2b586
JK
2420 /* store away for deconf */
2421 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
8634bc19 2422 }
82d5ae15 2423
0e391e57 2424 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
4a0ba80d 2425 veth2 = mkifname(veth2buf);
82d5ae15
DL
2426
2427 if (!strlen(veth1) || !strlen(veth2)) {
2428 ERROR("failed to allocate a temporary name");
2429 return -1;
0ad19a3f 2430 }
2431
3cfc0f3a
MN
2432 err = lxc_veth_create(veth1, veth2);
2433 if (err) {
2434 ERROR("failed to create %s-%s : %s", veth1, veth2,
2435 strerror(-err));
6ab9ab6d 2436 return -1;
0ad19a3f 2437 }
13954cce 2438
49684c0b
CS
2439 /* changing the high byte of the mac address to 0xfe, the bridge interface
2440 * will always keep the host's mac address and not take the mac address
2441 * of a container */
2442 err = setup_private_host_hw_addr(veth1);
2443 if (err) {
2444 ERROR("failed to change mac address of host interface '%s' : %s",
2445 veth1, strerror(-err));
2446 goto out_delete;
2447 }
2448
82d5ae15 2449 if (netdev->mtu) {
d472214b 2450 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
3cfc0f3a 2451 if (!err)
d472214b 2452 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
3cfc0f3a
MN
2453 if (err) {
2454 ERROR("failed to set mtu '%s' for %s-%s : %s",
2455 netdev->mtu, veth1, veth2, strerror(-err));
eb14c10a 2456 goto out_delete;
75d09f83
DL
2457 }
2458 }
2459
3cfc0f3a
MN
2460 if (netdev->link) {
2461 err = lxc_bridge_attach(netdev->link, veth1);
2462 if (err) {
2463 ERROR("failed to attach '%s' to the bridge '%s' : %s",
2464 veth1, netdev->link, strerror(-err));
2465 goto out_delete;
2466 }
eb14c10a
DL
2467 }
2468
82d5ae15
DL
2469 netdev->ifindex = if_nametoindex(veth2);
2470 if (!netdev->ifindex) {
36eb9bde 2471 ERROR("failed to retrieve the index for %s", veth2);
eb14c10a
DL
2472 goto out_delete;
2473 }
2474
d472214b 2475 err = lxc_netdev_up(veth1);
6e35af2e
DL
2476 if (err) {
2477 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2478 goto out_delete;
0ad19a3f 2479 }
2480
e3b4c4c4 2481 if (netdev->upscript) {
751d9dcd
DL
2482 err = run_script(handler->name, "net", netdev->upscript, "up",
2483 "veth", veth1, (char*) NULL);
2484 if (err)
e3b4c4c4 2485 goto out_delete;
e3b4c4c4
ST
2486 }
2487
82d5ae15
DL
2488 DEBUG("instanciated veth '%s/%s', index is '%d'",
2489 veth1, veth2, netdev->ifindex);
2490
6ab9ab6d 2491 return 0;
eb14c10a
DL
2492
2493out_delete:
b84f58b9 2494 lxc_netdev_delete_by_name(veth1);
6ab9ab6d 2495 return -1;
13954cce 2496}
d957ae2d 2497
74a2b586
JK
2498static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2499{
2500 char *veth1;
2501 int err;
2502
2503 if (netdev->priv.veth_attr.pair)
2504 veth1 = netdev->priv.veth_attr.pair;
2505 else
2506 veth1 = netdev->priv.veth_attr.veth1;
2507
2508 if (netdev->downscript) {
2509 err = run_script(handler->name, "net", netdev->downscript,
2510 "down", "veth", veth1, (char*) NULL);
2511 if (err)
2512 return -1;
2513 }
2514 return 0;
2515}
2516
e3b4c4c4 2517static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2518{
0e391e57 2519 char peerbuf[IFNAMSIZ], *peer;
3cfc0f3a 2520 int err;
d957ae2d
MT
2521
2522 if (!netdev->link) {
2523 ERROR("no link specified for macvlan netdev");
2524 return -1;
2525 }
13954cce 2526
9ba8130c
SH
2527 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2528 if (err >= sizeof(peerbuf))
2529 return -1;
82d5ae15 2530
4a0ba80d 2531 peer = mkifname(peerbuf);
82d5ae15
DL
2532 if (!strlen(peer)) {
2533 ERROR("failed to make a temporary name");
2534 return -1;
0ad19a3f 2535 }
2536
3cfc0f3a
MN
2537 err = lxc_macvlan_create(netdev->link, peer,
2538 netdev->priv.macvlan_attr.mode);
2539 if (err) {
2540 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2541 peer, netdev->link, strerror(-err));
d957ae2d 2542 return -1;
0ad19a3f 2543 }
2544
82d5ae15
DL
2545 netdev->ifindex = if_nametoindex(peer);
2546 if (!netdev->ifindex) {
36eb9bde 2547 ERROR("failed to retrieve the index for %s", peer);
b84f58b9 2548 lxc_netdev_delete_by_name(peer);
d957ae2d 2549 return -1;
22ebac19 2550 }
2551
e3b4c4c4 2552 if (netdev->upscript) {
751d9dcd
DL
2553 err = run_script(handler->name, "net", netdev->upscript, "up",
2554 "macvlan", netdev->link, (char*) NULL);
2555 if (err)
e3b4c4c4 2556 return -1;
e3b4c4c4
ST
2557 }
2558
e892973e
DL
2559 DEBUG("instanciated macvlan '%s', index is '%d' and mode '%d'",
2560 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
0ad19a3f 2561
d957ae2d 2562 return 0;
0ad19a3f 2563}
2564
74a2b586
JK
2565static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2566{
2567 int err;
2568
2569 if (netdev->downscript) {
2570 err = run_script(handler->name, "net", netdev->downscript,
2571 "down", "macvlan", netdev->link,
2572 (char*) NULL);
2573 if (err)
2574 return -1;
2575 }
2576 return 0;
2577}
2578
26c39028 2579/* XXX: merge with instanciate_macvlan */
e3b4c4c4 2580static int instanciate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
26c39028
JHS
2581{
2582 char peer[IFNAMSIZ];
3cfc0f3a 2583 int err;
26c39028
JHS
2584
2585 if (!netdev->link) {
2586 ERROR("no link specified for vlan netdev");
2587 return -1;
2588 }
2589
9ba8130c
SH
2590 err = snprintf(peer, sizeof(peer), "vlan%d", netdev->priv.vlan_attr.vid);
2591 if (err >= sizeof(peer)) {
2592 ERROR("peer name too long");
2593 return -1;
2594 }
26c39028 2595
3cfc0f3a
MN
2596 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2597 if (err) {
2598 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2599 peer, netdev->link, strerror(-err));
26c39028
JHS
2600 return -1;
2601 }
2602
2603 netdev->ifindex = if_nametoindex(peer);
2604 if (!netdev->ifindex) {
2605 ERROR("failed to retrieve the ifindex for %s", peer);
b84f58b9 2606 lxc_netdev_delete_by_name(peer);
26c39028
JHS
2607 return -1;
2608 }
2609
e892973e
DL
2610 DEBUG("instanciated vlan '%s', ifindex is '%d'", " vlan1000",
2611 netdev->ifindex);
2612
26c39028
JHS
2613 return 0;
2614}
2615
74a2b586
JK
/*
 * Shutdown callback for vlan devices: does nothing and always returns 0.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* both arguments are deliberately unused */
	(void)handler;
	(void)netdev;

	return 0;
}
2620
e3b4c4c4 2621static int instanciate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2622{
6168e99f
DL
2623 if (!netdev->link) {
2624 ERROR("no link specified for the physical interface");
2625 return -1;
2626 }
2627
9d083402 2628 netdev->ifindex = if_nametoindex(netdev->link);
82d5ae15 2629 if (!netdev->ifindex) {
9d083402 2630 ERROR("failed to retrieve the index for %s", netdev->link);
0ad19a3f 2631 return -1;
2632 }
2633
e3b4c4c4
ST
2634 if (netdev->upscript) {
2635 int err;
751d9dcd
DL
2636 err = run_script(handler->name, "net", netdev->upscript,
2637 "up", "phys", netdev->link, (char*) NULL);
2638 if (err)
e3b4c4c4 2639 return -1;
e3b4c4c4
ST
2640 }
2641
82d5ae15 2642 return 0;
0ad19a3f 2643}
2644
74a2b586
JK
2645static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2646{
2647 int err;
2648
2649 if (netdev->downscript) {
2650 err = run_script(handler->name, "net", netdev->downscript,
2651 "down", "phys", netdev->link, (char*) NULL);
2652 if (err)
2653 return -1;
2654 }
2655 return 0;
2656}
2657
e3b4c4c4 2658static int instanciate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
0ad19a3f 2659{
82d5ae15 2660 netdev->ifindex = 0;
e3b4c4c4
ST
2661 if (netdev->upscript) {
2662 int err;
751d9dcd
DL
2663 err = run_script(handler->name, "net", netdev->upscript,
2664 "up", "empty", (char*) NULL);
2665 if (err)
e3b4c4c4 2666 return -1;
e3b4c4c4 2667 }
82d5ae15 2668 return 0;
0ad19a3f 2669}
2670
74a2b586
JK
2671static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2672{
2673 int err;
2674
2675 if (netdev->downscript) {
2676 err = run_script(handler->name, "net", netdev->downscript,
2677 "down", "empty", (char*) NULL);
2678 if (err)
2679 return -1;
2680 }
2681 return 0;
2682}
2683
e3b4c4c4 2684int lxc_create_network(struct lxc_handler *handler)
0ad19a3f 2685{
e3b4c4c4 2686 struct lxc_list *network = &handler->conf->network;
82d5ae15 2687 struct lxc_list *iterator;
82d5ae15 2688 struct lxc_netdev *netdev;
cbef6c52
SH
2689 int am_root = (getuid() == 0);
2690
2691 if (!am_root)
2692 return 0;
0ad19a3f 2693
5f4535a3 2694 lxc_list_for_each(iterator, network) {
0ad19a3f 2695
5f4535a3 2696 netdev = iterator->elem;
13954cce 2697
24654103 2698 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
82d5ae15 2699 ERROR("invalid network configuration type '%d'",
5f4535a3 2700 netdev->type);
82d5ae15
DL
2701 return -1;
2702 }
0ad19a3f 2703
e3b4c4c4 2704 if (netdev_conf[netdev->type](handler, netdev)) {
82d5ae15
DL
2705 ERROR("failed to create netdev");
2706 return -1;
2707 }
e3b4c4c4 2708
0ad19a3f 2709 }
2710
2711 return 0;
2712}
2713
74a2b586 2714void lxc_delete_network(struct lxc_handler *handler)
7fef7a06 2715{
74a2b586 2716 struct lxc_list *network = &handler->conf->network;
7fef7a06
DL
2717 struct lxc_list *iterator;
2718 struct lxc_netdev *netdev;
2719
2720 lxc_list_for_each(iterator, network) {
2721 netdev = iterator->elem;
d472214b 2722
74a2b586 2723 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
d8f8e352
DL
2724 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2725 WARN("failed to rename to the initial name the " \
2726 "netdev '%s'", netdev->link);
d472214b 2727 continue;
d8f8e352 2728 }
d472214b 2729
74a2b586
JK
2730 if (netdev_deconf[netdev->type](handler, netdev)) {
2731 WARN("failed to destroy netdev");
2732 }
2733
d8f8e352
DL
2734 /* Recent kernel remove the virtual interfaces when the network
2735 * namespace is destroyed but in case we did not moved the
2736 * interface to the network namespace, we have to destroy it
2737 */
74a2b586
JK
2738 if (netdev->ifindex != 0 &&
2739 lxc_netdev_delete_by_index(netdev->ifindex))
d8f8e352 2740 WARN("failed to remove interface '%s'", netdev->name);
7fef7a06
DL
2741 }
2742}
2743
cbef6c52
SH
2744int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
2745{
2746 pid_t child;
2747
2748 if (netdev->type != LXC_NET_VETH) {
2749 ERROR("nic type %d not support for unprivileged use",
2750 netdev->type);
2751 return -1;
2752 }
2753
2754 if ((child = fork()) < 0) {
2755 SYSERROR("fork");
2756 return -1;
2757 }
2758
2759 if (child > 0)
2760 return wait_for_pid(child);
2761
2762 // Call lxc-user-nic pid type bridge
2763 char pidstr[20];
4119204e 2764 char *args[] = { "lxc-user-nic", pidstr, "veth", netdev->link, netdev->name, NULL };
cbef6c52
SH
2765 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
2766 pidstr[19] = '\0';
2767 execvp("lxc-user-nic", args);
2768 SYSERROR("execvp lxc-user-nic");
2769 exit(1);
2770}
2771
5f4535a3 2772int lxc_assign_network(struct lxc_list *network, pid_t pid)
0ad19a3f 2773{
82d5ae15 2774 struct lxc_list *iterator;
82d5ae15 2775 struct lxc_netdev *netdev;
cbef6c52 2776 int am_root = (getuid() == 0);
3cfc0f3a 2777 int err;
0ad19a3f 2778
5f4535a3 2779 lxc_list_for_each(iterator, network) {
82d5ae15 2780
5f4535a3 2781 netdev = iterator->elem;
82d5ae15 2782
cbef6c52
SH
2783 if (!am_root) {
2784 if (unpriv_assign_nic(netdev, pid))
2785 return -1;
2786 // TODO fill in netdev->ifindex and name
2787 continue;
2788 }
236087a6
DL
2789 /* empty network namespace, nothing to move */
2790 if (!netdev->ifindex)
2791 continue;
2792
d472214b 2793 err = lxc_netdev_move_by_index(netdev->ifindex, pid);
3cfc0f3a
MN
2794 if (err) {
2795 ERROR("failed to move '%s' to the container : %s",
2796 netdev->link, strerror(-err));
82d5ae15
DL
2797 return -1;
2798 }
2799
c1c75c04 2800 DEBUG("move '%s' to '%d'", netdev->name, pid);
0ad19a3f 2801 }
2802
2803 return 0;
2804}
2805
251d0d2a
DE
2806static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
2807 size_t buf_size)
f6d3e3e4
SH
2808{
2809 char path[PATH_MAX];
e4ccd113 2810 int ret, closeret;
f6d3e3e4
SH
2811 FILE *f;
2812
2813 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
2814 if (ret < 0 || ret >= PATH_MAX) {
2815 fprintf(stderr, "%s: path name too long", __func__);
2816 return -E2BIG;
2817 }
025ed0f3 2818 process_lock();
f6d3e3e4 2819 f = fopen(path, "w");
025ed0f3 2820 process_unlock();
f6d3e3e4
SH
2821 if (!f) {
2822 perror("open");
2823 return -EINVAL;
2824 }
251d0d2a 2825 ret = fwrite(buf, buf_size, 1, f);
f6d3e3e4 2826 if (ret < 0)
e4ccd113 2827 SYSERROR("writing id mapping");
025ed0f3 2828 process_lock();
e4ccd113 2829 closeret = fclose(f);
025ed0f3 2830 process_unlock();
e4ccd113
SH
2831 if (closeret)
2832 SYSERROR("writing id mapping");
2833 return ret < 0 ? ret : closeret;
f6d3e3e4
SH
2834}
2835
2836int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
2837{
2838 struct lxc_list *iterator;
2839 struct id_map *map;
2840 int ret = 0;
251d0d2a 2841 enum idtype type;
4f7521b4 2842 char *buf = NULL, *pos;
cf3ef16d 2843 int am_root = (getuid() == 0);
251d0d2a
DE
2844
2845 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
4f7521b4 2846 int left, fill;
cf3ef16d
SH
2847 int had_entry = 0;
2848 if (!buf) {
2849 buf = pos = malloc(4096);
4f7521b4
SH
2850 if (!buf)
2851 return -ENOMEM;
cf3ef16d
SH
2852 }
2853 pos = buf;
2854 if (!am_root)
2855 pos += sprintf(buf, "new%cidmap %d ",
2856 type == ID_TYPE_UID ? 'u' : 'g',
2857 pid);
4f7521b4 2858
cf3ef16d
SH
2859 lxc_list_for_each(iterator, idmap) {
2860 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
251d0d2a 2861 map = iterator->elem;
cf3ef16d
SH
2862 if (map->idtype != type)
2863 continue;
2864
2865 had_entry = 1;
2866 left = 4096 - (pos - buf);
2867 fill = snprintf(pos, left, " %lu %lu %lu", map->nsid,
2868 map->hostid, map->range);
2869 if (fill <= 0 || fill >= left)
2870 SYSERROR("snprintf failed, too many mappings");
2871 pos += fill;
251d0d2a 2872 }
cf3ef16d 2873 if (!had_entry)
4f7521b4 2874 continue;
cf3ef16d
SH
2875 left = 4096 - (pos - buf);
2876 fill = snprintf(pos, left, "\n");
2877 if (fill <= 0 || fill >= left)
2878 SYSERROR("snprintf failed, too many mappings");
2879 pos += fill;
2880
2881 if (am_root)
2882 ret = write_id_mapping(type, pid, buf, pos-buf);
2883 else
2884 ret = system(buf);
2885
f6d3e3e4
SH
2886 if (ret)
2887 break;
2888 }
251d0d2a 2889
4f7521b4
SH
2890 if (buf)
2891 free(buf);
f6d3e3e4
SH
2892 return ret;
2893}
2894
cf3ef16d
SH
2895/*
2896 * return the host uid to which the container root is mapped, or -1 on
2897 * error
2898 */
c4d10a05 2899uid_t get_mapped_rootid(struct lxc_conf *conf)
cf3ef16d
SH
2900{
2901 struct lxc_list *it;
2902 struct id_map *map;
2903
2904 lxc_list_for_each(it, &conf->id_map) {
2905 map = it->elem;
2906 if (map->idtype != ID_TYPE_UID)
2907 continue;
2908 if (map->nsid != 0)
2909 continue;
c4d10a05 2910 return (uid_t) map->hostid;
cf3ef16d 2911 }
c4d10a05 2912 return (uid_t)-1;
cf3ef16d
SH
2913}
2914
57d116ab 2915int mapped_hostid(int id, struct lxc_conf *conf)
cf3ef16d
SH
2916{
2917 struct lxc_list *it;
2918 struct id_map *map;
2919 lxc_list_for_each(it, &conf->id_map) {
2920 map = it->elem;
2921 if (map->idtype != ID_TYPE_UID)
2922 continue;
2923 if (id >= map->hostid && id < map->hostid + map->range)
57d116ab 2924 return (id - map->hostid) + map->nsid;
cf3ef16d 2925 }
57d116ab 2926 return -1;
cf3ef16d
SH
2927}
2928
2929int find_unmapped_nsuid(struct lxc_conf *conf)
2930{
2931 struct lxc_list *it;
2932 struct id_map *map;
2933 uid_t freeid = 0;
2934again:
2935 lxc_list_for_each(it, &conf->id_map) {
2936 map = it->elem;
2937 if (map->idtype != ID_TYPE_UID)
2938 continue;
2939 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
2940 freeid = map->nsid + map->range;
2941 goto again;
2942 }
2943 }
2944 return freeid;
2945}
2946
19a26f82
MK
2947int lxc_find_gateway_addresses(struct lxc_handler *handler)
2948{
2949 struct lxc_list *network = &handler->conf->network;
2950 struct lxc_list *iterator;
2951 struct lxc_netdev *netdev;
2952 int link_index;
2953
2954 lxc_list_for_each(iterator, network) {
2955 netdev = iterator->elem;
2956
2957 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2958 continue;
2959
2960 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2961 ERROR("gateway = auto only supported for "
2962 "veth and macvlan");
2963 return -1;
2964 }
2965
2966 if (!netdev->link) {
2967 ERROR("gateway = auto needs a link interface");
2968 return -1;
2969 }
2970
2971 link_index = if_nametoindex(netdev->link);
2972 if (!link_index)
2973 return -EINVAL;
2974
2975 if (netdev->ipv4_gateway_auto) {
2976 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2977 ERROR("failed to automatically find ipv4 gateway "
2978 "address from link interface '%s'", netdev->link);
2979 return -1;
2980 }
2981 }
2982
2983 if (netdev->ipv6_gateway_auto) {
2984 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2985 ERROR("failed to automatically find ipv6 gateway "
2986 "address from link interface '%s'", netdev->link);
2987 return -1;
2988 }
2989 }
2990 }
2991
2992 return 0;
2993}
2994
5e4a62bf 2995int lxc_create_tty(const char *name, struct lxc_conf *conf)
b0a33c1e 2996{
5e4a62bf 2997 struct lxc_tty_info *tty_info = &conf->tty_info;
025ed0f3 2998 int i, ret;
b0a33c1e 2999
5e4a62bf
DL
3000 /* no tty in the configuration */
3001 if (!conf->tty)
b0a33c1e 3002 return 0;
3003
13954cce 3004 tty_info->pty_info =
e4e7d59d 3005 malloc(sizeof(*tty_info->pty_info)*conf->tty);
b0a33c1e 3006 if (!tty_info->pty_info) {
36eb9bde 3007 SYSERROR("failed to allocate pty_info");
985d15b1 3008 return -1;
b0a33c1e 3009 }
3010
985d15b1 3011 for (i = 0; i < conf->tty; i++) {
13954cce 3012
b0a33c1e 3013 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3014
025ed0f3
SH
3015 process_lock();
3016 ret = openpty(&pty_info->master, &pty_info->slave,
3017 pty_info->name, NULL, NULL);
3018 process_unlock();
3019 if (ret) {
36eb9bde 3020 SYSERROR("failed to create pty #%d", i);
985d15b1
MT
3021 tty_info->nbtty = i;
3022 lxc_delete_tty(tty_info);
3023 return -1;
b0a33c1e 3024 }
3025
5332bb84
DL
3026 DEBUG("allocated pty '%s' (%d/%d)",
3027 pty_info->name, pty_info->master, pty_info->slave);
3028
b035ad62
MS
3029 /* Prevent leaking the file descriptors to the container */
3030 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3031 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3032
b0a33c1e 3033 pty_info->busy = 0;
3034 }
3035
985d15b1 3036 tty_info->nbtty = conf->tty;
1ac470c0
DL
3037
3038 INFO("tty's configured");
3039
985d15b1 3040 return 0;
b0a33c1e 3041}
3042
3043void lxc_delete_tty(struct lxc_tty_info *tty_info)
3044{
3045 int i;
3046
3047 for (i = 0; i < tty_info->nbtty; i++) {
3048 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3049
025ed0f3 3050 process_lock();
b0a33c1e 3051 close(pty_info->master);
3052 close(pty_info->slave);
025ed0f3 3053 process_unlock();
b0a33c1e 3054 }
3055
3056 free(tty_info->pty_info);
3057 tty_info->nbtty = 0;
3058}
3059
/*
 * chown_mapped_root: for an unprivileged user with uid X to chown a dir
 * to subuid Y, he needs to run chown as root in a userns where
 * nsid 0 is mapped to hostuid Y, and nsid Y is mapped to hostuid
 * X.  That way, the container root is privileged with respect to
 * hostuid X, allowing him to do the chown.
 *
 * When the effective uid is 0 the chown is done directly. Otherwise a
 * child is forked which executes
 *   lxc-usernsexec -m b:0:<rootid>:1 -m b:<hostuid>:<hostuid>:1 -- chown 0 <path>
 *
 * Returns 0 on success, -1 when the container root has no usable mapping
 * or chown()/fork() fails, otherwise the result of wait_for_pid() for
 * the child. The child itself never returns into the caller's code.
 */
int chown_mapped_root(char *path, struct lxc_conf *conf)
{
	uid_t rootid;
	pid_t pid;

	/* uid_t is unsigned, so the (uid_t)-1 error value has to be tested
	 * for explicitly; a host uid of 0 keeps being rejected as before.
	 */
	rootid = get_mapped_rootid(conf);
	if (rootid == 0 || rootid == (uid_t)-1) {
		ERROR("No mapping for container root");
		return -1;
	}
	if (geteuid() == 0) {
		if (chown(path, rootid, -1) < 0) {
			ERROR("Error chowning %s", path);
			return -1;
		}
		return 0;
	}
	pid = fork();
	if (pid < 0) {
		SYSERROR("Failed forking");
		return -1;
	}
	if (!pid) {
		uid_t hostuid = geteuid();
		int ret;
		char map1[100], map2[100];
		char *args[] = {"lxc-usernsexec", "-m", map1, "-m", map2, "--", "chown",
				"0", path, NULL};

		// "b:0:rootid:1"
		ret = snprintf(map1, sizeof(map1), "b:0:%u:1", (unsigned int) rootid);
		if (ret < 0 || (size_t)ret >= sizeof(map1)) {
			ERROR("Error uid printing map string");
			/* we are the forked child: never return to the caller */
			exit(1);
		}

		// "b:hostuid:hostuid:1"
		ret = snprintf(map2, sizeof(map2), "b:%u:%u:1",
			       (unsigned int) hostuid, (unsigned int) hostuid);
		if (ret < 0 || (size_t)ret >= sizeof(map2)) {
			ERROR("Error uid printing map string");
			exit(1);
		}

		execvp("lxc-usernsexec", args);
		SYSERROR("Failed executing usernsexec");
		exit(1);
	}
	return wait_for_pid(pid);
}
3114
c4d10a05 3115int ttys_shift_ids(struct lxc_conf *c)
f6d3e3e4 3116{
c4d10a05 3117 int i;
f6d3e3e4 3118
c4d10a05 3119 if (lxc_list_empty(&c->id_map))
f6d3e3e4 3120 return 0;
c4d10a05
SH
3121
3122 for (i = 0; i < c->tty_info.nbtty; i++) {
3123 struct lxc_pty_info *pty_info = &c->tty_info.pty_info[i];
3124
3125 if (chown_mapped_root(pty_info->name, c) < 0) {
3126 ERROR("Failed to chown %s", pty_info->name);
f6d3e3e4
SH
3127 return -1;
3128 }
3129 }
3130
c4d10a05
SH
3131 if (chown_mapped_root(c->console.name, c) < 0) {
3132 ERROR("Failed to chown %s", c->console.name);
3133 return -1;
3134 }
3135
f6d3e3e4
SH
3136 return 0;
3137}
3138
368bbc02 3139int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath, struct cgroup_process_info *cgroup_info)
0ad19a3f 3140{
571e6ec8 3141 if (setup_utsname(lxc_conf->utsname)) {
36eb9bde 3142 ERROR("failed to setup the utsname for '%s'", name);
95b5ffaf 3143 return -1;
0ad19a3f 3144 }
3145
5f4535a3 3146 if (setup_network(&lxc_conf->network)) {
36eb9bde 3147 ERROR("failed to setup the network for '%s'", name);
95b5ffaf 3148 return -1;
0ad19a3f 3149 }
3150
283678ed 3151 if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) {
89eaa05e
SH
3152 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3153 return -1;
3154 }
5ea6163a 3155
cc28d0b0 3156 if (setup_rootfs(lxc_conf)) {
ac778708 3157 ERROR("failed to setup rootfs for '%s'", name);
95b5ffaf 3158 return -1;
0ad19a3f 3159 }
3160
c6883f38 3161 if (lxc_conf->autodev) {
91c3830e
SH
3162 if (mount_autodev(lxc_conf->rootfs.mount)) {
3163 ERROR("failed to mount /dev in the container");
c6883f38
SH
3164 return -1;
3165 }
3166 }
3167
368bbc02
CS
3168 /* do automatic mounts (mainly /proc and /sys), but exclude
3169 * those that need to wait until other stuff has finished
3170 */
b06b8511 3171 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3172 ERROR("failed to setup the automatic mounts for '%s'", name);
3173 return -1;
3174 }
3175
80a881b2 3176 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
36eb9bde 3177 ERROR("failed to setup the mounts for '%s'", name);
95b5ffaf 3178 return -1;
576f946d 3179 }
3180
c1dc38c2 3181 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
e7938e9e
MN
3182 ERROR("failed to setup the mount entries for '%s'", name);
3183 return -1;
3184 }
3185
368bbc02
CS
3186 /* now mount only cgroup, if wanted;
3187 * before, /sys could not have been mounted
3188 * (is either mounted automatically or via fstab entries)
3189 */
b06b8511 3190 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, cgroup_info) < 0) {
368bbc02
CS
3191 ERROR("failed to setup the automatic mounts for '%s'", name);
3192 return -1;
3193 }
3194
283678ed 3195 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
773fb9ca
SH
3196 ERROR("failed to run mount hooks for container '%s'.", name);
3197 return -1;
3198 }
3199
91c3830e 3200 if (lxc_conf->autodev) {
283678ed 3201 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
f7bee6c6
MW
3202 ERROR("failed to run autodev hooks for container '%s'.", name);
3203 return -1;
3204 }
91c3830e
SH
3205 if (setup_autodev(lxc_conf->rootfs.mount)) {
3206 ERROR("failed to populate /dev in the container");
3207 return -1;
3208 }
3209 }
368bbc02 3210
37903589 3211 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
36eb9bde 3212 ERROR("failed to setup the console for '%s'", name);
95b5ffaf 3213 return -1;
6e590161 3214 }
3215
7e0e1d94
AV
3216 if (lxc_conf->kmsg) {
3217 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3218 ERROR("failed to setup kmsg for '%s'", name);
3219 }
1bd051a6 3220
37903589 3221 if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) {
36eb9bde 3222 ERROR("failed to setup the ttys for '%s'", name);
95b5ffaf 3223 return -1;
b0a33c1e 3224 }
3225
fe4de9a6
DE
3226 /* mount /proc if needed for LSM transition */
3227 if (lsm_proc_mount(lxc_conf) < 0) {
3228 ERROR("failed to LSM mount proc for '%s'", name);
e075f5d9 3229 return -1;
e075f5d9 3230 }
e075f5d9 3231
ac778708 3232 if (setup_pivot_root(&lxc_conf->rootfs)) {
36eb9bde 3233 ERROR("failed to set rootfs for '%s'", name);
95b5ffaf 3234 return -1;
ed502555 3235 }
3236
571e6ec8 3237 if (setup_pts(lxc_conf->pts)) {
36eb9bde 3238 ERROR("failed to setup the new pts instance");
95b5ffaf 3239 return -1;
3c26f34e 3240 }
3241
cccc74b5
DL
3242 if (setup_personality(lxc_conf->personality)) {
3243 ERROR("failed to setup personality");
3244 return -1;
3245 }
3246
f6d3e3e4 3247 if (lxc_list_empty(&lxc_conf->id_map)) {
1fb86a7c
SH
3248 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3249 if (!lxc_list_empty(&lxc_conf->caps)) {
3250 ERROR("Simultaneously requested dropping and keeping caps");
3251 return -1;
3252 }
3253 if (dropcaps_except(&lxc_conf->keepcaps)) {
3254 ERROR("failed to keep requested caps\n");
3255 return -1;
3256 }
3257 } else if (setup_caps(&lxc_conf->caps)) {
f6d3e3e4
SH
3258 ERROR("failed to drop capabilities");
3259 return -1;
3260 }
81810dd1
DL
3261 }
3262
cd54d859
DL
3263 NOTICE("'%s' is setup.", name);
3264
0ad19a3f 3265 return 0;
3266}
26ddeedd 3267
283678ed
SH
3268int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3269 const char *lxcpath, char *argv[])
26ddeedd
SH
3270{
3271 int which = -1;
3272 struct lxc_list *it;
3273
3274 if (strcmp(hook, "pre-start") == 0)
3275 which = LXCHOOK_PRESTART;
5ea6163a
SH
3276 else if (strcmp(hook, "pre-mount") == 0)
3277 which = LXCHOOK_PREMOUNT;
26ddeedd
SH
3278 else if (strcmp(hook, "mount") == 0)
3279 which = LXCHOOK_MOUNT;
f7bee6c6
MW
3280 else if (strcmp(hook, "autodev") == 0)
3281 which = LXCHOOK_AUTODEV;
26ddeedd
SH
3282 else if (strcmp(hook, "start") == 0)
3283 which = LXCHOOK_START;
3284 else if (strcmp(hook, "post-stop") == 0)
3285 which = LXCHOOK_POSTSTOP;
148e91f5
SH
3286 else if (strcmp(hook, "clone") == 0)
3287 which = LXCHOOK_CLONE;
26ddeedd
SH
3288 else
3289 return -1;
3290 lxc_list_for_each(it, &conf->hooks[which]) {
3291 int ret;
3292 char *hookname = it->elem;
283678ed 3293 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
26ddeedd
SH
3294 if (ret)
3295 return ret;
3296 }
3297 return 0;
3298}
72d0e1cb 3299
427b3a21 3300static void lxc_remove_nic(struct lxc_list *it)
72d0e1cb
SG
3301{
3302 struct lxc_netdev *netdev = it->elem;
9ebb03ad 3303 struct lxc_list *it2,*next;
72d0e1cb
SG
3304
3305 lxc_list_del(it);
3306
3307 if (netdev->link)
3308 free(netdev->link);
3309 if (netdev->name)
3310 free(netdev->name);
c9bb9a85
DE
3311 if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair)
3312 free(netdev->priv.veth_attr.pair);
72d0e1cb
SG
3313 if (netdev->upscript)
3314 free(netdev->upscript);
3315 if (netdev->hwaddr)
3316 free(netdev->hwaddr);
3317 if (netdev->mtu)
3318 free(netdev->mtu);
3319 if (netdev->ipv4_gateway)
3320 free(netdev->ipv4_gateway);
3321 if (netdev->ipv6_gateway)
3322 free(netdev->ipv6_gateway);
9ebb03ad 3323 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3324 lxc_list_del(it2);
3325 free(it2->elem);
3326 free(it2);
3327 }
9ebb03ad 3328 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3329 lxc_list_del(it2);
3330 free(it2->elem);
3331 free(it2);
3332 }
d95db067 3333 free(netdev);
72d0e1cb
SG
3334 free(it);
3335}
3336
3337/* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
12a50cc6 3338int lxc_clear_nic(struct lxc_conf *c, const char *key)
72d0e1cb
SG
3339{
3340 char *p1;
3341 int ret, idx, i;
3342 struct lxc_list *it;
3343 struct lxc_netdev *netdev;
3344
3345 p1 = index(key, '.');
3346 if (!p1 || *(p1+1) == '\0')
3347 p1 = NULL;
3348
3349 ret = sscanf(key, "%d", &idx);
3350 if (ret != 1) return -1;
3351 if (idx < 0)
3352 return -1;
3353
3354 i = 0;
3355 lxc_list_for_each(it, &c->network) {
3356 if (i == idx)
3357 break;
3358 i++;
3359 }
3360 if (i < idx) // we don't have that many nics defined
3361 return -1;
3362
3363 if (!it || !it->elem)
3364 return -1;
3365
3366 netdev = it->elem;
3367
3368 if (!p1) {
3369 lxc_remove_nic(it);
3370 } else if (strcmp(p1, "ipv4") == 0) {
9ebb03ad
DE
3371 struct lxc_list *it2,*next;
3372 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
72d0e1cb
SG
3373 lxc_list_del(it2);
3374 free(it2->elem);
3375 free(it2);
3376 }
3377 } else if (strcmp(p1, "ipv6") == 0) {
9ebb03ad
DE
3378 struct lxc_list *it2,*next;
3379 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
72d0e1cb
SG
3380 lxc_list_del(it2);
3381 free(it2->elem);
3382 free(it2);
3383 }
3384 } else if (strcmp(p1, "link") == 0) {
3385 if (netdev->link) {
3386 free(netdev->link);
3387 netdev->link = NULL;
3388 }
3389 } else if (strcmp(p1, "name") == 0) {
3390 if (netdev->name) {
3391 free(netdev->name);
3392 netdev->name = NULL;
3393 }
3394 } else if (strcmp(p1, "script.up") == 0) {
3395 if (netdev->upscript) {
3396 free(netdev->upscript);
3397 netdev->upscript = NULL;
3398 }
3399 } else if (strcmp(p1, "hwaddr") == 0) {
3400 if (netdev->hwaddr) {
3401 free(netdev->hwaddr);
3402 netdev->hwaddr = NULL;
3403 }
3404 } else if (strcmp(p1, "mtu") == 0) {
3405 if (netdev->mtu) {
3406 free(netdev->mtu);
3407 netdev->mtu = NULL;
3408 }
3409 } else if (strcmp(p1, "ipv4_gateway") == 0) {
3410 if (netdev->ipv4_gateway) {
3411 free(netdev->ipv4_gateway);
3412 netdev->ipv4_gateway = NULL;
3413 }
3414 } else if (strcmp(p1, "ipv6_gateway") == 0) {
3415 if (netdev->ipv6_gateway) {
3416 free(netdev->ipv6_gateway);
3417 netdev->ipv6_gateway = NULL;
3418 }
3419 }
3420 else return -1;
3421
3422 return 0;
3423}
3424
3425int lxc_clear_config_network(struct lxc_conf *c)
3426{
9ebb03ad
DE
3427 struct lxc_list *it,*next;
3428 lxc_list_for_each_safe(it, &c->network, next) {
72d0e1cb
SG
3429 lxc_remove_nic(it);
3430 }
3431 return 0;
3432}
3433
3434int lxc_clear_config_caps(struct lxc_conf *c)
3435{
9ebb03ad 3436 struct lxc_list *it,*next;
72d0e1cb 3437
9ebb03ad 3438 lxc_list_for_each_safe(it, &c->caps, next) {
72d0e1cb
SG
3439 lxc_list_del(it);
3440 free(it->elem);
3441 free(it);
3442 }
3443 return 0;
3444}
3445
27c27d73
SH
3446int lxc_clear_idmaps(struct lxc_conf *c)
3447{
3448 struct lxc_list *it, *next;
3449
3450 lxc_list_for_each_safe(it, &c->id_map, next) {
3451 lxc_list_del(it);
3452 free(it->elem);
3453 free(it);
3454 }
3455 return 0;
3456}
3457
1fb86a7c
SH
3458int lxc_clear_config_keepcaps(struct lxc_conf *c)
3459{
3460 struct lxc_list *it,*next;
3461
3462 lxc_list_for_each_safe(it, &c->keepcaps, next) {
3463 lxc_list_del(it);
3464 free(it->elem);
3465 free(it);
3466 }
3467 return 0;
3468}
3469
12a50cc6 3470int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
72d0e1cb 3471{
9ebb03ad 3472 struct lxc_list *it,*next;
72d0e1cb 3473 bool all = false;
12a50cc6 3474 const char *k = key + 11;
72d0e1cb
SG
3475
3476 if (strcmp(key, "lxc.cgroup") == 0)
3477 all = true;
3478
9ebb03ad 3479 lxc_list_for_each_safe(it, &c->cgroup, next) {
72d0e1cb
SG
3480 struct lxc_cgroup *cg = it->elem;
3481 if (!all && strcmp(cg->subsystem, k) != 0)
3482 continue;
3483 lxc_list_del(it);
3484 free(cg->subsystem);
3485 free(cg->value);
3486 free(cg);
3487 free(it);
3488 }
3489 return 0;
3490}
3491
3492int lxc_clear_mount_entries(struct lxc_conf *c)
3493{
9ebb03ad 3494 struct lxc_list *it,*next;
72d0e1cb 3495
9ebb03ad 3496 lxc_list_for_each_safe(it, &c->mount_list, next) {
72d0e1cb
SG
3497 lxc_list_del(it);
3498 free(it->elem);
3499 free(it);
3500 }
3501 return 0;
3502}
3503
12a50cc6 3504int lxc_clear_hooks(struct lxc_conf *c, const char *key)
72d0e1cb 3505{
9ebb03ad 3506 struct lxc_list *it,*next;
17ed13a3 3507 bool all = false, done = false;
12a50cc6 3508 const char *k = key + 9;
72d0e1cb
SG
3509 int i;
3510
17ed13a3
SH
3511 if (strcmp(key, "lxc.hook") == 0)
3512 all = true;
3513
72d0e1cb 3514 for (i=0; i<NUM_LXC_HOOKS; i++) {
17ed13a3 3515 if (all || strcmp(k, lxchook_names[i]) == 0) {
9ebb03ad 3516 lxc_list_for_each_safe(it, &c->hooks[i], next) {
17ed13a3
SH
3517 lxc_list_del(it);
3518 free(it->elem);
3519 free(it);
3520 }
3521 done = true;
72d0e1cb
SG
3522 }
3523 }
17ed13a3
SH
3524
3525 if (!done) {
3526 ERROR("Invalid hook key: %s", key);
3527 return -1;
3528 }
72d0e1cb
SG
3529 return 0;
3530}
8eb5694b 3531
7b35f3d6
SH
3532void lxc_clear_saved_nics(struct lxc_conf *conf)
3533{
3534 int i;
3535
3536 if (!conf->num_savednics)
3537 return;
3538 for (i=0; i < conf->num_savednics; i++)
3539 free(conf->saved_nics[i].orig_name);
3540 conf->saved_nics = 0;
3541 free(conf->saved_nics);
3542}
3543
8eb5694b
SH
3544void lxc_conf_free(struct lxc_conf *conf)
3545{
3546 if (!conf)
3547 return;
3548 if (conf->console.path)
3549 free(conf->console.path);
54c30e29 3550 if (conf->rootfs.mount)
8eb5694b 3551 free(conf->rootfs.mount);
d95db067
DE
3552 if (conf->rootfs.path)
3553 free(conf->rootfs.path);
3554 if (conf->utsname)
3555 free(conf->utsname);
3556 if (conf->ttydir)
3557 free(conf->ttydir);
3558 if (conf->fstab)
3559 free(conf->fstab);
fc7e8864
WM
3560 if (conf->rcfile)
3561 free(conf->rcfile);
8eb5694b 3562 lxc_clear_config_network(conf);
fe4de9a6
DE
3563 if (conf->lsm_aa_profile)
3564 free(conf->lsm_aa_profile);
3565 if (conf->lsm_se_context)
3566 free(conf->lsm_se_context);
769872f9 3567 lxc_seccomp_free(conf);
8eb5694b 3568 lxc_clear_config_caps(conf);
1fb86a7c 3569 lxc_clear_config_keepcaps(conf);
8eb5694b 3570 lxc_clear_cgroups(conf, "lxc.cgroup");
17ed13a3 3571 lxc_clear_hooks(conf, "lxc.hook");
8eb5694b 3572 lxc_clear_mount_entries(conf);
7b35f3d6 3573 lxc_clear_saved_nics(conf);
27c27d73 3574 lxc_clear_idmaps(conf);
8eb5694b
SH
3575 free(conf);
3576}