]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/conf.c
Revert (by hand) "logs: introduce a thread-local 'current' lxc_config"
[mirror_lxc.git] / src / lxc / conf.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 #include "config.h"
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <stdarg.h>
28 #include <errno.h>
29 #include <string.h>
30 #include <dirent.h>
31 #include <unistd.h>
32 #include <inttypes.h>
33 #include <sys/wait.h>
34 #include <sys/syscall.h>
35 #include <sys/types.h>
36 #include <pwd.h>
37 #include <grp.h>
38 #include <time.h>
39 #ifdef HAVE_STATVFS
40 #include <sys/statvfs.h>
41 #endif
42
43 #if HAVE_PTY_H
44 #include <pty.h>
45 #else
46 #include <../include/openpty.h>
47 #endif
48
49 #include <linux/loop.h>
50
51 #include <sys/types.h>
52 #include <sys/utsname.h>
53 #include <sys/param.h>
54 #include <sys/stat.h>
55 #include <sys/socket.h>
56 #include <sys/mount.h>
57 #include <sys/mman.h>
58 #include <sys/prctl.h>
59
60 #include <arpa/inet.h>
61 #include <fcntl.h>
62 #include <netinet/in.h>
63 #include <net/if.h>
64 #include <libgen.h>
65
66 #include "network.h"
67 #include "error.h"
68 #include "af_unix.h"
69 #include "parse.h"
70 #include "utils.h"
71 #include "conf.h"
72 #include "log.h"
73 #include "caps.h" /* for lxc_caps_last_cap() */
74 #include "bdev.h"
75 #include "cgroup.h"
76 #include "lxclock.h"
77 #include "namespace.h"
78 #include "lsm/lsm.h"
79
80 #if HAVE_SYS_CAPABILITY_H
81 #include <sys/capability.h>
82 #endif
83
84 #if HAVE_SYS_PERSONALITY_H
85 #include <sys/personality.h>
86 #endif
87
88 #if IS_BIONIC
89 #include <../include/lxcmntent.h>
90 #else
91 #include <mntent.h>
92 #endif
93
94 #include "lxcseccomp.h"
95
96 lxc_log_define(lxc_conf, lxc);
97
98 #define LINELEN 4096
99
100 #if HAVE_SYS_CAPABILITY_H
101 #ifndef CAP_SETFCAP
102 #define CAP_SETFCAP 31
103 #endif
104
105 #ifndef CAP_MAC_OVERRIDE
106 #define CAP_MAC_OVERRIDE 32
107 #endif
108
109 #ifndef CAP_MAC_ADMIN
110 #define CAP_MAC_ADMIN 33
111 #endif
112 #endif
113
114 #ifndef PR_CAPBSET_DROP
115 #define PR_CAPBSET_DROP 24
116 #endif
117
118 #ifndef LO_FLAGS_AUTOCLEAR
119 #define LO_FLAGS_AUTOCLEAR 4
120 #endif
121
122 /* needed for cgroup automount checks, regardless of whether we
123 * have included linux/capability.h or not */
124 #ifndef CAP_SYS_ADMIN
125 #define CAP_SYS_ADMIN 21
126 #endif
127
128 /* Define pivot_root() if missing from the C library */
129 #ifndef HAVE_PIVOT_ROOT
/*
 * Fallback pivot_root() used when the C library offers no wrapper.
 * Issues the raw system call if the kernel headers define its number;
 * otherwise fails with -1 and errno set to ENOSYS.
 */
static int pivot_root(const char * new_root, const char * put_old)
{
#ifndef __NR_pivot_root
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_pivot_root, new_root, put_old);
#endif
}
139 #else
140 extern int pivot_root(const char * new_root, const char * put_old);
141 #endif
142
143 /* Define sethostname() if missing from the C library */
144 #ifndef HAVE_SETHOSTNAME
/*
 * Fallback sethostname() used when the C library offers no wrapper.
 * Issues the raw system call if the kernel headers define its number;
 * otherwise fails with -1 and errno set to ENOSYS.
 */
static int sethostname(const char * name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
154 #endif
155
156 /* Define __S_ISTYPE if missing from the C library */
157 #ifndef __S_ISTYPE
158 #define __S_ISTYPE(mode, mask) (((mode) & S_IFMT) == (mask))
159 #endif
160
161 #ifndef MS_PRIVATE
162 #define MS_PRIVATE (1<<18)
163 #endif
164
165 char *lxchook_names[NUM_LXC_HOOKS] = {
166 "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" };
167
/* Callback signature shared by the netdev_conf and netdev_deconf tables. */
typedef int (*instantiate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);
169
/* One textual mount option together with the MS_* bits it refers to. */
struct mount_opt {
	char *name;	/* option keyword as written by the user, e.g. "ro" */
	int clear;	/* non-zero for the negating keywords in the table
			 * ("rw", "suid", ...); presumably the flag is then
			 * cleared instead of set -- confirm in the parser */
	int flag;	/* MS_* flag(s) associated with the keyword */
};
175
/* Maps a capability name to a numeric capability value. */
struct caps_opt {
	char *name;	/* lower-case name without the "CAP_" prefix */
	int value;	/* matching CAP_* constant */
};
180
181 /* Declare this here, since we don't want to reshuffle the whole file. */
182 static int in_caplist(int cap, struct lxc_list *caps);
183
184 static int instantiate_veth(struct lxc_handler *, struct lxc_netdev *);
185 static int instantiate_macvlan(struct lxc_handler *, struct lxc_netdev *);
186 static int instantiate_vlan(struct lxc_handler *, struct lxc_netdev *);
187 static int instantiate_phys(struct lxc_handler *, struct lxc_netdev *);
188 static int instantiate_empty(struct lxc_handler *, struct lxc_netdev *);
189 static int instantiate_none(struct lxc_handler *, struct lxc_netdev *);
190
191 static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
192 [LXC_NET_VETH] = instantiate_veth,
193 [LXC_NET_MACVLAN] = instantiate_macvlan,
194 [LXC_NET_VLAN] = instantiate_vlan,
195 [LXC_NET_PHYS] = instantiate_phys,
196 [LXC_NET_EMPTY] = instantiate_empty,
197 [LXC_NET_NONE] = instantiate_none,
198 };
199
200 static int shutdown_veth(struct lxc_handler *, struct lxc_netdev *);
201 static int shutdown_macvlan(struct lxc_handler *, struct lxc_netdev *);
202 static int shutdown_vlan(struct lxc_handler *, struct lxc_netdev *);
203 static int shutdown_phys(struct lxc_handler *, struct lxc_netdev *);
204 static int shutdown_empty(struct lxc_handler *, struct lxc_netdev *);
205 static int shutdown_none(struct lxc_handler *, struct lxc_netdev *);
206
207 static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
208 [LXC_NET_VETH] = shutdown_veth,
209 [LXC_NET_MACVLAN] = shutdown_macvlan,
210 [LXC_NET_VLAN] = shutdown_vlan,
211 [LXC_NET_PHYS] = shutdown_phys,
212 [LXC_NET_EMPTY] = shutdown_empty,
213 [LXC_NET_NONE] = shutdown_none,
214 };
215
216 static struct mount_opt mount_opt[] = {
217 { "defaults", 0, 0 },
218 { "ro", 0, MS_RDONLY },
219 { "rw", 1, MS_RDONLY },
220 { "suid", 1, MS_NOSUID },
221 { "nosuid", 0, MS_NOSUID },
222 { "dev", 1, MS_NODEV },
223 { "nodev", 0, MS_NODEV },
224 { "exec", 1, MS_NOEXEC },
225 { "noexec", 0, MS_NOEXEC },
226 { "sync", 0, MS_SYNCHRONOUS },
227 { "async", 1, MS_SYNCHRONOUS },
228 { "dirsync", 0, MS_DIRSYNC },
229 { "remount", 0, MS_REMOUNT },
230 { "mand", 0, MS_MANDLOCK },
231 { "nomand", 1, MS_MANDLOCK },
232 { "atime", 1, MS_NOATIME },
233 { "noatime", 0, MS_NOATIME },
234 { "diratime", 1, MS_NODIRATIME },
235 { "nodiratime", 0, MS_NODIRATIME },
236 { "bind", 0, MS_BIND },
237 { "rbind", 0, MS_BIND|MS_REC },
238 { "relatime", 0, MS_RELATIME },
239 { "norelatime", 1, MS_RELATIME },
240 { "strictatime", 0, MS_STRICTATIME },
241 { "nostrictatime", 1, MS_STRICTATIME },
242 { NULL, 0, 0 },
243 };
244
245 #if HAVE_SYS_CAPABILITY_H
246 static struct caps_opt caps_opt[] = {
247 { "chown", CAP_CHOWN },
248 { "dac_override", CAP_DAC_OVERRIDE },
249 { "dac_read_search", CAP_DAC_READ_SEARCH },
250 { "fowner", CAP_FOWNER },
251 { "fsetid", CAP_FSETID },
252 { "kill", CAP_KILL },
253 { "setgid", CAP_SETGID },
254 { "setuid", CAP_SETUID },
255 { "setpcap", CAP_SETPCAP },
256 { "linux_immutable", CAP_LINUX_IMMUTABLE },
257 { "net_bind_service", CAP_NET_BIND_SERVICE },
258 { "net_broadcast", CAP_NET_BROADCAST },
259 { "net_admin", CAP_NET_ADMIN },
260 { "net_raw", CAP_NET_RAW },
261 { "ipc_lock", CAP_IPC_LOCK },
262 { "ipc_owner", CAP_IPC_OWNER },
263 { "sys_module", CAP_SYS_MODULE },
264 { "sys_rawio", CAP_SYS_RAWIO },
265 { "sys_chroot", CAP_SYS_CHROOT },
266 { "sys_ptrace", CAP_SYS_PTRACE },
267 { "sys_pacct", CAP_SYS_PACCT },
268 { "sys_admin", CAP_SYS_ADMIN },
269 { "sys_boot", CAP_SYS_BOOT },
270 { "sys_nice", CAP_SYS_NICE },
271 { "sys_resource", CAP_SYS_RESOURCE },
272 { "sys_time", CAP_SYS_TIME },
273 { "sys_tty_config", CAP_SYS_TTY_CONFIG },
274 { "mknod", CAP_MKNOD },
275 { "lease", CAP_LEASE },
276 #ifdef CAP_AUDIT_WRITE
277 { "audit_write", CAP_AUDIT_WRITE },
278 #endif
279 #ifdef CAP_AUDIT_CONTROL
280 { "audit_control", CAP_AUDIT_CONTROL },
281 #endif
282 { "setfcap", CAP_SETFCAP },
283 { "mac_override", CAP_MAC_OVERRIDE },
284 { "mac_admin", CAP_MAC_ADMIN },
285 #ifdef CAP_SYSLOG
286 { "syslog", CAP_SYSLOG },
287 #endif
288 #ifdef CAP_WAKE_ALARM
289 { "wake_alarm", CAP_WAKE_ALARM },
290 #endif
291 };
292 #else
293 static struct caps_opt caps_opt[] = {};
294 #endif
295
296 static int run_buffer(char *buffer)
297 {
298 struct lxc_popen_FILE *f;
299 char *output;
300 int ret;
301
302 f = lxc_popen(buffer);
303 if (!f) {
304 SYSERROR("popen failed");
305 return -1;
306 }
307
308 output = malloc(LXC_LOG_BUFFER_SIZE);
309 if (!output) {
310 ERROR("failed to allocate memory for script output");
311 lxc_pclose(f);
312 return -1;
313 }
314
315 while(fgets(output, LXC_LOG_BUFFER_SIZE, f->f))
316 DEBUG("script output: %s", output);
317
318 free(output);
319
320 ret = lxc_pclose(f);
321 if (ret == -1) {
322 SYSERROR("Script exited on error");
323 return -1;
324 } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) {
325 ERROR("Script exited with status %d", WEXITSTATUS(ret));
326 return -1;
327 } else if (WIFSIGNALED(ret)) {
328 ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret),
329 strsignal(WTERMSIG(ret)));
330 return -1;
331 }
332
333 return 0;
334 }
335
/*
 * Build the command line "<script> <name> <section> <hook> [args...]" and
 * execute it with run_buffer().
 *
 * name     container name, passed as first script argument
 * section  configuration section, passed as second script argument
 * script   path of the script to execute
 * hook     hook name, passed as third script argument
 * lxcpath  not used by this function
 * argsin   optional NULL-terminated array of extra arguments (may be NULL)
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * assembled.
 *
 * The command line is built on the heap: its length depends entirely on
 * the caller's strings, so a stack allocation could overflow the stack.
 */
static int run_script_argv(const char *name, const char *section,
			   const char *script, const char *hook, const char *lxcpath,
			   char **argsin)
{
	int ret = -1;
	int i, used;
	char *buffer;
	size_t size = 0;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* one separating blank per extra argument */
	for (i = 0; argsin && argsin[i]; i++)
		size += strlen(argsin[i]) + 1;

	size += strlen(hook) + 1;

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two more blanks plus the terminating NUL */
	size += 3;

	/* snprintf() reports lengths as int */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	used = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook);
	if (used < 0 || (size_t)used >= size) {
		ERROR("Script name too long");
		goto out;
	}

	for (i = 0; argsin && argsin[i]; i++) {
		int len = size - used;
		int rc;

		rc = snprintf(buffer + used, len, " %s", argsin[i]);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			goto out;
		}
		used += rc;
	}

	ret = run_buffer(buffer);

out:
	free(buffer);
	return ret;
}
385
/*
 * Build the command line "<script> <name> <section> [args...]" and execute
 * it with run_buffer().
 *
 * name     container name, passed as first script argument
 * section  configuration section, passed as second script argument
 * script   path of the script to execute
 * ...      extra arguments as `char *`, terminated by a NULL pointer
 *
 * Returns the result of run_buffer(), or -1 if the command line cannot be
 * assembled.
 *
 * The command line is built on the heap because its length is determined
 * by the caller's strings, and every va_start() is paired with va_end()
 * on all paths, including the error return inside the argument loop.
 */
static int run_script(const char *name, const char *section,
		      const char *script, ...)
{
	int ret = -1;
	int used;
	char *buffer, *p;
	size_t size = 0;
	va_list ap;

	INFO("Executing script '%s' for container '%s', config section '%s'",
	     script, name, section);

	/* first pass: one separating blank per extra argument */
	va_start(ap, script);
	while ((p = va_arg(ap, char *)))
		size += strlen(p) + 1;
	va_end(ap);

	size += strlen(script);
	size += strlen(name);
	size += strlen(section);
	/* two blanks plus the terminating NUL */
	size += 3;

	/* snprintf() reports lengths as int */
	if (size > INT_MAX)
		return -1;

	buffer = malloc(size);
	if (!buffer) {
		ERROR("failed to allocate memory");
		return -1;
	}

	used = snprintf(buffer, size, "%s %s %s", script, name, section);
	if (used < 0 || (size_t)used >= size) {
		ERROR("Script name too long");
		goto out;
	}

	/* second pass: append the extra arguments */
	va_start(ap, script);
	while ((p = va_arg(ap, char *))) {
		int len = size - used;
		int rc;

		rc = snprintf(buffer + used, len, " %s", p);
		if (rc < 0 || rc >= len) {
			ERROR("Script args too long");
			va_end(ap);
			goto out;
		}
		used += rc;
	}
	va_end(ap);

	ret = run_buffer(buffer);

out:
	free(buffer);
	return ret;
}
437
/*
 * Per-line callback for mount_unknown_fs(): treat `buffer` as one line of
 * a filesystem list and try to mount cbarg->rootfs on cbarg->target with
 * the filesystem type named on that line.
 *
 * buffer  one line of the list; trimmed in place
 * data    pointer to the caller's { rootfs, target, options } triple
 *
 * Returns 1 when the mount succeeded, 0 when the line was skipped or the
 * mount attempt failed, and -1 when the mount options cannot be parsed.
 */
static int find_fstype_cb(char* buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Initialised here because parse_mntopts() is defined outside this
	 * view: mntdata is handed to free() on its failure path, which must
	 * never see an indeterminate pointer.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip leading and trailing garbage around the type name */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	/* ignore blank line and comment */
	if (fstype[0] == '\0' || fstype[0] == '#')
		return 0;

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
	      cbarg->rootfs, cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		/* not fatal: the caller goes on to the next candidate type */
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}
	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'",
	     cbarg->rootfs, cbarg->target, fstype);

	return 1;
}
482
483 static int mount_unknown_fs(const char *rootfs, const char *target,
484 const char *options)
485 {
486 int i;
487
488 struct cbarg {
489 const char *rootfs;
490 const char *target;
491 const char *options;
492 } cbarg = {
493 .rootfs = rootfs,
494 .target = target,
495 .options = options,
496 };
497
498 /*
499 * find the filesystem type with brute force:
500 * first we check with /etc/filesystems, in case the modules
501 * are auto-loaded and fall back to the supported kernel fs
502 */
503 char *fsfile[] = {
504 "/etc/filesystems",
505 "/proc/filesystems",
506 };
507
508 for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) {
509
510 int ret;
511
512 if (access(fsfile[i], F_OK))
513 continue;
514
515 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
516 if (ret < 0) {
517 ERROR("failed to parse '%s'", fsfile[i]);
518 return -1;
519 }
520
521 if (ret)
522 return 0;
523 }
524
525 ERROR("failed to determine fs type for '%s'", rootfs);
526 return -1;
527 }
528
/*
 * Mount a directory rootfs by recursively bind-mounting `rootfs` onto
 * `target`, adding whatever flags and data `options` parses to.
 *
 * Returns the result of mount(2), or -1 if the options cannot be parsed.
 */
static int mount_rootfs_dir(const char *rootfs, const char *target,
			    const char *options)
{
	/*
	 * Initialised here because parse_mntopts() is defined outside this
	 * view: mntdata is handed to free() on its failure path, which must
	 * never see an indeterminate pointer.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	int ret;

	if (parse_mntopts(options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount(rootfs, target, "none", MS_BIND | MS_REC | mntflags, mntdata);
	free(mntdata);

	return ret;
}
546
/*
 * Attach the file `rootfs` to the loop device open on `fd` and mark the
 * device auto-clearing.
 *
 * rootfs  path of the backing file, opened read-write
 * fd      open descriptor of a free /dev/loopN device
 * loinfo  caller-provided status block; zeroed and filled in here
 *
 * Returns 0 on success, -1 on failure.  If the backing file was attached
 * but the status could not be set, the device is detached again so that
 * it does not stay bound (LO_FLAGS_AUTOCLEAR only takes effect once
 * LOOP_SET_STATUS64 has succeeded).
 */
static int setup_lodev(const char *rootfs, int fd, struct loop_info64 *loinfo)
{
	int rfd;
	int ret = -1;

	rfd = open(rootfs, O_RDWR);
	if (rfd < 0) {
		SYSERROR("failed to open '%s'", rootfs);
		return -1;
	}

	memset(loinfo, 0, sizeof(*loinfo));

	loinfo->lo_flags = LO_FLAGS_AUTOCLEAR;

	if (ioctl(fd, LOOP_SET_FD, rfd)) {
		SYSERROR("failed to LOOP_SET_FD");
		goto out;
	}

	if (ioctl(fd, LOOP_SET_STATUS64, loinfo)) {
		SYSERROR("failed to LOOP_SET_STATUS64");
		/* undo LOOP_SET_FD, otherwise the device stays in use */
		if (ioctl(fd, LOOP_CLR_FD, 0))
			SYSERROR("failed to LOOP_CLR_FD");
		goto out;
	}

	ret = 0;
out:
	/* the loop device keeps its own reference to the backing file */
	close(rfd);

	return ret;
}
578
/*
 * Mount a regular-file rootfs: find the first free loop device in /dev,
 * attach `rootfs` to it and mount the device on `target`.
 *
 * Returns 0 on success and -1 if /dev cannot be read, no free loop device
 * is found, or attaching/mounting fails.
 *
 * Directory entries are read with readdir(): readdir_r() is deprecated
 * and its fixed-size caller-supplied struct dirent is not needed here.
 */
static int mount_rootfs_file(const char *rootfs, const char *target,
			     const char *options)
{
	struct dirent *direntp;
	struct loop_info64 loinfo;
	int ret = -1, fd = -1, rc;
	DIR *dir;
	char path[MAXPATHLEN];

	dir = opendir("/dev");
	if (!dir) {
		SYSERROR("failed to open '/dev'");
		return -1;
	}

	while ((direntp = readdir(dir)) != NULL) {

		/* only /dev/loop* is of interest (this also skips "." and "..") */
		if (strncmp(direntp->d_name, "loop", 4))
			continue;

		rc = snprintf(path, MAXPATHLEN, "/dev/%s", direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN)
			continue;

		fd = open(path, O_RDWR);
		if (fd < 0)
			continue;

		/* a status that can be read means the device is in use */
		if (ioctl(fd, LOOP_GET_STATUS64, &loinfo) == 0) {
			close(fd);
			continue;
		}

		/* ENXIO is the "device is free" answer; skip anything else */
		if (errno != ENXIO) {
			WARN("unexpected error for ioctl on '%s': %m",
			     direntp->d_name);
			close(fd);
			continue;
		}

		DEBUG("found '%s' free lodev", path);

		ret = setup_lodev(rootfs, fd, &loinfo);
		if (!ret)
			ret = mount_unknown_fs(path, target, options);
		close(fd);

		/* only the first free device is tried */
		break;
	}

	if (closedir(dir))
		WARN("failed to close directory");

	return ret;
}
643
/*
 * Mount a block-device rootfs.  The filesystem type is not known in
 * advance, so the work is delegated to mount_unknown_fs().
 */
static int mount_rootfs_block(const char *rootfs, const char *target,
			      const char *options)
{
	int rc = mount_unknown_fs(rootfs, target, options);

	return rc;
}
649
/*
 * pin_rootfs
 * if rootfs is a directory, then open ${rootfs}/lxc.hold for writing for
 * the duration of the container run, to prevent the container from marking
 * the underlying fs readonly on shutdown. unlink the file immediately so
 * no name pollution happens.
 * return -1 on error.
 * return -2 if nothing needed to be pinned.
 * return an open fd (>=0) if we pinned it.
 */
int pin_rootfs(const char *rootfs)
{
	char absrootfs[MAXPATHLEN];
	char absrootfspin[MAXPATHLEN];
	struct stat s;
	int ret, fd;

	if (rootfs == NULL || strlen(rootfs) == 0)
		return -2;

	/* a rootfs that cannot be resolved is treated as "nothing to pin" */
	if (!realpath(rootfs, absrootfs))
		return -2;

	if (access(absrootfs, F_OK))
		return -1;

	if (stat(absrootfs, &s))
		return -1;

	/* only directory-backed root filesystems are pinned */
	if (!S_ISDIR(s.st_mode))
		return -2;

	ret = snprintf(absrootfspin, MAXPATHLEN, "%s/lxc.hold", absrootfs);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;

	fd = open(absrootfspin, O_CREAT | O_RDWR, S_IWUSR|S_IRUSR);
	if (fd < 0)
		return fd;
	/* the open descriptor keeps the fs busy; the name is not needed */
	(void)unlink(absrootfspin);
	return fd;
}
692
693 /*
694 * If we are asking to remount something, make sure that any
695 * NOEXEC etc are honored.
696 */
/*
 * For a remount request, add the nosuid/nodev/ro/noexec bits that
 * statvfs() reports for the filesystem at `s` (or at `d` when `s` is
 * NULL), so the remount keeps them.
 *
 * `flags` is returned unchanged when MS_REMOUNT is not set, when both
 * paths are NULL, when statvfs() fails, or when built without
 * HAVE_STATVFS.
 */
static unsigned long add_required_remount_flags(const char *s, const char *d,
						unsigned long flags)
{
#ifdef HAVE_STATVFS
	static const unsigned long sticky[] = {
		MS_NOSUID, MS_NODEV, MS_RDONLY, MS_NOEXEC,
	};
	const char *where = s ? s : d;
	struct statvfs vfs;
	size_t k;

	if (!(flags & MS_REMOUNT) || where == NULL)
		return flags;

	if (statvfs(where, &vfs) < 0)
		return flags;

	for (k = 0; k < sizeof(sticky) / sizeof(sticky[0]); k++) {
		if (vfs.f_flag & sticky[k])
			flags |= sticky[k];
	}

	return flags;
#else
	return flags;
#endif
}
729
730 static int lxc_mount_auto_mounts(struct lxc_conf *conf, int flags, struct lxc_handler *handler)
731 {
732 int r;
733 size_t i;
734 static struct {
735 int match_mask;
736 int match_flag;
737 const char *source;
738 const char *destination;
739 const char *fstype;
740 unsigned long flags;
741 const char *options;
742 } default_mounts[] = {
743 /* Read-only bind-mounting... In older kernels, doing that required
744 * to do one MS_BIND mount and then MS_REMOUNT|MS_RDONLY the same
745 * one. According to mount(2) manpage, MS_BIND honors MS_RDONLY from
746 * kernel 2.6.26 onwards. However, this apparently does not work on
747 * kernel 3.8. Unfortunately, on that very same kernel, doing the
748 * same trick as above doesn't seem to work either, there one needs
749 * to ALSO specify MS_BIND for the remount, otherwise the entire
750 * fs is remounted read-only or the mount fails because it's busy...
751 * MS_REMOUNT|MS_BIND|MS_RDONLY seems to work for kernels as low as
752 * 2.6.32...
753 */
754 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
755 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys/net", "%r/proc/net", NULL, MS_BIND, NULL },
756 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sys", "%r/proc/sys", NULL, MS_BIND, NULL },
757 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
758 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/net", "%r/proc/sys/net", NULL, MS_MOVE, NULL },
759 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, "%r/proc/sysrq-trigger", "%r/proc/sysrq-trigger", NULL, MS_BIND, NULL },
760 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_MIXED, NULL, "%r/proc/sysrq-trigger", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
761 { LXC_AUTO_PROC_MASK, LXC_AUTO_PROC_RW, "proc", "%r/proc", "proc", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
762 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RW, "sysfs", "%r/sys", "sysfs", 0, NULL },
763 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_RO, "sysfs", "%r/sys", "sysfs", MS_RDONLY, NULL },
764 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys", "sysfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL },
765 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys", "%r/sys", NULL, MS_BIND, NULL },
766 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys", NULL, MS_REMOUNT|MS_BIND|MS_RDONLY, NULL },
767 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "sysfs", "%r/sys/devices/virtual/net", "sysfs", 0, NULL },
768 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, "%r/sys/devices/virtual/net/devices/virtual/net", "%r/sys/devices/virtual/net", NULL, MS_BIND, NULL },
769 { LXC_AUTO_SYS_MASK, LXC_AUTO_SYS_MIXED, NULL, "%r/sys/devices/virtual/net", NULL, MS_REMOUNT|MS_BIND|MS_NOSUID|MS_NODEV|MS_NOEXEC, NULL },
770 { 0, 0, NULL, NULL, NULL, 0, NULL }
771 };
772
773 for (i = 0; default_mounts[i].match_mask; i++) {
774 if ((flags & default_mounts[i].match_mask) == default_mounts[i].match_flag) {
775 char *source = NULL;
776 char *destination = NULL;
777 int saved_errno;
778 unsigned long mflags;
779
780 if (default_mounts[i].source) {
781 /* will act like strdup if %r is not present */
782 source = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].source);
783 if (!source) {
784 SYSERROR("memory allocation error");
785 return -1;
786 }
787 }
788 if (default_mounts[i].destination) {
789 /* will act like strdup if %r is not present */
790 destination = lxc_string_replace("%r", conf->rootfs.mount, default_mounts[i].destination);
791 if (!destination) {
792 saved_errno = errno;
793 SYSERROR("memory allocation error");
794 free(source);
795 errno = saved_errno;
796 return -1;
797 }
798 }
799 mflags = add_required_remount_flags(source, destination,
800 default_mounts[i].flags);
801 r = mount(source, destination, default_mounts[i].fstype, mflags, default_mounts[i].options);
802 saved_errno = errno;
803 if (r < 0 && errno == ENOENT) {
804 INFO("Mount source or target for %s on %s doesn't exist. Skipping.", source, destination);
805 r = 0;
806 }
807 else if (r < 0)
808 SYSERROR("error mounting %s on %s flags %lu", source, destination, mflags);
809
810 free(source);
811 free(destination);
812 if (r < 0) {
813 errno = saved_errno;
814 return -1;
815 }
816 }
817 }
818
819 if (flags & LXC_AUTO_CGROUP_MASK) {
820 int cg_flags;
821
822 cg_flags = flags & LXC_AUTO_CGROUP_MASK;
823 /* If the type of cgroup mount was not specified, it depends on the
824 * container's capabilities as to what makes sense: if we have
825 * CAP_SYS_ADMIN, the read-only part can be remounted read-write
826 * anyway, so we may as well default to read-write; then the admin
827 * will not be given a false sense of security. (And if they really
828 * want mixed r/o r/w, then they can explicitly specify :mixed.)
829 * OTOH, if the container lacks CAP_SYS_ADMIN, do only default to
830 * :mixed, because then the container can't remount it read-write. */
831 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC || cg_flags == LXC_AUTO_CGROUP_FULL_NOSPEC) {
832 int has_sys_admin = 0;
833 if (!lxc_list_empty(&conf->keepcaps)) {
834 has_sys_admin = in_caplist(CAP_SYS_ADMIN, &conf->keepcaps);
835 } else {
836 has_sys_admin = !in_caplist(CAP_SYS_ADMIN, &conf->caps);
837 }
838 if (cg_flags == LXC_AUTO_CGROUP_NOSPEC) {
839 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_RW : LXC_AUTO_CGROUP_MIXED;
840 } else {
841 cg_flags = has_sys_admin ? LXC_AUTO_CGROUP_FULL_RW : LXC_AUTO_CGROUP_FULL_MIXED;
842 }
843 }
844
845 if (!cgroup_mount(conf->rootfs.mount, handler, cg_flags)) {
846 SYSERROR("error mounting /sys/fs/cgroup");
847 return -1;
848 }
849 }
850
851 return 0;
852 }
853
854 static int mount_rootfs(const char *rootfs, const char *target, const char *options)
855 {
856 char absrootfs[MAXPATHLEN];
857 struct stat s;
858 int i;
859
860 typedef int (*rootfs_cb)(const char *, const char *, const char *);
861
862 struct rootfs_type {
863 int type;
864 rootfs_cb cb;
865 } rtfs_type[] = {
866 { S_IFDIR, mount_rootfs_dir },
867 { S_IFBLK, mount_rootfs_block },
868 { S_IFREG, mount_rootfs_file },
869 };
870
871 if (!realpath(rootfs, absrootfs)) {
872 SYSERROR("failed to get real path for '%s'", rootfs);
873 return -1;
874 }
875
876 if (access(absrootfs, F_OK)) {
877 SYSERROR("'%s' is not accessible", absrootfs);
878 return -1;
879 }
880
881 if (stat(absrootfs, &s)) {
882 SYSERROR("failed to stat '%s'", absrootfs);
883 return -1;
884 }
885
886 for (i = 0; i < sizeof(rtfs_type)/sizeof(rtfs_type[0]); i++) {
887
888 if (!__S_ISTYPE(s.st_mode, rtfs_type[i].type))
889 continue;
890
891 return rtfs_type[i].cb(absrootfs, target, options);
892 }
893
894 ERROR("unsupported rootfs type for '%s'", absrootfs);
895 return -1;
896 }
897
/*
 * Set the hostname to utsname->nodename.  A NULL `utsname` means there is
 * nothing to configure and counts as success.
 *
 * Returns 0 on success, -1 if sethostname() fails.
 */
static int setup_utsname(struct utsname *utsname)
{
	const char *hostname;

	if (utsname == NULL)
		return 0;

	hostname = utsname->nodename;

	if (sethostname(hostname, strlen(hostname)) != 0) {
		SYSERROR("failed to set the hostname to '%s'", hostname);
		return -1;
	}

	INFO("'%s' hostname has been setup", hostname);

	return 0;
}
912
/* A symbolic link to create under the container's /dev. */
struct dev_symlinks {
	const char *oldpath;	/* link target */
	const char *name;	/* link name, relative to /dev */
};

/* Links created in the container's /dev by setup_dev_symlinks(). */
static const struct dev_symlinks dev_symlinks[] = {
	{ .oldpath = "/proc/self/fd",   .name = "fd"     },
	{ .oldpath = "/proc/self/fd/0", .name = "stdin"  },
	{ .oldpath = "/proc/self/fd/1", .name = "stdout" },
	{ .oldpath = "/proc/self/fd/2", .name = "stderr" },
};
924
925 static int setup_dev_symlinks(const struct lxc_rootfs *rootfs)
926 {
927 char path[MAXPATHLEN];
928 int ret,i;
929 struct stat s;
930
931
932 for (i = 0; i < sizeof(dev_symlinks) / sizeof(dev_symlinks[0]); i++) {
933 const struct dev_symlinks *d = &dev_symlinks[i];
934 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount, d->name);
935 if (ret < 0 || ret >= MAXPATHLEN)
936 return -1;
937
938 /*
939 * Stat the path first. If we don't get an error
940 * accept it as is and don't try to create it
941 */
942 if (!stat(path, &s)) {
943 continue;
944 }
945
946 ret = symlink(d->oldpath, path);
947
948 if (ret && errno != EEXIST) {
949 if ( errno == EROFS ) {
950 WARN("Warning: Read Only file system while creating %s", path);
951 } else {
952 SYSERROR("Error creating %s", path);
953 return -1;
954 }
955 }
956 }
957 return 0;
958 }
959
/*
 * Build a space-separated list of ptys to pass to systemd.
 *
 * On the first call (*pp == NULL) the string "container_ttys=<name>" is
 * allocated; on later calls " <name>" is appended to it.  Returns false
 * if memory cannot be obtained, in which case *pp is left as it was.
 */
static bool append_ptyname(char **pp, char *name)
{
	static const char prefix[] = "container_ttys=";
	const size_t prefix_len = sizeof(prefix) - 1;
	const size_t name_len = strlen(name);
	size_t cur_len;
	char *grown;

	if (*pp == NULL) {
		char *fresh = malloc(name_len + prefix_len + 1);

		if (fresh == NULL)
			return false;
		memcpy(fresh, prefix, prefix_len);
		memcpy(fresh + prefix_len, name, name_len + 1);
		*pp = fresh;
		return true;
	}

	cur_len = strlen(*pp);
	/* room for the separating blank and the terminating NUL */
	grown = realloc(*pp, cur_len + name_len + 2);
	if (grown == NULL)
		return false;

	grown[cur_len] = ' ';
	memcpy(grown + cur_len + 1, name, name_len + 1);
	*pp = grown;
	return true;
}
982
983 static int setup_tty(struct lxc_conf *conf)
984 {
985 const struct lxc_tty_info *tty_info = &conf->tty_info;
986 char *ttydir = conf->ttydir;
987 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
988 int i, ret;
989
990 if (!conf->rootfs.path)
991 return 0;
992
993 for (i = 0; i < tty_info->nbtty; i++) {
994
995 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
996
997 ret = snprintf(path, sizeof(path), "/dev/tty%d", i + 1);
998 if (ret >= sizeof(path)) {
999 ERROR("pathname too long for ttys");
1000 return -1;
1001 }
1002 if (ttydir) {
1003 /* create dev/lxc/tty%d" */
1004 ret = snprintf(lxcpath, sizeof(lxcpath), "/dev/%s/tty%d", ttydir, i + 1);
1005 if (ret >= sizeof(lxcpath)) {
1006 ERROR("pathname too long for ttys");
1007 return -1;
1008 }
1009 ret = creat(lxcpath, 0660);
1010 if (ret==-1 && errno != EEXIST) {
1011 SYSERROR("error creating %s", lxcpath);
1012 return -1;
1013 }
1014 if (ret >= 0)
1015 close(ret);
1016 ret = unlink(path);
1017 if (ret && errno != ENOENT) {
1018 SYSERROR("error unlinking %s", path);
1019 return -1;
1020 }
1021
1022 if (mount(pty_info->name, lxcpath, "none", MS_BIND, 0)) {
1023 WARN("failed to mount '%s'->'%s'",
1024 pty_info->name, path);
1025 continue;
1026 }
1027
1028 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/tty%d", ttydir, i+1);
1029 if (ret >= sizeof(lxcpath)) {
1030 ERROR("tty pathname too long");
1031 return -1;
1032 }
1033 ret = symlink(lxcpath, path);
1034 if (ret) {
1035 SYSERROR("failed to create symlink for tty %d", i+1);
1036 return -1;
1037 }
1038 } else {
1039 /* If we populated /dev, then we need to create /dev/ttyN */
1040 if (access(path, F_OK)) {
1041 ret = creat(path, 0660);
1042 if (ret==-1) {
1043 SYSERROR("error creating %s", path);
1044 /* this isn't fatal, continue */
1045 } else {
1046 close(ret);
1047 }
1048 }
1049 if (mount(pty_info->name, path, "none", MS_BIND, 0)) {
1050 SYSERROR("failed to mount '%s'->'%s'", pty_info->name, path);
1051 continue;
1052 }
1053 }
1054 if (!append_ptyname(&conf->pty_names, pty_info->name)) {
1055 ERROR("Error setting up container_ttys string");
1056 return -1;
1057 }
1058 }
1059
1060 INFO("%d tty(s) has been setup", tty_info->nbtty);
1061
1062 return 0;
1063 }
1064
1065
1066 static int setup_rootfs_pivot_root(const char *rootfs, const char *pivotdir)
1067 {
1068 int oldroot = -1, newroot = -1;
1069
1070 oldroot = open("/", O_DIRECTORY | O_RDONLY);
1071 if (oldroot < 0) {
1072 SYSERROR("Error opening old-/ for fchdir");
1073 return -1;
1074 }
1075 newroot = open(rootfs, O_DIRECTORY | O_RDONLY);
1076 if (newroot < 0) {
1077 SYSERROR("Error opening new-/ for fchdir");
1078 goto fail;
1079 }
1080
1081 /* change into new root fs */
1082 if (fchdir(newroot)) {
1083 SYSERROR("can't chdir to new rootfs '%s'", rootfs);
1084 goto fail;
1085 }
1086
1087 /* pivot_root into our new root fs */
1088 if (pivot_root(".", ".")) {
1089 SYSERROR("pivot_root syscall failed");
1090 goto fail;
1091 }
1092
1093 /*
1094 * at this point the old-root is mounted on top of our new-root
1095 * To unmounted it we must not be chdir'd into it, so escape back
1096 * to old-root
1097 */
1098 if (fchdir(oldroot) < 0) {
1099 SYSERROR("Error entering oldroot");
1100 goto fail;
1101 }
1102 if (umount2(".", MNT_DETACH) < 0) {
1103 SYSERROR("Error detaching old root");
1104 goto fail;
1105 }
1106
1107 if (fchdir(newroot) < 0) {
1108 SYSERROR("Error re-entering newroot");
1109 goto fail;
1110 }
1111
1112 close(oldroot);
1113 close(newroot);
1114
1115 DEBUG("pivot_root syscall to '%s' successful", rootfs);
1116
1117 return 0;
1118
1119 fail:
1120 if (oldroot != -1)
1121 close(oldroot);
1122 if (newroot != -1)
1123 close(newroot);
1124 return -1;
1125 }
1126
/*
 * Mount a fresh tmpfs on <root>/dev and make sure <root>/dev/pts exists.
 *
 * name, lxcpath  not used by this function
 * root           path of the container root filesystem
 *
 * A rootfs without a /dev directory is not an error: a warning is logged
 * and 0 is returned without doing anything.  Returns -1 if a path cannot
 * be built, the tmpfs cannot be mounted or dev/pts cannot be created, and
 * 0 on success.
 */
static int mount_autodev(const char *name, char *root, const char *lxcpath)
{
	int ret;
	size_t clen;
	char *path;

	INFO("Mounting /dev under %s", root);

	/* $(root) + "/dev/pts" + '\0' */
	clen = strlen(root) + 9;
	path = alloca(clen);

	ret = snprintf(path, clen, "%s/dev", root);
	if (ret < 0 || (size_t)ret >= clen)
		return -1;

	if (!dir_exists(path)) {
		WARN("No /dev on container rootfs.");
		WARN("Proceeding without autodev setup");
		return 0;
	}

	if (mount("none", path, "tmpfs", 0, "size=100000,mode=755")) {
		SYSERROR("Failed mounting tmpfs onto %s\n", path);
		/* report the failure: `false` would read as success (0) */
		return -1;
	}

	INFO("Mounted tmpfs onto %s", path);

	ret = snprintf(path, clen, "%s/dev/pts", root);
	if (ret < 0 || (size_t)ret >= clen)
		return -1;

	/*
	 * If we are running on a devtmpfs mapping, dev/pts may already exist.
	 * If not, then create it and exit if that fails...
	 */
	if (!dir_exists(path)) {
		ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
		if (ret) {
			SYSERROR("Failed to create /dev/pts in container");
			return -1;
		}
	}

	INFO("Mounted /dev under %s", root);
	return 0;
}
1179
/*
 * Description of one device node that fill_autodev() creates under the
 * container's /dev.
 */
struct lxc_devs {
	const char *name;	/* node name, appended to "<root>/dev/" */
	mode_t mode;		/* mode argument handed to mknod() */
	int maj;		/* major number handed to makedev() */
	int min;		/* minor number handed to makedev() */
};
1186
1187 static const struct lxc_devs lxc_devs[] = {
1188 { "null", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 3 },
1189 { "zero", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 5 },
1190 { "full", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 7 },
1191 { "urandom", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 9 },
1192 { "random", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 1, 8 },
1193 { "tty", S_IFCHR | S_IRWXU | S_IRWXG | S_IRWXO, 5, 0 },
1194 { "console", S_IFCHR | S_IRUSR | S_IWUSR, 5, 1 },
1195 };
1196
1197 static int fill_autodev(const char *root)
1198 {
1199 int ret;
1200 char path[MAXPATHLEN];
1201 int i;
1202 mode_t cmask;
1203
1204 INFO("Creating initial consoles under %s/dev", root);
1205
1206 ret = snprintf(path, MAXPATHLEN, "%s/dev", root);
1207 if (ret < 0 || ret >= MAXPATHLEN) {
1208 ERROR("Error calculating container /dev location");
1209 return -1;
1210 }
1211
1212 if (!dir_exists(path)) // ignore, just don't try to fill in
1213 return 0;
1214
1215 INFO("Populating /dev under %s", root);
1216 cmask = umask(S_IXUSR | S_IXGRP | S_IXOTH);
1217 for (i = 0; i < sizeof(lxc_devs) / sizeof(lxc_devs[0]); i++) {
1218 const struct lxc_devs *d = &lxc_devs[i];
1219 ret = snprintf(path, MAXPATHLEN, "%s/dev/%s", root, d->name);
1220 if (ret < 0 || ret >= MAXPATHLEN)
1221 return -1;
1222 ret = mknod(path, d->mode, makedev(d->maj, d->min));
1223 if (ret && errno != EEXIST) {
1224 char hostpath[MAXPATHLEN];
1225 FILE *pathfile;
1226
1227 // Unprivileged containers cannot create devices, so
1228 // bind mount the device from the host
1229 ret = snprintf(hostpath, MAXPATHLEN, "/dev/%s", d->name);
1230 if (ret < 0 || ret >= MAXPATHLEN)
1231 return -1;
1232 pathfile = fopen(path, "wb");
1233 if (!pathfile) {
1234 SYSERROR("Failed to create device mount target '%s'", path);
1235 return -1;
1236 }
1237 fclose(pathfile);
1238 if (mount(hostpath, path, 0, MS_BIND, NULL) != 0) {
1239 SYSERROR("Failed bind mounting device %s from host into container",
1240 d->name);
1241 return -1;
1242 }
1243 }
1244 }
1245 umask(cmask);
1246
1247 INFO("Populated /dev under %s", root);
1248 return 0;
1249 }
1250
1251 static int setup_rootfs(struct lxc_conf *conf)
1252 {
1253 const struct lxc_rootfs *rootfs = &conf->rootfs;
1254
1255 if (!rootfs->path) {
1256 if (mount("", "/", NULL, MS_SLAVE|MS_REC, 0)) {
1257 SYSERROR("Failed to make / rslave");
1258 return -1;
1259 }
1260 return 0;
1261 }
1262
1263 if (access(rootfs->mount, F_OK)) {
1264 SYSERROR("failed to access to '%s', check it is present",
1265 rootfs->mount);
1266 return -1;
1267 }
1268
1269 // First try mounting rootfs using a bdev
1270 struct bdev *bdev = bdev_init(conf, rootfs->path, rootfs->mount, rootfs->options);
1271 if (bdev && bdev->ops->mount(bdev) == 0) {
1272 bdev_put(bdev);
1273 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1274 return 0;
1275 }
1276 if (bdev)
1277 bdev_put(bdev);
1278 if (mount_rootfs(rootfs->path, rootfs->mount, rootfs->options)) {
1279 ERROR("failed to mount rootfs");
1280 return -1;
1281 }
1282
1283 DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount);
1284
1285 return 0;
1286 }
1287
1288 int prepare_ramfs_root(char *root)
1289 {
1290 char buf[LINELEN], *p;
1291 char nroot[PATH_MAX];
1292 FILE *f;
1293 int i;
1294 char *p2;
1295
1296 if (realpath(root, nroot) == NULL)
1297 return -1;
1298
1299 if (chdir("/") == -1)
1300 return -1;
1301
1302 /*
1303 * We could use here MS_MOVE, but in userns this mount is
1304 * locked and can't be moved.
1305 */
1306 if (mount(root, "/", NULL, MS_REC | MS_BIND, NULL)) {
1307 SYSERROR("Failed to move %s into /", root);
1308 return -1;
1309 }
1310
1311 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
1312 SYSERROR("Failed to make . rprivate");
1313 return -1;
1314 }
1315
1316 /*
1317 * The following code cleans up inhereted mounts which are not
1318 * required for CT.
1319 *
1320 * The mountinfo file shows not all mounts, if a few points have been
1321 * unmounted between read operations from the mountinfo. So we need to
1322 * read mountinfo a few times.
1323 *
1324 * This loop can be skipped if a container uses unserns, because all
1325 * inherited mounts are locked and we should live with all this trash.
1326 */
1327 while (1) {
1328 int progress = 0;
1329
1330 f = fopen("./proc/self/mountinfo", "r");
1331 if (!f) {
1332 SYSERROR("Unable to open /proc/self/mountinfo");
1333 return -1;
1334 }
1335 while (fgets(buf, LINELEN, f)) {
1336 for (p = buf, i=0; p && i < 4; i++)
1337 p = strchr(p+1, ' ');
1338 if (!p)
1339 continue;
1340 p2 = strchr(p+1, ' ');
1341 if (!p2)
1342 continue;
1343
1344 *p2 = '\0';
1345 *p = '.';
1346
1347 if (strcmp(p + 1, "/") == 0)
1348 continue;
1349 if (strcmp(p + 1, "/proc") == 0)
1350 continue;
1351
1352 if (umount2(p, MNT_DETACH) == 0)
1353 progress++;
1354 }
1355 fclose(f);
1356 if (!progress)
1357 break;
1358 }
1359
1360 if (umount2("./proc", MNT_DETACH)) {
1361 SYSERROR("Unable to umount /proc");
1362 return -1;
1363 }
1364
1365 /* It is weird, but chdir("..") moves us in a new root */
1366 if (chdir("..") == -1) {
1367 SYSERROR("Unable to change working directory");
1368 return -1;
1369 }
1370
1371 if (chroot(".") == -1) {
1372 SYSERROR("Unable to chroot");
1373 return -1;
1374 }
1375
1376 return 0;
1377 }
1378
1379 static int setup_pivot_root(const struct lxc_rootfs *rootfs)
1380 {
1381 if (!rootfs->path)
1382 return 0;
1383
1384 if (detect_ramfs_rootfs()) {
1385 if (prepare_ramfs_root(rootfs->mount))
1386 return -1;
1387 } else if (setup_rootfs_pivot_root(rootfs->mount, rootfs->pivot)) {
1388 ERROR("failed to setup pivot root");
1389 return -1;
1390 }
1391
1392 return 0;
1393 }
1394
1395 static int setup_pts(int pts)
1396 {
1397 char target[PATH_MAX];
1398
1399 if (!pts)
1400 return 0;
1401
1402 if (!access("/dev/pts/ptmx", F_OK) && umount("/dev/pts")) {
1403 SYSERROR("failed to umount 'dev/pts'");
1404 return -1;
1405 }
1406
1407 if (mkdir("/dev/pts", 0755)) {
1408 if ( errno != EEXIST ) {
1409 SYSERROR("failed to create '/dev/pts'");
1410 return -1;
1411 }
1412 }
1413
1414 if (mount("devpts", "/dev/pts", "devpts", MS_MGC_VAL,
1415 "newinstance,ptmxmode=0666,mode=0620,gid=5")) {
1416 SYSERROR("failed to mount a new instance of '/dev/pts'");
1417 return -1;
1418 }
1419
1420 if (access("/dev/ptmx", F_OK)) {
1421 if (!symlink("/dev/pts/ptmx", "/dev/ptmx"))
1422 goto out;
1423 SYSERROR("failed to symlink '/dev/pts/ptmx'->'/dev/ptmx'");
1424 return -1;
1425 }
1426
1427 if (realpath("/dev/ptmx", target) && !strcmp(target, "/dev/pts/ptmx"))
1428 goto out;
1429
1430 /* fallback here, /dev/pts/ptmx exists just mount bind */
1431 if (mount("/dev/pts/ptmx", "/dev/ptmx", "none", MS_BIND, 0)) {
1432 SYSERROR("mount failed '/dev/pts/ptmx'->'/dev/ptmx'");
1433 return -1;
1434 }
1435
1436 INFO("created new pts instance");
1437
1438 out:
1439 return 0;
1440 }
1441
/*
 * Apply the execution domain @persona to the calling process.
 *
 * A value of -1 means "leave the personality alone".  When the build has no
 * <sys/personality.h> (HAVE_SYS_PERSONALITY_H unset) the function is a no-op.
 *
 * Returns 0 on success or when nothing had to be done, -1 if personality()
 * failed.
 */
static int setup_personality(int persona)
{
#if HAVE_SYS_PERSONALITY_H
	if (persona != -1) {
		if (personality(persona) < 0) {
			SYSERROR("failed to set personality to '0x%x'", persona);
			return -1;
		}

		INFO("set personality to '0x%x'", persona);
	}
#endif

	return 0;
}
1458
1459 static int setup_dev_console(const struct lxc_rootfs *rootfs,
1460 const struct lxc_console *console)
1461 {
1462 char path[MAXPATHLEN];
1463 struct stat s;
1464 int ret;
1465
1466 ret = snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1467 if (ret >= sizeof(path)) {
1468 ERROR("console path too long");
1469 return -1;
1470 }
1471
1472 if (access(path, F_OK)) {
1473 WARN("rootfs specified but no console found at '%s'", path);
1474 return 0;
1475 }
1476
1477 if (console->master < 0) {
1478 INFO("no console");
1479 return 0;
1480 }
1481
1482 if (stat(path, &s)) {
1483 SYSERROR("failed to stat '%s'", path);
1484 return -1;
1485 }
1486
1487 if (chmod(console->name, s.st_mode)) {
1488 SYSERROR("failed to set mode '0%o' to '%s'",
1489 s.st_mode, console->name);
1490 return -1;
1491 }
1492
1493 if (mount(console->name, path, "none", MS_BIND, 0)) {
1494 ERROR("failed to mount '%s' on '%s'", console->name, path);
1495 return -1;
1496 }
1497
1498 INFO("console has been setup");
1499 return 0;
1500 }
1501
1502 static int setup_ttydir_console(const struct lxc_rootfs *rootfs,
1503 const struct lxc_console *console,
1504 char *ttydir)
1505 {
1506 char path[MAXPATHLEN], lxcpath[MAXPATHLEN];
1507 int ret;
1508
1509 /* create rootfs/dev/<ttydir> directory */
1510 ret = snprintf(path, sizeof(path), "%s/dev/%s", rootfs->mount,
1511 ttydir);
1512 if (ret >= sizeof(path))
1513 return -1;
1514 ret = mkdir(path, 0755);
1515 if (ret && errno != EEXIST) {
1516 SYSERROR("failed with errno %d to create %s", errno, path);
1517 return -1;
1518 }
1519 INFO("created %s", path);
1520
1521 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/dev/%s/console",
1522 rootfs->mount, ttydir);
1523 if (ret >= sizeof(lxcpath)) {
1524 ERROR("console path too long");
1525 return -1;
1526 }
1527
1528 snprintf(path, sizeof(path), "%s/dev/console", rootfs->mount);
1529 ret = unlink(path);
1530 if (ret && errno != ENOENT) {
1531 SYSERROR("error unlinking %s", path);
1532 return -1;
1533 }
1534
1535 ret = creat(lxcpath, 0660);
1536 if (ret==-1 && errno != EEXIST) {
1537 SYSERROR("error %d creating %s", errno, lxcpath);
1538 return -1;
1539 }
1540 if (ret >= 0)
1541 close(ret);
1542
1543 if (console->master < 0) {
1544 INFO("no console");
1545 return 0;
1546 }
1547
1548 if (mount(console->name, lxcpath, "none", MS_BIND, 0)) {
1549 ERROR("failed to mount '%s' on '%s'", console->name, lxcpath);
1550 return -1;
1551 }
1552
1553 /* create symlink from rootfs/dev/console to 'lxc/console' */
1554 ret = snprintf(lxcpath, sizeof(lxcpath), "%s/console", ttydir);
1555 if (ret >= sizeof(lxcpath)) {
1556 ERROR("lxc/console path too long");
1557 return -1;
1558 }
1559 ret = symlink(lxcpath, path);
1560 if (ret) {
1561 SYSERROR("failed to create symlink for console");
1562 return -1;
1563 }
1564
1565 INFO("console has been setup on %s", lxcpath);
1566
1567 return 0;
1568 }
1569
1570 static int setup_console(const struct lxc_rootfs *rootfs,
1571 const struct lxc_console *console,
1572 char *ttydir)
1573 {
1574 /* We don't have a rootfs, /dev/console will be shared */
1575 if (!rootfs->path)
1576 return 0;
1577 if (!ttydir)
1578 return setup_dev_console(rootfs, console);
1579
1580 return setup_ttydir_console(rootfs, console, ttydir);
1581 }
1582
1583 static int setup_kmsg(const struct lxc_rootfs *rootfs,
1584 const struct lxc_console *console)
1585 {
1586 char kpath[MAXPATHLEN];
1587 int ret;
1588
1589 if (!rootfs->path)
1590 return 0;
1591 ret = snprintf(kpath, sizeof(kpath), "%s/dev/kmsg", rootfs->mount);
1592 if (ret < 0 || ret >= sizeof(kpath))
1593 return -1;
1594
1595 ret = unlink(kpath);
1596 if (ret && errno != ENOENT) {
1597 SYSERROR("error unlinking %s", kpath);
1598 return -1;
1599 }
1600
1601 ret = symlink("console", kpath);
1602 if (ret) {
1603 SYSERROR("failed to create symlink for kmsg");
1604 return -1;
1605 }
1606
1607 return 0;
1608 }
1609
1610 static void parse_mntopt(char *opt, unsigned long *flags, char **data)
1611 {
1612 struct mount_opt *mo;
1613
1614 /* If opt is found in mount_opt, set or clear flags.
1615 * Otherwise append it to data. */
1616
1617 for (mo = &mount_opt[0]; mo->name != NULL; mo++) {
1618 if (!strncmp(opt, mo->name, strlen(mo->name))) {
1619 if (mo->clear)
1620 *flags &= ~mo->flag;
1621 else
1622 *flags |= mo->flag;
1623 return;
1624 }
1625 }
1626
1627 if (strlen(*data))
1628 strcat(*data, ",");
1629 strcat(*data, opt);
1630 }
1631
/*
 * Split the comma separated option string @mntopts into mount flags and a
 * data string.
 *
 * *@mntflags receives the flags of all recognised options.  *@mntdata
 * receives a newly allocated string with the remaining options, or NULL if
 * there are none; the caller owns it and has to free() it.  Both outputs are
 * reset before anything else happens, so they are valid even when -1 is
 * returned.
 *
 * Returns 0 on success (including @mntopts == NULL), -1 if memory could not
 * be allocated.
 */
int parse_mntopts(const char *mntopts, unsigned long *mntflags,
		  char **mntdata)
{
	char *copy, *extra;
	char *tok, *state = NULL;

	*mntdata = NULL;
	*mntflags = 0L;

	if (!mntopts)
		return 0;

	/* strtok_r() modifies its input, so work on a private copy. */
	copy = strdup(mntopts);
	if (!copy) {
		SYSERROR("failed to allocate memory");
		return -1;
	}

	/* The data string can never be longer than the input. */
	extra = malloc(strlen(copy) + 1);
	if (!extra) {
		SYSERROR("failed to allocate memory");
		free(copy);
		return -1;
	}
	extra[0] = '\0';

	tok = strtok_r(copy, ",", &state);
	while (tok != NULL) {
		parse_mntopt(tok, mntflags, &extra);
		tok = strtok_r(NULL, ",", &state);
	}

	if (extra[0] != '\0')
		*mntdata = extra;
	else
		free(extra);
	free(copy);

	return 0;
}
1670
/*
 * Terminate @word in place at its first space or tab.  A string without
 * either is left unchanged.
 */
static void null_endofword(char *word)
{
	word[strcspn(word, " \t")] = '\0';
}
1677
/*
 * Skip @nfields fields in @src.
 *
 * A field is a run of characters other than space and tab, followed by
 * exactly one separator character.  Returns a pointer just past the last
 * skipped separator, or a pointer to the terminating NUL if the string ends
 * first.
 */
static char *get_field(char *src, int nfields)
{
	char *cur = src;
	int left;

	for (left = nfields; left > 0; left--) {
		cur += strcspn(cur, " \t");
		if (*cur == '\0')
			break;
		cur++;
	}

	return cur;
}
1695
/*
 * Perform one mount and, for bind or remount requests, a follow-up remount
 * so that the requested flags actually take effect.
 *
 * The first mount() is issued with MS_REMOUNT masked out.  If @mountflags
 * contains MS_BIND or MS_REMOUNT a second mount() with MS_REMOUNT added
 * follows.  When statvfs() is available and succeeds on @fsname, the
 * nosuid/nodev/ro/noexec flags it reports are added to that remount; a pure
 * bind mount that needs no extra flags and is not read-only skips the
 * remount.
 *
 * Returns 0 on success, and also when a mount fails but @optional is set;
 * returns -1 when a non-optional mount fails.
 */
static int mount_entry(const char *fsname, const char *target,
		       const char *fstype, unsigned long mountflags,
		       const char *data, int optional)
{
#ifdef HAVE_STATVFS
	struct statvfs sb;
#endif
	int need_remount = 1;

	if (mount(fsname, target, fstype, mountflags & ~MS_REMOUNT, data) != 0) {
		if (!optional) {
			SYSERROR("failed to mount '%s' on '%s'", fsname, target);
			return -1;
		}

		INFO("failed to mount '%s' on '%s' (optional): %s", fsname,
		     target, strerror(errno));
		return 0;
	}

	if (mountflags & (MS_REMOUNT | MS_BIND)) {
		/* MS_RDONLY if read-only was requested, 0 otherwise. */
		unsigned long rdonly = mountflags & MS_RDONLY;

		DEBUG("remounting %s on %s to respect bind or remount options",
		      fsname ? fsname : "(none)", target ? target : "(none)");

#ifdef HAVE_STATVFS
		if (statvfs(fsname, &sb) == 0) {
			unsigned long required_flags = rdonly;

			required_flags |= sb.f_flag &
				(MS_NOSUID | MS_NODEV | MS_RDONLY | MS_NOEXEC);

			DEBUG("(at remount) flags for %s was %lu, required extra flags are %lu", fsname, sb.f_flag, required_flags);

			/*
			 * If this was a bind mount request, and required_flags
			 * does not have any flags which are not already in
			 * mountflags, then skip the remount
			 */
			if (!(mountflags & MS_REMOUNT) &&
			    !(required_flags & ~mountflags) && rdonly == 0) {
				DEBUG("mountflags already was %lu, skipping remount",
				      mountflags);
				need_remount = 0;
			} else {
				mountflags |= required_flags;
			}
		}
#endif

		if (need_remount &&
		    mount(fsname, target, fstype, mountflags | MS_REMOUNT, data) != 0) {
			if (!optional) {
				SYSERROR("failed to mount '%s' on '%s'",
					 fsname, target);
				return -1;
			}

			INFO("failed to mount '%s' on '%s' (optional): %s",
			     fsname, target, strerror(errno));
			return 0;
		}
	}

	DEBUG("mounted '%s' on '%s', type '%s'", fsname, target, fstype);

	return 0;
}
1772
/*
 * Remove the lxc specific options 'optional', 'create=dir' and 'create=file'
 * from mntent->mnt_opts, in place.
 *
 * The option string is processed token by token (tokens are separated by
 * commas).  A token is dropped when it equals one of the names above or
 * consists of such a name followed by '='.  Every occurrence is removed,
 * other options that merely contain one of the names are left alone, empty
 * tokens are discarded, and the result never starts or ends with a comma.
 */
static void cull_mntent_opt(struct mntent *mntent)
{
	static const char *const culled[] = {
		"create=dir",
		"create=file",
		"optional",
		NULL
	};
	char *rd = mntent->mnt_opts;	/* next token to examine */
	char *wr = mntent->mnt_opts;	/* end of the compacted result */

	if (!rd)
		return;

	while (*rd) {
		size_t toklen = strcspn(rd, ",");
		int keep = toklen > 0;
		int i;

		for (i = 0; keep && culled[i]; i++) {
			size_t n = strlen(culled[i]);

			if (toklen >= n && strncmp(rd, culled[i], n) == 0 &&
			    (toklen == n || rd[n] == '='))
				keep = 0;
		}

		if (keep) {
			/* wr never overtakes rd, so this stays in bounds. */
			if (wr != mntent->mnt_opts)
				*wr++ = ',';
			memmove(wr, rd, toklen);
			wr += toklen;
		}

		rd += toklen;
		if (*rd == ',')
			rd++;
	}

	*wr = '\0';
}
1797
/*
 * Mount one fstab entry when the container has no separate rootfs, i.e. the
 * entry's mnt_dir is used as the target as-is.
 *
 * The lxc options 'create=dir' and 'create=file' are honoured first: the
 * target directory, or the parent directory plus an empty target file, is
 * created.  Failures to do so are only logged; the mount is still attempted
 * and its outcome decides the return value.  The lxc options are then
 * stripped from mnt_opts before the remaining options are parsed.
 *
 * Returns the result of mount_entry(), or -1 if the options cannot be
 * parsed.
 */
static inline int mount_entry_on_systemfs(struct mntent *mntent)
{
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathcopy = NULL;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(mntent->mnt_dir, 0755) < 0) {
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		}
	}

	if (hasmntopt(mntent, "create=file") && access(mntent->mnt_dir, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the strdup()ed pointer for
		 * free() and never free what dirname() returned.
		 */
		pathcopy = strdup(mntent->mnt_dir);
		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(mntent->mnt_dir, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", mntent->mnt_dir);
		} else {
			fclose(pathfile);
		}
	}

	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, mntent->mnt_dir,
			  mntent->mnt_type, mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
1844
1845 static int mount_entry_on_absolute_rootfs(struct mntent *mntent,
1846 const struct lxc_rootfs *rootfs,
1847 const char *lxc_name)
1848 {
1849 char *aux;
1850 char path[MAXPATHLEN];
1851 unsigned long mntflags;
1852 char *mntdata;
1853 int r, ret = 0, offset;
1854 const char *lxcpath;
1855 FILE *pathfile = NULL;
1856 char *pathdirname = NULL;
1857 bool optional = hasmntopt(mntent, "optional") != NULL;
1858
1859 lxcpath = lxc_global_config_value("lxc.lxcpath");
1860 if (!lxcpath) {
1861 ERROR("Out of memory");
1862 return -1;
1863 }
1864
1865 /* if rootfs->path is a blockdev path, allow container fstab to
1866 * use $lxcpath/CN/rootfs as the target prefix */
1867 r = snprintf(path, MAXPATHLEN, "%s/%s/rootfs", lxcpath, lxc_name);
1868 if (r < 0 || r >= MAXPATHLEN)
1869 goto skipvarlib;
1870
1871 aux = strstr(mntent->mnt_dir, path);
1872 if (aux) {
1873 offset = strlen(path);
1874 goto skipabs;
1875 }
1876
1877 skipvarlib:
1878 aux = strstr(mntent->mnt_dir, rootfs->path);
1879 if (!aux) {
1880 WARN("ignoring mount point '%s'", mntent->mnt_dir);
1881 goto out;
1882 }
1883 offset = strlen(rootfs->path);
1884
1885 skipabs:
1886
1887 r = snprintf(path, MAXPATHLEN, "%s/%s", rootfs->mount,
1888 aux + offset);
1889 if (r < 0 || r >= MAXPATHLEN) {
1890 WARN("pathnme too long for '%s'", mntent->mnt_dir);
1891 ret = -1;
1892 goto out;
1893 }
1894
1895 if (hasmntopt(mntent, "create=dir")) {
1896 if (mkdir_p(path, 0755) < 0) {
1897 WARN("Failed to create mount target '%s'", path);
1898 ret = -1;
1899 }
1900 }
1901
1902 if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
1903 pathdirname = strdup(path);
1904 pathdirname = dirname(pathdirname);
1905 if (mkdir_p(pathdirname, 0755) < 0) {
1906 WARN("Failed to create target directory");
1907 }
1908 pathfile = fopen(path, "wb");
1909 if (!pathfile) {
1910 WARN("Failed to create mount target '%s'", path);
1911 ret = -1;
1912 }
1913 else
1914 fclose(pathfile);
1915 }
1916 cull_mntent_opt(mntent);
1917
1918 if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
1919 free(mntdata);
1920 return -1;
1921 }
1922
1923 ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
1924 mntflags, mntdata, optional);
1925
1926 free(mntdata);
1927
1928 out:
1929 free(pathdirname);
1930 return ret;
1931 }
1932
/*
 * Mount one fstab entry whose mnt_dir is relative: the target is
 * "<rootfs>/<mnt_dir>".
 *
 * 'create=dir' / 'create=file' are honoured before mounting; failures to
 * create the target are only logged and the mount is still attempted.  The
 * lxc specific options are stripped from mnt_opts before the remaining
 * options are parsed.
 *
 * Returns the result of mount_entry(), or -1 if the target path is too long
 * or the options cannot be parsed.
 */
static int mount_entry_on_relative_rootfs(struct mntent *mntent,
					  const char *rootfs)
{
	char path[MAXPATHLEN];
	unsigned long mntflags;
	char *mntdata;
	int ret;
	FILE *pathfile = NULL;
	char *pathcopy = NULL;
	bool optional = hasmntopt(mntent, "optional") != NULL;

	/* relative to root mount point */
	ret = snprintf(path, sizeof(path), "%s/%s", rootfs, mntent->mnt_dir);
	if (ret < 0 || (size_t)ret >= sizeof(path)) {
		ERROR("path name too long");
		return -1;
	}

	if (hasmntopt(mntent, "create=dir")) {
		if (mkdir_p(path, 0755) < 0) {
			WARN("Failed to create mount target '%s'", path);
		}
	}

	if (hasmntopt(mntent, "create=file") && access(path, F_OK)) {
		/*
		 * dirname() may modify its argument and may return a pointer
		 * to static storage, so keep the strdup()ed pointer for
		 * free() and never free what dirname() returned.
		 */
		pathcopy = strdup(path);
		if (!pathcopy || mkdir_p(dirname(pathcopy), 0755) < 0) {
			WARN("Failed to create target directory");
		}
		free(pathcopy);

		pathfile = fopen(path, "wb");
		if (!pathfile) {
			WARN("Failed to create mount target '%s'", path);
		} else {
			fclose(pathfile);
		}
	}
	cull_mntent_opt(mntent);

	if (parse_mntopts(mntent->mnt_opts, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return -1;
	}

	ret = mount_entry(mntent->mnt_fsname, path, mntent->mnt_type,
			  mntflags, mntdata, optional);

	free(mntdata);

	return ret;
}
1987
1988 static int mount_file_entries(const struct lxc_rootfs *rootfs, FILE *file,
1989 const char *lxc_name)
1990 {
1991 struct mntent mntent;
1992 char buf[4096];
1993 int ret = -1;
1994
1995 while (getmntent_r(file, &mntent, buf, sizeof(buf))) {
1996
1997 if (!rootfs->path) {
1998 if (mount_entry_on_systemfs(&mntent))
1999 goto out;
2000 continue;
2001 }
2002
2003 /* We have a separate root, mounts are relative to it */
2004 if (mntent.mnt_dir[0] != '/') {
2005 if (mount_entry_on_relative_rootfs(&mntent,
2006 rootfs->mount))
2007 goto out;
2008 continue;
2009 }
2010
2011 if (mount_entry_on_absolute_rootfs(&mntent, rootfs, lxc_name))
2012 goto out;
2013 }
2014
2015 ret = 0;
2016
2017 INFO("mount points have been setup");
2018 out:
2019 return ret;
2020 }
2021
/*
 * Mount all entries of the fstab file @fstab for the container.
 *
 * A NULL @fstab means there is nothing to do.  Returns 0 on success, -1 if
 * the file cannot be opened or an entry fails to mount.
 */
static int setup_mount(const struct lxc_rootfs *rootfs, const char *fstab,
		       const char *lxc_name)
{
	FILE *mtab;
	int rc;

	if (fstab == NULL)
		return 0;

	mtab = setmntent(fstab, "r");
	if (mtab == NULL) {
		SYSERROR("failed to use '%s'", fstab);
		return -1;
	}

	rc = mount_file_entries(rootfs, mtab, lxc_name);
	endmntent(mtab);

	return rc;
}
2042
2043 FILE *write_mount_file(struct lxc_list *mount)
2044 {
2045 FILE *file;
2046 struct lxc_list *iterator;
2047 char *mount_entry;
2048
2049 file = tmpfile();
2050 if (!file) {
2051 ERROR("tmpfile error: %m");
2052 return NULL;
2053 }
2054
2055 lxc_list_for_each(iterator, mount) {
2056 mount_entry = iterator->elem;
2057 fprintf(file, "%s\n", mount_entry);
2058 }
2059
2060 rewind(file);
2061 return file;
2062 }
2063
/*
 * Mount the entries given in the list @mount.
 *
 * The list is first written to a temporary fstab-style file, which is then
 * processed by mount_file_entries().  Returns 0 on success, -1 on error.
 */
static int setup_mount_entries(const struct lxc_rootfs *rootfs, struct lxc_list *mount,
			       const char *lxc_name)
{
	int rc;
	FILE *entries = write_mount_file(mount);

	if (entries == NULL)
		return -1;

	rc = mount_file_entries(rootfs, entries, lxc_name);
	fclose(entries);

	return rc;
}
2079
2080 static int parse_cap(const char *cap)
2081 {
2082 char *ptr = NULL;
2083 int i, capid = -1;
2084
2085 if (!strcmp(cap, "none"))
2086 return -2;
2087
2088 for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) {
2089
2090 if (strcmp(cap, caps_opt[i].name))
2091 continue;
2092
2093 capid = caps_opt[i].value;
2094 break;
2095 }
2096
2097 if (capid < 0) {
2098 /* try to see if it's numeric, so the user may specify
2099 * capabilities that the running kernel knows about but
2100 * we don't */
2101 errno = 0;
2102 capid = strtol(cap, &ptr, 10);
2103 if (!ptr || *ptr != '\0' || errno != 0)
2104 /* not a valid number */
2105 capid = -1;
2106 else if (capid > lxc_caps_last_cap())
2107 /* we have a number but it's not a valid
2108 * capability */
2109 capid = -1;
2110 }
2111
2112 return capid;
2113 }
2114
2115 int in_caplist(int cap, struct lxc_list *caps)
2116 {
2117 struct lxc_list *iterator;
2118 int capid;
2119
2120 lxc_list_for_each(iterator, caps) {
2121 capid = parse_cap(iterator->elem);
2122 if (capid == cap)
2123 return 1;
2124 }
2125
2126 return 0;
2127 }
2128
2129 static int setup_caps(struct lxc_list *caps)
2130 {
2131 struct lxc_list *iterator;
2132 char *drop_entry;
2133 int capid;
2134
2135 lxc_list_for_each(iterator, caps) {
2136
2137 drop_entry = iterator->elem;
2138
2139 capid = parse_cap(drop_entry);
2140
2141 if (capid < 0) {
2142 ERROR("unknown capability %s", drop_entry);
2143 return -1;
2144 }
2145
2146 DEBUG("drop capability '%s' (%d)", drop_entry, capid);
2147
2148 if (prctl(PR_CAPBSET_DROP, capid, 0, 0, 0)) {
2149 SYSERROR("failed to remove %s capability", drop_entry);
2150 return -1;
2151 }
2152
2153 }
2154
2155 DEBUG("capabilities have been setup");
2156
2157 return 0;
2158 }
2159
2160 static int dropcaps_except(struct lxc_list *caps)
2161 {
2162 struct lxc_list *iterator;
2163 char *keep_entry;
2164 int i, capid;
2165 int numcaps = lxc_caps_last_cap() + 1;
2166 INFO("found %d capabilities", numcaps);
2167
2168 if (numcaps <= 0 || numcaps > 200)
2169 return -1;
2170
2171 // caplist[i] is 1 if we keep capability i
2172 int *caplist = alloca(numcaps * sizeof(int));
2173 memset(caplist, 0, numcaps * sizeof(int));
2174
2175 lxc_list_for_each(iterator, caps) {
2176
2177 keep_entry = iterator->elem;
2178
2179 capid = parse_cap(keep_entry);
2180
2181 if (capid == -2)
2182 continue;
2183
2184 if (capid < 0) {
2185 ERROR("unknown capability %s", keep_entry);
2186 return -1;
2187 }
2188
2189 DEBUG("keep capability '%s' (%d)", keep_entry, capid);
2190
2191 caplist[capid] = 1;
2192 }
2193 for (i=0; i<numcaps; i++) {
2194 if (caplist[i])
2195 continue;
2196 if (prctl(PR_CAPBSET_DROP, i, 0, 0, 0)) {
2197 SYSERROR("failed to remove capability %d", i);
2198 return -1;
2199 }
2200 }
2201
2202 DEBUG("capabilities have been setup");
2203
2204 return 0;
2205 }
2206
2207 static int setup_hw_addr(char *hwaddr, const char *ifname)
2208 {
2209 struct sockaddr sockaddr;
2210 struct ifreq ifr;
2211 int ret, fd;
2212
2213 ret = lxc_convert_mac(hwaddr, &sockaddr);
2214 if (ret) {
2215 ERROR("mac address '%s' conversion failed : %s",
2216 hwaddr, strerror(-ret));
2217 return -1;
2218 }
2219
2220 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
2221 ifr.ifr_name[IFNAMSIZ-1] = '\0';
2222 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2223
2224 fd = socket(AF_INET, SOCK_DGRAM, 0);
2225 if (fd < 0) {
2226 ERROR("socket failure : %s", strerror(errno));
2227 return -1;
2228 }
2229
2230 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2231 close(fd);
2232 if (ret)
2233 ERROR("ioctl failure : %s", strerror(errno));
2234
2235 DEBUG("mac address '%s' on '%s' has been setup", hwaddr, ifr.ifr_name);
2236
2237 return ret;
2238 }
2239
2240 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
2241 {
2242 struct lxc_list *iterator;
2243 struct lxc_inetdev *inetdev;
2244 int err;
2245
2246 lxc_list_for_each(iterator, ip) {
2247
2248 inetdev = iterator->elem;
2249
2250 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2251 &inetdev->bcast, inetdev->prefix);
2252 if (err) {
2253 ERROR("failed to setup_ipv4_addr ifindex %d : %s",
2254 ifindex, strerror(-err));
2255 return -1;
2256 }
2257 }
2258
2259 return 0;
2260 }
2261
2262 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
2263 {
2264 struct lxc_list *iterator;
2265 struct lxc_inet6dev *inet6dev;
2266 int err;
2267
2268 lxc_list_for_each(iterator, ip) {
2269
2270 inet6dev = iterator->elem;
2271
2272 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
2273 &inet6dev->mcast, &inet6dev->acast,
2274 inet6dev->prefix);
2275 if (err) {
2276 ERROR("failed to setup_ipv6_addr ifindex %d : %s",
2277 ifindex, strerror(-err));
2278 return -1;
2279 }
2280 }
2281
2282 return 0;
2283 }
2284
/*
 * Configure one network device of the container.
 *
 * In order: resolve the interface index and current name, rename the
 * interface to netdev->name, set the MAC address, add the IPv4 and IPv6
 * addresses, bring the interface (and the loopback) up when IFF_UP is set,
 * and finally install the IPv4 and IPv6 gateways.
 *
 * Returns 0 on success, -1 on the first step that fails.
 */
static int setup_netdev(struct lxc_netdev *netdev)
{
	char ifname[IFNAMSIZ];
	/* Points at ifname; both names refer to the same buffer. */
	char *current_ifname = ifname;
	int err;

	/* empty network namespace */
	if (!netdev->ifindex) {
		if (netdev->flags & IFF_UP) {
			err = lxc_netdev_up("lo");
			if (err) {
				ERROR("failed to set the loopback up : %s",
				      strerror(-err));
				return -1;
			}
		}
		/* Only veth devices continue past this point without an index. */
		if (netdev->type != LXC_NET_VETH)
			return 0;
		netdev->ifindex = if_nametoindex(netdev->name);
	}

	/* get the new ifindex in case of physical netdev */
	if (netdev->type == LXC_NET_PHYS) {
		if (!(netdev->ifindex = if_nametoindex(netdev->link))) {
			ERROR("failed to get ifindex for %s",
				netdev->link);
			return -1;
		}
	}

	/* retrieve the name of the interface */
	if (!if_indextoname(netdev->ifindex, current_ifname)) {
		ERROR("no interface corresponding to index '%d'",
		      netdev->ifindex);
		return -1;
	}

	/* default: let the system choose an interface name */
	if (!netdev->name)
		netdev->name = netdev->type == LXC_NET_PHYS ?
			netdev->link : "eth%d";

	/* rename the interface if its current name differs */
	if (strcmp(ifname, netdev->name) != 0) {
		err = lxc_netdev_rename_by_name(ifname, netdev->name);
		if (err) {
			ERROR("failed to rename %s->%s : %s", ifname, netdev->name,
			      strerror(-err));
			return -1;
		}
	}

	/* Re-read the name of the interface because its name has changed
	 * and would be automatically allocated by the system
	 */
	if (!if_indextoname(netdev->ifindex, current_ifname)) {
		ERROR("no interface corresponding to index '%d'",
		      netdev->ifindex);
		return -1;
	}

	/* set a mac address */
	if (netdev->hwaddr) {
		if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
			ERROR("failed to setup hw address for '%s'",
			      current_ifname);
			return -1;
		}
	}

	/* setup ipv4 addresses on the interface */
	if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
		ERROR("failed to setup ip addresses for '%s'",
			      ifname);
		return -1;
	}

	/* setup ipv6 addresses on the interface */
	if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
		ERROR("failed to setup ipv6 addresses for '%s'",
			      ifname);
		return -1;
	}

	/* set the network device up */
	if (netdev->flags & IFF_UP) {
		/* NOTE(review): this declaration shadows the function-level err. */
		int err;

		err = lxc_netdev_up(current_ifname);
		if (err) {
			ERROR("failed to set '%s' up : %s", current_ifname,
			      strerror(-err));
			return -1;
		}

		/* the network is up, make the loopback up too */
		err = lxc_netdev_up("lo");
		if (err) {
			ERROR("failed to set the loopback up : %s",
			      strerror(-err));
			return -1;
		}
	}

	/* We can only set up the default routes after bringing
	 * up the interface, since bringing up the interface adds
	 * the link-local routes and we can't add a default
	 * route if the gateway is not reachable. */

	/* setup ipv4 gateway on the interface */
	if (netdev->ipv4_gateway) {
		if (!(netdev->flags & IFF_UP)) {
			ERROR("Cannot add ipv4 gateway for %s when not bringing up the interface", ifname);
			return -1;
		}

		if (lxc_list_empty(&netdev->ipv4)) {
			ERROR("Cannot add ipv4 gateway for %s when not assigning an address", ifname);
			return -1;
		}

		err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
		if (err) {
			/*
			 * First attempt failed: add a destination route to the
			 * gateway (a failure here is only logged) and retry.
			 */
			err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
			if (err) {
				ERROR("failed to add ipv4 dest for '%s': %s",
					      ifname, strerror(-err));
			}

			err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
			if (err) {
				ERROR("failed to setup ipv4 gateway for '%s': %s",
					      ifname, strerror(-err));
				if (netdev->ipv4_gateway_auto) {
					char buf[INET_ADDRSTRLEN];
					inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
					ERROR("tried to set autodetected ipv4 gateway '%s'", buf);
				}
				return -1;
			}
		}
	}

	/* setup ipv6 gateway on the interface */
	if (netdev->ipv6_gateway) {
		if (!(netdev->flags & IFF_UP)) {
			ERROR("Cannot add ipv6 gateway for %s when not bringing up the interface", ifname);
			return -1;
		}

		/* A link-local gateway is accepted even without an assigned address. */
		if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
			ERROR("Cannot add ipv6 gateway for %s when not assigning an address", ifname);
			return -1;
		}

		err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
		if (err) {
			/*
			 * First attempt failed: add a destination route to the
			 * gateway (a failure here is only logged) and retry.
			 */
			err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
			if (err) {
				ERROR("failed to add ipv6 dest for '%s': %s",
				      ifname, strerror(-err));
			}

			err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
			if (err) {
				ERROR("failed to setup ipv6 gateway for '%s': %s",
					      ifname, strerror(-err));
				if (netdev->ipv6_gateway_auto) {
					char buf[INET6_ADDRSTRLEN];
					inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
					ERROR("tried to set autodetected ipv6 gateway '%s'", buf);
				}
				return -1;
			}
		}
	}

	DEBUG("'%s' has been setup", current_ifname);

	return 0;
}
2466
2467 static int setup_network(struct lxc_list *network)
2468 {
2469 struct lxc_list *iterator;
2470 struct lxc_netdev *netdev;
2471
2472 lxc_list_for_each(iterator, network) {
2473
2474 netdev = iterator->elem;
2475
2476 if (setup_netdev(netdev)) {
2477 ERROR("failed to setup netdev");
2478 return -1;
2479 }
2480 }
2481
2482 if (!lxc_list_empty(network))
2483 INFO("network has been setup");
2484
2485 return 0;
2486 }
2487
2488 /* try to move physical nics to the init netns */
2489 void restore_phys_nics_to_netns(int netnsfd, struct lxc_conf *conf)
2490 {
2491 int i, ret, oldfd;
2492 char path[MAXPATHLEN];
2493
2494 if (netnsfd < 0)
2495 return;
2496
2497 ret = snprintf(path, MAXPATHLEN, "/proc/self/ns/net");
2498 if (ret < 0 || ret >= MAXPATHLEN) {
2499 WARN("Failed to open monitor netns fd");
2500 return;
2501 }
2502 if ((oldfd = open(path, O_RDONLY)) < 0) {
2503 SYSERROR("Failed to open monitor netns fd");
2504 return;
2505 }
2506 if (setns(netnsfd, 0) != 0) {
2507 SYSERROR("Failed to enter container netns to reset nics");
2508 close(oldfd);
2509 return;
2510 }
2511 for (i=0; i<conf->num_savednics; i++) {
2512 struct saved_nic *s = &conf->saved_nics[i];
2513 if (lxc_netdev_move_by_index(s->ifindex, 1, NULL))
2514 WARN("Error moving nic index:%d back to host netns",
2515 s->ifindex);
2516 }
2517 if (setns(oldfd, 0) != 0)
2518 SYSERROR("Failed to re-enter monitor's netns");
2519 close(oldfd);
2520 }
2521
2522 void lxc_rename_phys_nics_on_shutdown(int netnsfd, struct lxc_conf *conf)
2523 {
2524 int i;
2525
2526 if (conf->num_savednics == 0)
2527 return;
2528
2529 INFO("running to reset %d nic names", conf->num_savednics);
2530 restore_phys_nics_to_netns(netnsfd, conf);
2531 for (i=0; i<conf->num_savednics; i++) {
2532 struct saved_nic *s = &conf->saved_nics[i];
2533 INFO("resetting nic %d to %s", s->ifindex, s->orig_name);
2534 lxc_netdev_rename_by_index(s->ifindex, s->orig_name);
2535 free(s->orig_name);
2536 }
2537 conf->num_savednics = 0;
2538 }
2539
2540 static char *default_rootfs_mount = LXCROOTFSMOUNT;
2541
2542 struct lxc_conf *lxc_conf_init(void)
2543 {
2544 struct lxc_conf *new;
2545 int i;
2546
2547 new = malloc(sizeof(*new));
2548 if (!new) {
2549 ERROR("lxc_conf_init : %m");
2550 return NULL;
2551 }
2552 memset(new, 0, sizeof(*new));
2553
2554 new->loglevel = LXC_LOG_PRIORITY_NOTSET;
2555 new->personality = -1;
2556 new->autodev = 1;
2557 new->console.log_path = NULL;
2558 new->console.log_fd = -1;
2559 new->console.path = NULL;
2560 new->console.peer = -1;
2561 new->console.peerpty.busy = -1;
2562 new->console.peerpty.master = -1;
2563 new->console.peerpty.slave = -1;
2564 new->console.master = -1;
2565 new->console.slave = -1;
2566 new->console.name[0] = '\0';
2567 new->maincmd_fd = -1;
2568 new->nbd_idx = -1;
2569 new->rootfs.mount = strdup(default_rootfs_mount);
2570 if (!new->rootfs.mount) {
2571 ERROR("lxc_conf_init : %m");
2572 free(new);
2573 return NULL;
2574 }
2575 new->kmsg = 0;
2576 lxc_list_init(&new->cgroup);
2577 lxc_list_init(&new->network);
2578 lxc_list_init(&new->mount_list);
2579 lxc_list_init(&new->caps);
2580 lxc_list_init(&new->keepcaps);
2581 lxc_list_init(&new->id_map);
2582 lxc_list_init(&new->includes);
2583 lxc_list_init(&new->aliens);
2584 lxc_list_init(&new->environment);
2585 for (i=0; i<NUM_LXC_HOOKS; i++)
2586 lxc_list_init(&new->hooks[i]);
2587 lxc_list_init(&new->groups);
2588 new->lsm_aa_profile = NULL;
2589 new->lsm_se_context = NULL;
2590 new->tmp_umount_proc = 0;
2591
2592 for (i = 0; i < LXC_NS_MAX; i++)
2593 new->inherit_ns_fd[i] = -1;
2594
2595 return new;
2596 }
2597
2598 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2599 {
2600 char veth1buf[IFNAMSIZ], *veth1;
2601 char veth2buf[IFNAMSIZ], *veth2;
2602 int err;
2603
2604 if (netdev->priv.veth_attr.pair)
2605 veth1 = netdev->priv.veth_attr.pair;
2606 else {
2607 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
2608 if (err >= sizeof(veth1buf)) { /* can't *really* happen, but... */
2609 ERROR("veth1 name too long");
2610 return -1;
2611 }
2612 veth1 = lxc_mkifname(veth1buf);
2613 if (!veth1) {
2614 ERROR("failed to allocate a temporary name");
2615 return -1;
2616 }
2617 /* store away for deconf */
2618 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
2619 }
2620
2621 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
2622 veth2 = lxc_mkifname(veth2buf);
2623 if (!veth2) {
2624 ERROR("failed to allocate a temporary name");
2625 goto out_delete;
2626 }
2627
2628 err = lxc_veth_create(veth1, veth2);
2629 if (err) {
2630 ERROR("failed to create veth pair (%s and %s): %s", veth1, veth2,
2631 strerror(-err));
2632 goto out_delete;
2633 }
2634
2635 /* changing the high byte of the mac address to 0xfe, the bridge interface
2636 * will always keep the host's mac address and not take the mac address
2637 * of a container */
2638 err = setup_private_host_hw_addr(veth1);
2639 if (err) {
2640 ERROR("failed to change mac address of host interface '%s': %s",
2641 veth1, strerror(-err));
2642 goto out_delete;
2643 }
2644
2645 if (netdev->mtu) {
2646 err = lxc_netdev_set_mtu(veth1, atoi(netdev->mtu));
2647 if (!err)
2648 err = lxc_netdev_set_mtu(veth2, atoi(netdev->mtu));
2649 if (err) {
2650 ERROR("failed to set mtu '%s' for veth pair (%s and %s): %s",
2651 netdev->mtu, veth1, veth2, strerror(-err));
2652 goto out_delete;
2653 }
2654 }
2655
2656 if (netdev->link) {
2657 err = lxc_bridge_attach(netdev->link, veth1);
2658 if (err) {
2659 ERROR("failed to attach '%s' to the bridge '%s': %s",
2660 veth1, netdev->link, strerror(-err));
2661 goto out_delete;
2662 }
2663 }
2664
2665 netdev->ifindex = if_nametoindex(veth2);
2666 if (!netdev->ifindex) {
2667 ERROR("failed to retrieve the index for %s", veth2);
2668 goto out_delete;
2669 }
2670
2671 err = lxc_netdev_up(veth1);
2672 if (err) {
2673 ERROR("failed to set %s up : %s", veth1, strerror(-err));
2674 goto out_delete;
2675 }
2676
2677 if (netdev->upscript) {
2678 err = run_script(handler->name, "net", netdev->upscript, "up",
2679 "veth", veth1, (char*) NULL);
2680 if (err)
2681 goto out_delete;
2682 }
2683
2684 DEBUG("instantiated veth '%s/%s', index is '%d'",
2685 veth1, veth2, netdev->ifindex);
2686
2687 return 0;
2688
2689 out_delete:
2690 lxc_netdev_delete_by_name(veth1);
2691 if (!netdev->priv.veth_attr.pair)
2692 free(veth1);
2693 free(veth2);
2694 return -1;
2695 }
2696
2697 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
2698 {
2699 char *veth1;
2700 int err;
2701
2702 if (netdev->priv.veth_attr.pair)
2703 veth1 = netdev->priv.veth_attr.pair;
2704 else
2705 veth1 = netdev->priv.veth_attr.veth1;
2706
2707 if (netdev->downscript) {
2708 err = run_script(handler->name, "net", netdev->downscript,
2709 "down", "veth", veth1, (char*) NULL);
2710 if (err)
2711 return -1;
2712 }
2713 return 0;
2714 }
2715
2716 static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2717 {
2718 char peerbuf[IFNAMSIZ], *peer;
2719 int err;
2720
2721 if (!netdev->link) {
2722 ERROR("no link specified for macvlan netdev");
2723 return -1;
2724 }
2725
2726 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
2727 if (err >= sizeof(peerbuf))
2728 return -1;
2729
2730 peer = lxc_mkifname(peerbuf);
2731 if (!peer) {
2732 ERROR("failed to make a temporary name");
2733 return -1;
2734 }
2735
2736 err = lxc_macvlan_create(netdev->link, peer,
2737 netdev->priv.macvlan_attr.mode);
2738 if (err) {
2739 ERROR("failed to create macvlan interface '%s' on '%s' : %s",
2740 peer, netdev->link, strerror(-err));
2741 goto out;
2742 }
2743
2744 netdev->ifindex = if_nametoindex(peer);
2745 if (!netdev->ifindex) {
2746 ERROR("failed to retrieve the index for %s", peer);
2747 goto out;
2748 }
2749
2750 if (netdev->upscript) {
2751 err = run_script(handler->name, "net", netdev->upscript, "up",
2752 "macvlan", netdev->link, (char*) NULL);
2753 if (err)
2754 goto out;
2755 }
2756
2757 DEBUG("instantiated macvlan '%s', index is '%d' and mode '%d'",
2758 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
2759
2760 return 0;
2761 out:
2762 lxc_netdev_delete_by_name(peer);
2763 free(peer);
2764 return -1;
2765 }
2766
2767 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2768 {
2769 int err;
2770
2771 if (netdev->downscript) {
2772 err = run_script(handler->name, "net", netdev->downscript,
2773 "down", "macvlan", netdev->link,
2774 (char*) NULL);
2775 if (err)
2776 return -1;
2777 }
2778 return 0;
2779 }
2780
2781 /* XXX: merge with instantiate_macvlan */
2782 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
2783 {
2784 char peer[IFNAMSIZ];
2785 int err;
2786 static uint16_t vlan_cntr = 0;
2787
2788 if (!netdev->link) {
2789 ERROR("no link specified for vlan netdev");
2790 return -1;
2791 }
2792
2793 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
2794 if (err >= sizeof(peer)) {
2795 ERROR("peer name too long");
2796 return -1;
2797 }
2798
2799 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
2800 if (err) {
2801 ERROR("failed to create vlan interface '%s' on '%s' : %s",
2802 peer, netdev->link, strerror(-err));
2803 return -1;
2804 }
2805
2806 netdev->ifindex = if_nametoindex(peer);
2807 if (!netdev->ifindex) {
2808 ERROR("failed to retrieve the ifindex for %s", peer);
2809 lxc_netdev_delete_by_name(peer);
2810 return -1;
2811 }
2812
2813 DEBUG("instantiated vlan '%s', ifindex is '%d'", " vlan1000",
2814 netdev->ifindex);
2815
2816 return 0;
2817 }
2818
/*
 * shutdown_vlan: nothing is done for vlan netdevs at shutdown.
 *
 * Always returns 0; both arguments are unused.
 */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2823
2824 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2825 {
2826 if (!netdev->link) {
2827 ERROR("no link specified for the physical interface");
2828 return -1;
2829 }
2830
2831 netdev->ifindex = if_nametoindex(netdev->link);
2832 if (!netdev->ifindex) {
2833 ERROR("failed to retrieve the index for %s", netdev->link);
2834 return -1;
2835 }
2836
2837 if (netdev->upscript) {
2838 int err;
2839 err = run_script(handler->name, "net", netdev->upscript,
2840 "up", "phys", netdev->link, (char*) NULL);
2841 if (err)
2842 return -1;
2843 }
2844
2845 return 0;
2846 }
2847
2848 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
2849 {
2850 int err;
2851
2852 if (netdev->downscript) {
2853 err = run_script(handler->name, "net", netdev->downscript,
2854 "down", "phys", netdev->link, (char*) NULL);
2855 if (err)
2856 return -1;
2857 }
2858 return 0;
2859 }
2860
2861 static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
2862 {
2863 netdev->ifindex = 0;
2864 return 0;
2865 }
2866
2867 static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2868 {
2869 netdev->ifindex = 0;
2870 if (netdev->upscript) {
2871 int err;
2872 err = run_script(handler->name, "net", netdev->upscript,
2873 "up", "empty", (char*) NULL);
2874 if (err)
2875 return -1;
2876 }
2877 return 0;
2878 }
2879
2880 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
2881 {
2882 int err;
2883
2884 if (netdev->downscript) {
2885 err = run_script(handler->name, "net", netdev->downscript,
2886 "down", "empty", (char*) NULL);
2887 if (err)
2888 return -1;
2889 }
2890 return 0;
2891 }
2892
/*
 * shutdown_none: nothing is done for "none" netdevs at shutdown.
 *
 * Always returns 0; both arguments are unused.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
2897
2898 int lxc_requests_empty_network(struct lxc_handler *handler)
2899 {
2900 struct lxc_list *network = &handler->conf->network;
2901 struct lxc_list *iterator;
2902 struct lxc_netdev *netdev;
2903 bool found_none = false, found_nic = false;
2904
2905 if (lxc_list_empty(network))
2906 return 0;
2907
2908 lxc_list_for_each(iterator, network) {
2909
2910 netdev = iterator->elem;
2911
2912 if (netdev->type == LXC_NET_NONE)
2913 found_none = true;
2914 else
2915 found_nic = true;
2916 }
2917 if (found_none && !found_nic)
2918 return 1;
2919 return 0;
2920 }
2921
2922 int lxc_create_network(struct lxc_handler *handler)
2923 {
2924 struct lxc_list *network = &handler->conf->network;
2925 struct lxc_list *iterator;
2926 struct lxc_netdev *netdev;
2927 int am_root = (getuid() == 0);
2928
2929 if (!am_root)
2930 return 0;
2931
2932 lxc_list_for_each(iterator, network) {
2933
2934 netdev = iterator->elem;
2935
2936 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
2937 ERROR("invalid network configuration type '%d'",
2938 netdev->type);
2939 return -1;
2940 }
2941
2942 if (netdev_conf[netdev->type](handler, netdev)) {
2943 ERROR("failed to create netdev");
2944 return -1;
2945 }
2946
2947 }
2948
2949 return 0;
2950 }
2951
2952 void lxc_delete_network(struct lxc_handler *handler)
2953 {
2954 struct lxc_list *network = &handler->conf->network;
2955 struct lxc_list *iterator;
2956 struct lxc_netdev *netdev;
2957
2958 lxc_list_for_each(iterator, network) {
2959 netdev = iterator->elem;
2960
2961 if (netdev->ifindex != 0 && netdev->type == LXC_NET_PHYS) {
2962 if (lxc_netdev_rename_by_index(netdev->ifindex, netdev->link))
2963 WARN("failed to rename to the initial name the " \
2964 "netdev '%s'", netdev->link);
2965 continue;
2966 }
2967
2968 if (netdev_deconf[netdev->type](handler, netdev)) {
2969 WARN("failed to destroy netdev");
2970 }
2971
2972 /* Recent kernel remove the virtual interfaces when the network
2973 * namespace is destroyed but in case we did not moved the
2974 * interface to the network namespace, we have to destroy it
2975 */
2976 if (netdev->ifindex != 0 &&
2977 lxc_netdev_delete_by_index(netdev->ifindex))
2978 WARN("failed to remove interface '%s'", netdev->name);
2979 }
2980 }
2981
/* Path of the helper used to create nics for unprivileged users. */
#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"

/* lxc-user-nic returns "interface_name:interface_name\n" */
/* Parenthesised so the macro expands safely inside larger expressions. */
#define MAX_BUFFER_SIZE (IFNAMSIZ * 2 + 2)
2986 static int unpriv_assign_nic(struct lxc_netdev *netdev, pid_t pid)
2987 {
2988 pid_t child;
2989 int bytes, pipefd[2];
2990 char *token, *saveptr = NULL;
2991 char buffer[MAX_BUFFER_SIZE];
2992 char netdev_link[IFNAMSIZ+1];
2993
2994 if (netdev->type != LXC_NET_VETH) {
2995 ERROR("nic type %d not support for unprivileged use",
2996 netdev->type);
2997 return -1;
2998 }
2999
3000 if(pipe(pipefd) < 0) {
3001 SYSERROR("pipe failed");
3002 return -1;
3003 }
3004
3005 if ((child = fork()) < 0) {
3006 SYSERROR("fork");
3007 close(pipefd[0]);
3008 close(pipefd[1]);
3009 return -1;
3010 }
3011
3012 if (child == 0) { // child
3013 /* close the read-end of the pipe */
3014 close(pipefd[0]);
3015 /* redirect the stdout to write-end of the pipe */
3016 dup2(pipefd[1], STDOUT_FILENO);
3017 /* close the write-end of the pipe */
3018 close(pipefd[1]);
3019
3020 // Call lxc-user-nic pid type bridge
3021 char pidstr[20];
3022 if (netdev->link) {
3023 strncpy(netdev_link, netdev->link, IFNAMSIZ);
3024 } else {
3025 strncpy(netdev_link, "none", IFNAMSIZ);
3026 }
3027 char *args[] = {LXC_USERNIC_PATH, pidstr, "veth", netdev_link, netdev->name, NULL };
3028 snprintf(pidstr, 19, "%lu", (unsigned long) pid);
3029 pidstr[19] = '\0';
3030 execvp(args[0], args);
3031 SYSERROR("execvp lxc-user-nic");
3032 exit(1);
3033 }
3034
3035 /* close the write-end of the pipe */
3036 close(pipefd[1]);
3037
3038 bytes = read(pipefd[0], &buffer, MAX_BUFFER_SIZE);
3039 if (bytes < 0) {
3040 SYSERROR("read failed");
3041 }
3042 buffer[bytes - 1] = '\0';
3043
3044 if (wait_for_pid(child) != 0) {
3045 close(pipefd[0]);
3046 return -1;
3047 }
3048
3049 /* close the read-end of the pipe */
3050 close(pipefd[0]);
3051
3052 /* fill netdev->name field */
3053 token = strtok_r(buffer, ":", &saveptr);
3054 if (!token)
3055 return -1;
3056 netdev->name = malloc(IFNAMSIZ+1);
3057 if (!netdev->name) {
3058 ERROR("Out of memory");
3059 return -1;
3060 }
3061 memset(netdev->name, 0, IFNAMSIZ+1);
3062 strncpy(netdev->name, token, IFNAMSIZ);
3063
3064 /* fill netdev->veth_attr.pair field */
3065 token = strtok_r(NULL, ":", &saveptr);
3066 if (!token)
3067 return -1;
3068 netdev->priv.veth_attr.pair = strdup(token);
3069 if (!netdev->priv.veth_attr.pair) {
3070 ERROR("Out of memory");
3071 return -1;
3072 }
3073
3074 return 0;
3075 }
3076
3077 int lxc_assign_network(struct lxc_list *network, pid_t pid)
3078 {
3079 struct lxc_list *iterator;
3080 struct lxc_netdev *netdev;
3081 int am_root = (getuid() == 0);
3082 int err;
3083
3084 lxc_list_for_each(iterator, network) {
3085
3086 netdev = iterator->elem;
3087
3088 if (netdev->type == LXC_NET_VETH && !am_root) {
3089 if (unpriv_assign_nic(netdev, pid))
3090 return -1;
3091 // lxc-user-nic has moved the nic to the new ns.
3092 // unpriv_assign_nic() fills in netdev->name.
3093 // netdev->ifindex will be filed in at setup_netdev.
3094 continue;
3095 }
3096
3097 /* empty network namespace, nothing to move */
3098 if (!netdev->ifindex)
3099 continue;
3100
3101 err = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3102 if (err) {
3103 ERROR("failed to move '%s' to the container : %s",
3104 netdev->link, strerror(-err));
3105 return -1;
3106 }
3107
3108 DEBUG("move '%s' to '%d'", netdev->name, pid);
3109 }
3110
3111 return 0;
3112 }
3113
3114 static int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf,
3115 size_t buf_size)
3116 {
3117 char path[PATH_MAX];
3118 int ret, closeret;
3119 FILE *f;
3120
3121 ret = snprintf(path, PATH_MAX, "/proc/%d/%cid_map", pid, idtype == ID_TYPE_UID ? 'u' : 'g');
3122 if (ret < 0 || ret >= PATH_MAX) {
3123 fprintf(stderr, "%s: path name too long\n", __func__);
3124 return -E2BIG;
3125 }
3126 f = fopen(path, "w");
3127 if (!f) {
3128 perror("open");
3129 return -EINVAL;
3130 }
3131 ret = fwrite(buf, buf_size, 1, f);
3132 if (ret < 0)
3133 SYSERROR("writing id mapping");
3134 closeret = fclose(f);
3135 if (closeret)
3136 SYSERROR("writing id mapping");
3137 return ret < 0 ? ret : closeret;
3138 }
3139
3140 int lxc_map_ids(struct lxc_list *idmap, pid_t pid)
3141 {
3142 struct lxc_list *iterator;
3143 struct id_map *map;
3144 int ret = 0, use_shadow = 0;
3145 enum idtype type;
3146 char *buf = NULL, *pos, *cmdpath = NULL;
3147
3148 /*
3149 * If newuidmap exists, that is, if shadow is handing out subuid
3150 * ranges, then insist that root also reserve ranges in subuid. This
3151 * will protected it by preventing another user from being handed the
3152 * range by shadow.
3153 */
3154 cmdpath = on_path("newuidmap", NULL);
3155 if (cmdpath) {
3156 use_shadow = 1;
3157 free(cmdpath);
3158 }
3159
3160 if (!use_shadow && geteuid()) {
3161 ERROR("Missing newuidmap/newgidmap");
3162 return -1;
3163 }
3164
3165 for(type = ID_TYPE_UID; type <= ID_TYPE_GID; type++) {
3166 int left, fill;
3167 int had_entry = 0;
3168 if (!buf) {
3169 buf = pos = malloc(4096);
3170 if (!buf)
3171 return -ENOMEM;
3172 }
3173 pos = buf;
3174 if (use_shadow)
3175 pos += sprintf(buf, "new%cidmap %d",
3176 type == ID_TYPE_UID ? 'u' : 'g',
3177 pid);
3178
3179 lxc_list_for_each(iterator, idmap) {
3180 /* The kernel only takes <= 4k for writes to /proc/<nr>/[ug]id_map */
3181 map = iterator->elem;
3182 if (map->idtype != type)
3183 continue;
3184
3185 had_entry = 1;
3186 left = 4096 - (pos - buf);
3187 fill = snprintf(pos, left, "%s%lu %lu %lu%s",
3188 use_shadow ? " " : "",
3189 map->nsid, map->hostid, map->range,
3190 use_shadow ? "" : "\n");
3191 if (fill <= 0 || fill >= left)
3192 SYSERROR("snprintf failed, too many mappings");
3193 pos += fill;
3194 }
3195 if (!had_entry)
3196 continue;
3197
3198 if (!use_shadow) {
3199 ret = write_id_mapping(type, pid, buf, pos-buf);
3200 } else {
3201 left = 4096 - (pos - buf);
3202 fill = snprintf(pos, left, "\n");
3203 if (fill <= 0 || fill >= left)
3204 SYSERROR("snprintf failed, too many mappings");
3205 pos += fill;
3206 ret = system(buf);
3207 }
3208
3209 if (ret)
3210 break;
3211 }
3212
3213 free(buf);
3214 return ret;
3215 }
3216
3217 /*
3218 * return the host uid/gid to which the container root is mapped in
3219 * *val.
3220 * Return true if id was found, false otherwise.
3221 */
3222 bool get_mapped_rootid(struct lxc_conf *conf, enum idtype idtype,
3223 unsigned long *val)
3224 {
3225 struct lxc_list *it;
3226 struct id_map *map;
3227
3228 lxc_list_for_each(it, &conf->id_map) {
3229 map = it->elem;
3230 if (map->idtype != idtype)
3231 continue;
3232 if (map->nsid != 0)
3233 continue;
3234 *val = map->hostid;
3235 return true;
3236 }
3237 return false;
3238 }
3239
3240 int mapped_hostid(unsigned id, struct lxc_conf *conf, enum idtype idtype)
3241 {
3242 struct lxc_list *it;
3243 struct id_map *map;
3244 lxc_list_for_each(it, &conf->id_map) {
3245 map = it->elem;
3246 if (map->idtype != idtype)
3247 continue;
3248 if (id >= map->hostid && id < map->hostid + map->range)
3249 return (id - map->hostid) + map->nsid;
3250 }
3251 return -1;
3252 }
3253
3254 int find_unmapped_nsuid(struct lxc_conf *conf, enum idtype idtype)
3255 {
3256 struct lxc_list *it;
3257 struct id_map *map;
3258 unsigned int freeid = 0;
3259 again:
3260 lxc_list_for_each(it, &conf->id_map) {
3261 map = it->elem;
3262 if (map->idtype != idtype)
3263 continue;
3264 if (freeid >= map->nsid && freeid < map->nsid + map->range) {
3265 freeid = map->nsid + map->range;
3266 goto again;
3267 }
3268 }
3269 return freeid;
3270 }
3271
3272 int lxc_find_gateway_addresses(struct lxc_handler *handler)
3273 {
3274 struct lxc_list *network = &handler->conf->network;
3275 struct lxc_list *iterator;
3276 struct lxc_netdev *netdev;
3277 int link_index;
3278
3279 lxc_list_for_each(iterator, network) {
3280 netdev = iterator->elem;
3281
3282 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
3283 continue;
3284
3285 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
3286 ERROR("gateway = auto only supported for "
3287 "veth and macvlan");
3288 return -1;
3289 }
3290
3291 if (!netdev->link) {
3292 ERROR("gateway = auto needs a link interface");
3293 return -1;
3294 }
3295
3296 link_index = if_nametoindex(netdev->link);
3297 if (!link_index)
3298 return -EINVAL;
3299
3300 if (netdev->ipv4_gateway_auto) {
3301 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
3302 ERROR("failed to automatically find ipv4 gateway "
3303 "address from link interface '%s'", netdev->link);
3304 return -1;
3305 }
3306 }
3307
3308 if (netdev->ipv6_gateway_auto) {
3309 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
3310 ERROR("failed to automatically find ipv6 gateway "
3311 "address from link interface '%s'", netdev->link);
3312 return -1;
3313 }
3314 }
3315 }
3316
3317 return 0;
3318 }
3319
3320 int lxc_create_tty(const char *name, struct lxc_conf *conf)
3321 {
3322 struct lxc_tty_info *tty_info = &conf->tty_info;
3323 int i, ret;
3324
3325 /* no tty in the configuration */
3326 if (!conf->tty)
3327 return 0;
3328
3329 tty_info->pty_info =
3330 malloc(sizeof(*tty_info->pty_info)*conf->tty);
3331 if (!tty_info->pty_info) {
3332 SYSERROR("failed to allocate pty_info");
3333 return -1;
3334 }
3335
3336 for (i = 0; i < conf->tty; i++) {
3337
3338 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3339
3340 process_lock();
3341 ret = openpty(&pty_info->master, &pty_info->slave,
3342 pty_info->name, NULL, NULL);
3343 process_unlock();
3344 if (ret) {
3345 SYSERROR("failed to create pty #%d", i);
3346 tty_info->nbtty = i;
3347 lxc_delete_tty(tty_info);
3348 return -1;
3349 }
3350
3351 DEBUG("allocated pty '%s' (%d/%d)",
3352 pty_info->name, pty_info->master, pty_info->slave);
3353
3354 /* Prevent leaking the file descriptors to the container */
3355 fcntl(pty_info->master, F_SETFD, FD_CLOEXEC);
3356 fcntl(pty_info->slave, F_SETFD, FD_CLOEXEC);
3357
3358 pty_info->busy = 0;
3359 }
3360
3361 tty_info->nbtty = conf->tty;
3362
3363 INFO("tty's configured");
3364
3365 return 0;
3366 }
3367
3368 void lxc_delete_tty(struct lxc_tty_info *tty_info)
3369 {
3370 int i;
3371
3372 for (i = 0; i < tty_info->nbtty; i++) {
3373 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3374
3375 close(pty_info->master);
3376 close(pty_info->slave);
3377 }
3378
3379 free(tty_info->pty_info);
3380 tty_info->nbtty = 0;
3381 }
3382
3383 /*
3384 * chown_mapped_root: for an unprivileged user with uid/gid X to
3385 * chown a dir to subuid/subgid Y, he needs to run chown as root
3386 * in a userns where nsid 0 is mapped to hostuid/hostgid Y, and
3387 * nsid Y is mapped to hostuid/hostgid X. That way, the container
3388 * root is privileged with respect to hostuid/hostgid X, allowing
3389 * him to do the chown.
3390 */
3391 int chown_mapped_root(char *path, struct lxc_conf *conf)
3392 {
3393 uid_t rootuid;
3394 gid_t rootgid;
3395 pid_t pid;
3396 unsigned long val;
3397 char *chownpath = path;
3398
3399 if (!get_mapped_rootid(conf, ID_TYPE_UID, &val)) {
3400 ERROR("No mapping for container root");
3401 return -1;
3402 }
3403 rootuid = (uid_t) val;
3404 if (!get_mapped_rootid(conf, ID_TYPE_GID, &val)) {
3405 ERROR("No mapping for container root");
3406 return -1;
3407 }
3408 rootgid = (gid_t) val;
3409
3410 /*
3411 * In case of overlay, we want only the writeable layer
3412 * to be chowned
3413 */
3414 if (strncmp(path, "overlayfs:", 10) == 0 || strncmp(path, "aufs:", 5) == 0) {
3415 chownpath = strchr(path, ':');
3416 if (!chownpath) {
3417 ERROR("Bad overlay path: %s", path);
3418 return -1;
3419 }
3420 chownpath = strchr(chownpath+1, ':');
3421 if (!chownpath) {
3422 ERROR("Bad overlay path: %s", path);
3423 return -1;
3424 }
3425 chownpath++;
3426 }
3427 path = chownpath;
3428 if (geteuid() == 0) {
3429 if (chown(path, rootuid, rootgid) < 0) {
3430 ERROR("Error chowning %s", path);
3431 return -1;
3432 }
3433 return 0;
3434 }
3435
3436 if (rootuid == geteuid()) {
3437 // nothing to do
3438 INFO("%s: container root is our uid; no need to chown" ,__func__);
3439 return 0;
3440 }
3441
3442 pid = fork();
3443 if (pid < 0) {
3444 SYSERROR("Failed forking");
3445 return -1;
3446 }
3447 if (!pid) {
3448 int hostuid = geteuid(), hostgid = getegid(), ret;
3449 struct stat sb;
3450 char map1[100], map2[100], map3[100], map4[100], map5[100];
3451 char ugid[100];
3452 char *args1[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3453 "-m", map3, "-m", map5,
3454 "--", "chown", ugid, path, NULL };
3455 char *args2[] = { "lxc-usernsexec", "-m", map1, "-m", map2,
3456 "-m", map3, "-m", map4, "-m", map5,
3457 "--", "chown", ugid, path, NULL };
3458
3459 // save the current gid of "path"
3460 if (stat(path, &sb) < 0) {
3461 ERROR("Error stat %s", path);
3462 return -1;
3463 }
3464
3465 /*
3466 * A file has to be group-owned by a gid mapped into the
3467 * container, or the container won't be privileged over it.
3468 */
3469 if (sb.st_uid == geteuid() &&
3470 mapped_hostid(sb.st_gid, conf, ID_TYPE_GID) < 0 &&
3471 chown(path, -1, hostgid) < 0) {
3472 ERROR("Failed chgrping %s", path);
3473 return -1;
3474 }
3475
3476 // "u:0:rootuid:1"
3477 ret = snprintf(map1, 100, "u:0:%d:1", rootuid);
3478 if (ret < 0 || ret >= 100) {
3479 ERROR("Error uid printing map string");
3480 return -1;
3481 }
3482
3483 // "u:hostuid:hostuid:1"
3484 ret = snprintf(map2, 100, "u:%d:%d:1", hostuid, hostuid);
3485 if (ret < 0 || ret >= 100) {
3486 ERROR("Error uid printing map string");
3487 return -1;
3488 }
3489
3490 // "g:0:rootgid:1"
3491 ret = snprintf(map3, 100, "g:0:%d:1", rootgid);
3492 if (ret < 0 || ret >= 100) {
3493 ERROR("Error gid printing map string");
3494 return -1;
3495 }
3496
3497 // "g:pathgid:rootgid+pathgid:1"
3498 ret = snprintf(map4, 100, "g:%d:%d:1", (gid_t)sb.st_gid,
3499 rootgid + (gid_t)sb.st_gid);
3500 if (ret < 0 || ret >= 100) {
3501 ERROR("Error gid printing map string");
3502 return -1;
3503 }
3504
3505 // "g:hostgid:hostgid:1"
3506 ret = snprintf(map5, 100, "g:%d:%d:1", hostgid, hostgid);
3507 if (ret < 0 || ret >= 100) {
3508 ERROR("Error gid printing map string");
3509 return -1;
3510 }
3511
3512 // "0:pathgid" (chown)
3513 ret = snprintf(ugid, 100, "0:%d", (gid_t)sb.st_gid);
3514 if (ret < 0 || ret >= 100) {
3515 ERROR("Error owner printing format string for chown");
3516 return -1;
3517 }
3518
3519 if (hostgid == sb.st_gid)
3520 ret = execvp("lxc-usernsexec", args1);
3521 else
3522 ret = execvp("lxc-usernsexec", args2);
3523 SYSERROR("Failed executing usernsexec");
3524 exit(1);
3525 }
3526 return wait_for_pid(pid);
3527 }
3528
3529 int ttys_shift_ids(struct lxc_conf *c)
3530 {
3531 if (lxc_list_empty(&c->id_map))
3532 return 0;
3533
3534 if (strcmp(c->console.name, "") !=0 && chown_mapped_root(c->console.name, c) < 0) {
3535 ERROR("Failed to chown %s", c->console.name);
3536 return -1;
3537 }
3538
3539 return 0;
3540 }
3541
/*
 * do_tmp_proc_mount: Mount /proc inside container if not already
 * mounted
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * <rootfs>/proc/self is read as a symlink: when it cannot be read, proc
 * is mounted; when it reads anything other than "1", whatever is mounted
 * on <rootfs>/proc is lazily unmounted and proc is mounted afresh.
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
static int do_tmp_proc_mount(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink() does not NUL-terminate. Read at most one byte less
	 * than the zeroed buffer holds so that link is always a valid
	 * string for the log message and the comparison below.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	INFO("I am %d, /proc/self points to '%s'", getpid(), link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	/* can't be longer than rootfs/proc/1 */
	if (strcmp(link, "1") != 0) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
3583
3584 int tmp_proc_mount(struct lxc_conf *lxc_conf)
3585 {
3586 int mounted;
3587
3588 if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) {
3589 if (mount("proc", "/proc", "proc", 0, NULL)) {
3590 SYSERROR("Failed mounting /proc, proceeding");
3591 mounted = 0;
3592 } else
3593 mounted = 1;
3594 } else
3595 mounted = do_tmp_proc_mount(lxc_conf->rootfs.mount);
3596 if (mounted == -1) {
3597 SYSERROR("failed to mount /proc in the container.");
3598 return -1;
3599 } else if (mounted == 1) {
3600 lxc_conf->tmp_umount_proc = 1;
3601 }
3602 return 0;
3603 }
3604
3605 void tmp_proc_unmount(struct lxc_conf *lxc_conf)
3606 {
3607 if (lxc_conf->tmp_umount_proc == 1) {
3608 umount("/proc");
3609 lxc_conf->tmp_umount_proc = 0;
3610 }
3611 }
3612
/*
 * remount_all_slave: walk /proc/self/mountinfo and remount with MS_SLAVE
 * every entry whose options field contains "shared".
 *
 * Failures are logged and otherwise ignored.
 */
void remount_all_slave(void)
{
	FILE *mountinfo;
	char *entry = NULL;
	size_t cap = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo) {
		SYSERROR("Failed to open /proc/self/mountinfo to mark all shared");
		ERROR("Continuing container startup...");
		return;
	}

	while (getline(&entry, &cap, mountinfo) != -1) {
		char *mntpoint, *options;

		mntpoint = get_field(entry, 4);
		if (!mntpoint)
			continue;

		/* the options are looked up relative to the mount point */
		options = get_field(mntpoint, 2);
		if (!options)
			continue;

		null_endofword(options);
		if (strstr(options, "shared") == NULL)
			continue;

		null_endofword(mntpoint);
		if (mount(NULL, mntpoint, NULL, MS_SLAVE, NULL) != 0) {
			SYSERROR("Failed to make %s rslave", mntpoint);
			ERROR("Continuing...");
		}
	}

	fclose(mountinfo);
	free(entry);
}
3646
3647 void lxc_execute_bind_init(struct lxc_conf *conf)
3648 {
3649 int ret;
3650 char path[PATH_MAX], destpath[PATH_MAX], *p;
3651
3652 /* If init exists in the container, don't bind mount a static one */
3653 p = choose_init(conf->rootfs.mount);
3654 if (p) {
3655 free(p);
3656 return;
3657 }
3658
3659 ret = snprintf(path, PATH_MAX, SBINDIR "/init.lxc.static");
3660 if (ret < 0 || ret >= PATH_MAX) {
3661 WARN("Path name too long searching for lxc.init.static");
3662 return;
3663 }
3664
3665 if (!file_exists(path)) {
3666 INFO("%s does not exist on host", path);
3667 return;
3668 }
3669
3670 ret = snprintf(destpath, PATH_MAX, "%s%s", conf->rootfs.mount, "/init.lxc.static");
3671 if (ret < 0 || ret >= PATH_MAX) {
3672 WARN("Path name too long for container's lxc.init.static");
3673 return;
3674 }
3675
3676 if (!file_exists(destpath)) {
3677 FILE * pathfile = fopen(destpath, "wb");
3678 if (!pathfile) {
3679 SYSERROR("Failed to create mount target '%s'", destpath);
3680 return;
3681 }
3682 fclose(pathfile);
3683 }
3684
3685 ret = mount(path, destpath, "none", MS_BIND, NULL);
3686 if (ret < 0)
3687 SYSERROR("Failed to bind lxc.init.static into container");
3688 INFO("lxc.init.static bound into container at %s", path);
3689 }
3690
3691 /*
3692 * This does the work of remounting / if it is shared, calling the
3693 * container pre-mount hooks, and mounting the rootfs.
3694 */
3695 int do_rootfs_setup(struct lxc_conf *conf, const char *name, const char *lxcpath)
3696 {
3697 if (conf->rootfs_setup) {
3698 /*
3699 * rootfs was set up in another namespace. bind-mount it
3700 * to give us a mount in our own ns so we can pivot_root to it
3701 */
3702 const char *path = conf->rootfs.mount;
3703 if (mount(path, path, "rootfs", MS_BIND, NULL) < 0) {
3704 ERROR("Failed to bind-mount container / onto itself");
3705 return -1;
3706 }
3707 return 0;
3708 }
3709
3710 remount_all_slave();
3711
3712 if (run_lxc_hooks(name, "pre-mount", conf, lxcpath, NULL)) {
3713 ERROR("failed to run pre-mount hooks for container '%s'.", name);
3714 return -1;
3715 }
3716
3717 if (setup_rootfs(conf)) {
3718 ERROR("failed to setup rootfs for '%s'", name);
3719 return -1;
3720 }
3721
3722 conf->rootfs_setup = true;
3723 return 0;
3724 }
3725
3726 static bool verify_start_hooks(struct lxc_conf *conf)
3727 {
3728 struct lxc_list *it;
3729 char path[MAXPATHLEN];
3730 lxc_list_for_each(it, &conf->hooks[LXCHOOK_START]) {
3731 char *hookname = it->elem;
3732 struct stat st;
3733 int ret;
3734
3735 ret = snprintf(path, MAXPATHLEN, "%s%s",
3736 conf->rootfs.mount, hookname);
3737 if (ret < 0 || ret >= MAXPATHLEN)
3738 return false;
3739 ret = stat(path, &st);
3740 if (ret) {
3741 SYSERROR("Start hook %s not found in container rootfs",
3742 hookname);
3743 return false;
3744 }
3745 return true;
3746 }
3747
3748 return true;
3749 }
3750
/*
 * Pass one file descriptor over @sock with lxc_abstract_unix_send_fd(),
 * without any accompanying payload.  Returns 0 on success, -1 (after
 * logging) on failure.
 */
static int send_fd(int sock, int fd)
{
	if (lxc_abstract_unix_send_fd(sock, fd, NULL, 0) >= 0)
		return 0;

	SYSERROR("Error sending tty fd to parent");
	return -1;
}
3763
3764 static int send_ttys_to_parent(struct lxc_handler *handler)
3765 {
3766 struct lxc_conf *conf = handler->conf;
3767 const struct lxc_tty_info *tty_info = &conf->tty_info;
3768 int i;
3769 int sock = handler->ttysock[0];
3770
3771 for (i = 0; i < tty_info->nbtty; i++) {
3772 struct lxc_pty_info *pty_info = &tty_info->pty_info[i];
3773 if (send_fd(sock, pty_info->slave) < 0)
3774 goto bad;
3775 close(pty_info->slave);
3776 pty_info->slave = -1;
3777 if (send_fd(sock, pty_info->master) < 0)
3778 goto bad;
3779 close(pty_info->master);
3780 pty_info->master = -1;
3781 }
3782
3783 close(handler->ttysock[0]);
3784 close(handler->ttysock[1]);
3785
3786 return 0;
3787
3788 bad:
3789 ERROR("Error writing tty fd to parent");
3790 return -1;
3791 }
3792
3793 int lxc_setup(struct lxc_handler *handler)
3794 {
3795 const char *name = handler->name;
3796 struct lxc_conf *lxc_conf = handler->conf;
3797 const char *lxcpath = handler->lxcpath;
3798
3799 if (do_rootfs_setup(lxc_conf, name, lxcpath) < 0) {
3800 ERROR("Error setting up rootfs mount after spawn");
3801 return -1;
3802 }
3803
3804 if (lxc_conf->inherit_ns_fd[LXC_NS_UTS] == -1) {
3805 if (setup_utsname(lxc_conf->utsname)) {
3806 ERROR("failed to setup the utsname for '%s'", name);
3807 return -1;
3808 }
3809 }
3810
3811 if (setup_network(&lxc_conf->network)) {
3812 ERROR("failed to setup the network for '%s'", name);
3813 return -1;
3814 }
3815
3816 if (lxc_conf->autodev > 0) {
3817 if (mount_autodev(name, lxc_conf->rootfs.mount, lxcpath)) {
3818 ERROR("failed to mount /dev in the container");
3819 return -1;
3820 }
3821 }
3822
3823 /* do automatic mounts (mainly /proc and /sys), but exclude
3824 * those that need to wait until other stuff has finished
3825 */
3826 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & ~LXC_AUTO_CGROUP_MASK, handler) < 0) {
3827 ERROR("failed to setup the automatic mounts for '%s'", name);
3828 return -1;
3829 }
3830
3831 if (setup_mount(&lxc_conf->rootfs, lxc_conf->fstab, name)) {
3832 ERROR("failed to setup the mounts for '%s'", name);
3833 return -1;
3834 }
3835
3836 if (!lxc_list_empty(&lxc_conf->mount_list) && setup_mount_entries(&lxc_conf->rootfs, &lxc_conf->mount_list, name)) {
3837 ERROR("failed to setup the mount entries for '%s'", name);
3838 return -1;
3839 }
3840
3841 /* Make sure any start hooks are in the rootfs */
3842 if (!verify_start_hooks(lxc_conf))
3843 return -1;
3844
3845 if (lxc_conf->is_execute)
3846 lxc_execute_bind_init(lxc_conf);
3847
3848 /* now mount only cgroup, if wanted;
3849 * before, /sys could not have been mounted
3850 * (is either mounted automatically or via fstab entries)
3851 */
3852 if (lxc_mount_auto_mounts(lxc_conf, lxc_conf->auto_mounts & LXC_AUTO_CGROUP_MASK, handler) < 0) {
3853 ERROR("failed to setup the automatic mounts for '%s'", name);
3854 return -1;
3855 }
3856
3857 if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) {
3858 ERROR("failed to run mount hooks for container '%s'.", name);
3859 return -1;
3860 }
3861
3862 if (lxc_conf->autodev > 0) {
3863 if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) {
3864 ERROR("failed to run autodev hooks for container '%s'.", name);
3865 return -1;
3866 }
3867 if (fill_autodev(lxc_conf->rootfs.mount)) {
3868 ERROR("failed to populate /dev in the container");
3869 return -1;
3870 }
3871 }
3872
3873 if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) {
3874 ERROR("failed to setup the console for '%s'", name);
3875 return -1;
3876 }
3877
3878 if (lxc_conf->kmsg) {
3879 if (setup_kmsg(&lxc_conf->rootfs, &lxc_conf->console)) // don't fail
3880 ERROR("failed to setup kmsg for '%s'", name);
3881 }
3882
3883 if (!lxc_conf->is_execute && setup_dev_symlinks(&lxc_conf->rootfs)) {
3884 ERROR("failed to setup /dev symlinks for '%s'", name);
3885 return -1;
3886 }
3887
3888 /* mount /proc if it's not already there */
3889 if (tmp_proc_mount(lxc_conf) < 0) {
3890 ERROR("failed to LSM mount proc for '%s'", name);
3891 return -1;
3892 }
3893
3894 if (setup_pivot_root(&lxc_conf->rootfs)) {
3895 ERROR("failed to set rootfs for '%s'", name);
3896 return -1;
3897 }
3898
3899 if (setup_pts(lxc_conf->pts)) {
3900 ERROR("failed to setup the new pts instance");
3901 return -1;
3902 }
3903
3904 if (lxc_create_tty(name, lxc_conf)) {
3905 ERROR("failed to create the ttys");
3906 return -1;
3907 }
3908
3909 if (send_ttys_to_parent(handler) < 0) {
3910 ERROR("failure sending console info to parent");
3911 return -1;
3912 }
3913
3914
3915 if (!lxc_conf->is_execute && setup_tty(lxc_conf)) {
3916 ERROR("failed to setup the ttys for '%s'", name);
3917 return -1;
3918 }
3919
3920 if (lxc_conf->pty_names && setenv("container_ttys", lxc_conf->pty_names, 1))
3921 SYSERROR("failed to set environment variable for container ptys");
3922
3923
3924 if (setup_personality(lxc_conf->personality)) {
3925 ERROR("failed to setup personality");
3926 return -1;
3927 }
3928
3929 if (!lxc_list_empty(&lxc_conf->keepcaps)) {
3930 if (!lxc_list_empty(&lxc_conf->caps)) {
3931 ERROR("Simultaneously requested dropping and keeping caps");
3932 return -1;
3933 }
3934 if (dropcaps_except(&lxc_conf->keepcaps)) {
3935 ERROR("failed to keep requested caps");
3936 return -1;
3937 }
3938 } else if (setup_caps(&lxc_conf->caps)) {
3939 ERROR("failed to drop capabilities");
3940 return -1;
3941 }
3942
3943 NOTICE("'%s' is setup.", name);
3944
3945 return 0;
3946 }
3947
3948 int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf,
3949 const char *lxcpath, char *argv[])
3950 {
3951 int which = -1;
3952 struct lxc_list *it;
3953
3954 if (strcmp(hook, "pre-start") == 0)
3955 which = LXCHOOK_PRESTART;
3956 else if (strcmp(hook, "pre-mount") == 0)
3957 which = LXCHOOK_PREMOUNT;
3958 else if (strcmp(hook, "mount") == 0)
3959 which = LXCHOOK_MOUNT;
3960 else if (strcmp(hook, "autodev") == 0)
3961 which = LXCHOOK_AUTODEV;
3962 else if (strcmp(hook, "start") == 0)
3963 which = LXCHOOK_START;
3964 else if (strcmp(hook, "post-stop") == 0)
3965 which = LXCHOOK_POSTSTOP;
3966 else if (strcmp(hook, "clone") == 0)
3967 which = LXCHOOK_CLONE;
3968 else
3969 return -1;
3970 lxc_list_for_each(it, &conf->hooks[which]) {
3971 int ret;
3972 char *hookname = it->elem;
3973 ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv);
3974 if (ret)
3975 return ret;
3976 }
3977 return 0;
3978 }
3979
3980 static void lxc_remove_nic(struct lxc_list *it)
3981 {
3982 struct lxc_netdev *netdev = it->elem;
3983 struct lxc_list *it2,*next;
3984
3985 lxc_list_del(it);
3986
3987 free(netdev->link);
3988 free(netdev->name);
3989 if (netdev->type == LXC_NET_VETH)
3990 free(netdev->priv.veth_attr.pair);
3991 free(netdev->upscript);
3992 free(netdev->hwaddr);
3993 free(netdev->mtu);
3994 free(netdev->ipv4_gateway);
3995 free(netdev->ipv6_gateway);
3996 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
3997 lxc_list_del(it2);
3998 free(it2->elem);
3999 free(it2);
4000 }
4001 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
4002 lxc_list_del(it2);
4003 free(it2->elem);
4004 free(it2);
4005 }
4006 free(netdev);
4007 free(it);
4008 }
4009
4010 /* we get passed in something like '0', '0.ipv4' or '1.ipv6' */
4011 int lxc_clear_nic(struct lxc_conf *c, const char *key)
4012 {
4013 char *p1;
4014 int ret, idx, i;
4015 struct lxc_list *it;
4016 struct lxc_netdev *netdev;
4017
4018 p1 = strchr(key, '.');
4019 if (!p1 || *(p1+1) == '\0')
4020 p1 = NULL;
4021
4022 ret = sscanf(key, "%d", &idx);
4023 if (ret != 1) return -1;
4024 if (idx < 0)
4025 return -1;
4026
4027 i = 0;
4028 lxc_list_for_each(it, &c->network) {
4029 if (i == idx)
4030 break;
4031 i++;
4032 }
4033 if (i < idx) // we don't have that many nics defined
4034 return -1;
4035
4036 if (!it || !it->elem)
4037 return -1;
4038
4039 netdev = it->elem;
4040
4041 if (!p1) {
4042 lxc_remove_nic(it);
4043 } else if (strcmp(p1, ".ipv4") == 0) {
4044 struct lxc_list *it2,*next;
4045 lxc_list_for_each_safe(it2, &netdev->ipv4, next) {
4046 lxc_list_del(it2);
4047 free(it2->elem);
4048 free(it2);
4049 }
4050 } else if (strcmp(p1, ".ipv6") == 0) {
4051 struct lxc_list *it2,*next;
4052 lxc_list_for_each_safe(it2, &netdev->ipv6, next) {
4053 lxc_list_del(it2);
4054 free(it2->elem);
4055 free(it2);
4056 }
4057 }
4058 else return -1;
4059
4060 return 0;
4061 }
4062
4063 int lxc_clear_config_network(struct lxc_conf *c)
4064 {
4065 struct lxc_list *it,*next;
4066 lxc_list_for_each_safe(it, &c->network, next) {
4067 lxc_remove_nic(it);
4068 }
4069 return 0;
4070 }
4071
4072 int lxc_clear_config_caps(struct lxc_conf *c)
4073 {
4074 struct lxc_list *it,*next;
4075
4076 lxc_list_for_each_safe(it, &c->caps, next) {
4077 lxc_list_del(it);
4078 free(it->elem);
4079 free(it);
4080 }
4081 return 0;
4082 }
4083
4084 static int lxc_free_idmap(struct lxc_list *id_map) {
4085 struct lxc_list *it, *next;
4086
4087 lxc_list_for_each_safe(it, id_map, next) {
4088 lxc_list_del(it);
4089 free(it->elem);
4090 free(it);
4091 }
4092 return 0;
4093 }
4094
4095 int lxc_clear_idmaps(struct lxc_conf *c)
4096 {
4097 return lxc_free_idmap(&c->id_map);
4098 }
4099
4100 int lxc_clear_config_keepcaps(struct lxc_conf *c)
4101 {
4102 struct lxc_list *it,*next;
4103
4104 lxc_list_for_each_safe(it, &c->keepcaps, next) {
4105 lxc_list_del(it);
4106 free(it->elem);
4107 free(it);
4108 }
4109 return 0;
4110 }
4111
4112 int lxc_clear_cgroups(struct lxc_conf *c, const char *key)
4113 {
4114 struct lxc_list *it,*next;
4115 bool all = false;
4116 const char *k = key + 11;
4117
4118 if (strcmp(key, "lxc.cgroup") == 0)
4119 all = true;
4120
4121 lxc_list_for_each_safe(it, &c->cgroup, next) {
4122 struct lxc_cgroup *cg = it->elem;
4123 if (!all && strcmp(cg->subsystem, k) != 0)
4124 continue;
4125 lxc_list_del(it);
4126 free(cg->subsystem);
4127 free(cg->value);
4128 free(cg);
4129 free(it);
4130 }
4131 return 0;
4132 }
4133
4134 int lxc_clear_groups(struct lxc_conf *c)
4135 {
4136 struct lxc_list *it,*next;
4137
4138 lxc_list_for_each_safe(it, &c->groups, next) {
4139 lxc_list_del(it);
4140 free(it->elem);
4141 free(it);
4142 }
4143 return 0;
4144 }
4145
4146 int lxc_clear_environment(struct lxc_conf *c)
4147 {
4148 struct lxc_list *it,*next;
4149
4150 lxc_list_for_each_safe(it, &c->environment, next) {
4151 lxc_list_del(it);
4152 free(it->elem);
4153 free(it);
4154 }
4155 return 0;
4156 }
4157
4158
4159 int lxc_clear_mount_entries(struct lxc_conf *c)
4160 {
4161 struct lxc_list *it,*next;
4162
4163 lxc_list_for_each_safe(it, &c->mount_list, next) {
4164 lxc_list_del(it);
4165 free(it->elem);
4166 free(it);
4167 }
4168 return 0;
4169 }
4170
4171 int lxc_clear_automounts(struct lxc_conf *c)
4172 {
4173 c->auto_mounts = 0;
4174 return 0;
4175 }
4176
4177 int lxc_clear_hooks(struct lxc_conf *c, const char *key)
4178 {
4179 struct lxc_list *it,*next;
4180 bool all = false, done = false;
4181 const char *k = key + 9;
4182 int i;
4183
4184 if (strcmp(key, "lxc.hook") == 0)
4185 all = true;
4186
4187 for (i=0; i<NUM_LXC_HOOKS; i++) {
4188 if (all || strcmp(k, lxchook_names[i]) == 0) {
4189 lxc_list_for_each_safe(it, &c->hooks[i], next) {
4190 lxc_list_del(it);
4191 free(it->elem);
4192 free(it);
4193 }
4194 done = true;
4195 }
4196 }
4197
4198 if (!done) {
4199 ERROR("Invalid hook key: %s", key);
4200 return -1;
4201 }
4202 return 0;
4203 }
4204
4205 static void lxc_clear_saved_nics(struct lxc_conf *conf)
4206 {
4207 int i;
4208
4209 if (!conf->saved_nics)
4210 return;
4211 for (i=0; i < conf->num_savednics; i++)
4212 free(conf->saved_nics[i].orig_name);
4213 free(conf->saved_nics);
4214 }
4215
4216 static inline void lxc_clear_aliens(struct lxc_conf *conf)
4217 {
4218 struct lxc_list *it,*next;
4219
4220 lxc_list_for_each_safe(it, &conf->aliens, next) {
4221 lxc_list_del(it);
4222 free(it->elem);
4223 free(it);
4224 }
4225 }
4226
4227 static inline void lxc_clear_includes(struct lxc_conf *conf)
4228 {
4229 struct lxc_list *it,*next;
4230
4231 lxc_list_for_each_safe(it, &conf->includes, next) {
4232 lxc_list_del(it);
4233 free(it->elem);
4234 free(it);
4235 }
4236 }
4237
4238 void lxc_conf_free(struct lxc_conf *conf)
4239 {
4240 if (!conf)
4241 return;
4242 free(conf->console.log_path);
4243 free(conf->console.path);
4244 free(conf->rootfs.mount);
4245 free(conf->rootfs.options);
4246 free(conf->rootfs.path);
4247 free(conf->rootfs.pivot);
4248 free(conf->logfile);
4249 free(conf->utsname);
4250 free(conf->ttydir);
4251 free(conf->fstab);
4252 free(conf->rcfile);
4253 free(conf->init_cmd);
4254 free(conf->unexpanded_config);
4255 free(conf->pty_names);
4256 lxc_clear_config_network(conf);
4257 free(conf->lsm_aa_profile);
4258 free(conf->lsm_se_context);
4259 lxc_seccomp_free(conf);
4260 lxc_clear_config_caps(conf);
4261 lxc_clear_config_keepcaps(conf);
4262 lxc_clear_cgroups(conf, "lxc.cgroup");
4263 lxc_clear_hooks(conf, "lxc.hook");
4264 lxc_clear_mount_entries(conf);
4265 lxc_clear_saved_nics(conf);
4266 lxc_clear_idmaps(conf);
4267 lxc_clear_groups(conf);
4268 lxc_clear_includes(conf);
4269 lxc_clear_aliens(conf);
4270 lxc_clear_environment(conf);
4271 free(conf);
4272 }
4273
/*
 * Argument bundle handed to run_userns_fn() by userns_exec_1().
 */
struct userns_fn_data {
	/* callback to run once the child has been released */
	int (*fn)(void *);
	/* opaque argument passed to fn */
	void *arg;
	/* pipe: the child reads one byte from p[0]; the parent writes to p[1] */
	int p[2];
};
4279
4280 static int run_userns_fn(void *data)
4281 {
4282 struct userns_fn_data *d = data;
4283 char c;
4284 // we're not sharing with the parent any more, if it was a thread
4285
4286 close(d->p[1]);
4287 if (read(d->p[0], &c, 1) != 1)
4288 return -1;
4289 close(d->p[0]);
4290 return d->fn(d->arg);
4291 }
4292
4293 /*
4294 * Add ID_TYPE_UID/ID_TYPE_GID entries to an existing lxc_conf,
4295 * if they are not already there.
4296 */
4297 static struct lxc_list *idmap_add_id(struct lxc_conf *conf,
4298 uid_t uid, gid_t gid)
4299 {
4300 int hostuid_mapped = mapped_hostid(uid, conf, ID_TYPE_UID);
4301 int hostgid_mapped = mapped_hostid(gid, conf, ID_TYPE_GID);
4302 struct lxc_list *new = NULL, *tmp, *it, *next;
4303 struct id_map *entry;
4304
4305 new = malloc(sizeof(*new));
4306 if (!new) {
4307 ERROR("Out of memory building id map");
4308 return NULL;
4309 }
4310 lxc_list_init(new);
4311
4312 if (hostuid_mapped < 0) {
4313 hostuid_mapped = find_unmapped_nsuid(conf, ID_TYPE_UID);
4314 if (hostuid_mapped < 0)
4315 goto err;
4316 tmp = malloc(sizeof(*tmp));
4317 if (!tmp)
4318 goto err;
4319 entry = malloc(sizeof(*entry));
4320 if (!entry) {
4321 free(tmp);
4322 goto err;
4323 }
4324 tmp->elem = entry;
4325 entry->idtype = ID_TYPE_UID;
4326 entry->nsid = hostuid_mapped;
4327 entry->hostid = (unsigned long) uid;
4328 entry->range = 1;
4329 lxc_list_add_tail(new, tmp);
4330 }
4331 if (hostgid_mapped < 0) {
4332 hostgid_mapped = find_unmapped_nsuid(conf, ID_TYPE_GID);
4333 if (hostgid_mapped < 0)
4334 goto err;
4335 tmp = malloc(sizeof(*tmp));
4336 if (!tmp)
4337 goto err;
4338 entry = malloc(sizeof(*entry));
4339 if (!entry) {
4340 free(tmp);
4341 goto err;
4342 }
4343 tmp->elem = entry;
4344 entry->idtype = ID_TYPE_GID;
4345 entry->nsid = hostgid_mapped;
4346 entry->hostid = (unsigned long) gid;
4347 entry->range = 1;
4348 lxc_list_add_tail(new, tmp);
4349 }
4350 lxc_list_for_each_safe(it, &conf->id_map, next) {
4351 tmp = malloc(sizeof(*tmp));
4352 if (!tmp)
4353 goto err;
4354 entry = malloc(sizeof(*entry));
4355 if (!entry) {
4356 free(tmp);
4357 goto err;
4358 }
4359 memset(entry, 0, sizeof(*entry));
4360 memcpy(entry, it->elem, sizeof(*entry));
4361 tmp->elem = entry;
4362 lxc_list_add_tail(new, tmp);
4363 }
4364
4365 return new;
4366
4367 err:
4368 ERROR("Out of memory building a new uid/gid map");
4369 if (new)
4370 lxc_free_idmap(new);
4371 free(new);
4372 return NULL;
4373 }
4374
4375 /*
4376 * Run a function in a new user namespace.
4377 * The caller's euid/egid will be mapped in if it is not already.
4378 */
4379 int userns_exec_1(struct lxc_conf *conf, int (*fn)(void *), void *data)
4380 {
4381 int ret, pid;
4382 struct userns_fn_data d;
4383 char c = '1';
4384 int p[2];
4385 struct lxc_list *idmap;
4386
4387 ret = pipe(p);
4388 if (ret < 0) {
4389 SYSERROR("opening pipe");
4390 return -1;
4391 }
4392 d.fn = fn;
4393 d.arg = data;
4394 d.p[0] = p[0];
4395 d.p[1] = p[1];
4396 pid = lxc_clone(run_userns_fn, &d, CLONE_NEWUSER);
4397 if (pid < 0)
4398 goto err;
4399 close(p[0]);
4400 p[0] = -1;
4401
4402 if ((idmap = idmap_add_id(conf, geteuid(), getegid())) == NULL) {
4403 ERROR("Error adding self to container uid/gid map");
4404 goto err;
4405 }
4406
4407 ret = lxc_map_ids(idmap, pid);
4408 lxc_free_idmap(idmap);
4409 free(idmap);
4410 if (ret) {
4411 ERROR("Error setting up child mappings");
4412 goto err;
4413 }
4414
4415 // kick the child
4416 if (write(p[1], &c, 1) != 1) {
4417 SYSERROR("writing to pipe to child");
4418 goto err;
4419 }
4420
4421 ret = wait_for_pid(pid);
4422
4423 close(p[1]);
4424 return ret;
4425
4426 err:
4427 if (p[0] != -1)
4428 close(p[0]);
4429 close(p[1]);
4430 return -1;
4431 }
4432
/*
 * Look up the user name belonging to the effective uid.
 * Returns a strdup()ed copy the caller must free, or NULL when the lookup
 * (or the allocation) fails.
 *
 * not thread-safe, do not use from api without first forking
 */
static char* getuname(void)
{
	struct passwd *pw = getpwuid(geteuid());

	return pw ? strdup(pw->pw_name) : NULL;
}
4444
/*
 * Look up the group name belonging to the effective gid.
 * Returns a strdup()ed copy the caller must free, or NULL when the lookup
 * (or the allocation) fails.
 *
 * not thread-safe, do not use from api without first forking
 */
static char *getgname(void)
{
	struct group *gr = getgrgid(getegid());

	return gr ? strdup(gr->gr_name) : NULL;
}
4456
4457 /* not thread-safe, do not use from api without first forking */
4458 void suggest_default_idmap(void)
4459 {
4460 FILE *f;
4461 unsigned int uid = 0, urange = 0, gid = 0, grange = 0;
4462 char *line = NULL;
4463 char *uname, *gname;
4464 size_t len = 0;
4465
4466 if (!(uname = getuname()))
4467 return;
4468
4469 if (!(gname = getgname())) {
4470 free(uname);
4471 return;
4472 }
4473
4474 f = fopen(subuidfile, "r");
4475 if (!f) {
4476 ERROR("Your system is not configured with subuids");
4477 free(gname);
4478 free(uname);
4479 return;
4480 }
4481 while (getline(&line, &len, f) != -1) {
4482 char *p = strchr(line, ':'), *p2;
4483 if (*line == '#')
4484 continue;
4485 if (!p)
4486 continue;
4487 *p = '\0';
4488 p++;
4489 if (strcmp(line, uname))
4490 continue;
4491 p2 = strchr(p, ':');
4492 if (!p2)
4493 continue;
4494 *p2 = '\0';
4495 p2++;
4496 if (!*p2)
4497 continue;
4498 uid = atoi(p);
4499 urange = atoi(p2);
4500 }
4501 fclose(f);
4502
4503 f = fopen(subuidfile, "r");
4504 if (!f) {
4505 ERROR("Your system is not configured with subgids");
4506 free(gname);
4507 free(uname);
4508 return;
4509 }
4510 while (getline(&line, &len, f) != -1) {
4511 char *p = strchr(line, ':'), *p2;
4512 if (*line == '#')
4513 continue;
4514 if (!p)
4515 continue;
4516 *p = '\0';
4517 p++;
4518 if (strcmp(line, uname))
4519 continue;
4520 p2 = strchr(p, ':');
4521 if (!p2)
4522 continue;
4523 *p2 = '\0';
4524 p2++;
4525 if (!*p2)
4526 continue;
4527 gid = atoi(p);
4528 grange = atoi(p2);
4529 }
4530 fclose(f);
4531
4532 free(line);
4533
4534 if (!urange || !grange) {
4535 ERROR("You do not have subuids or subgids allocated");
4536 ERROR("Unprivileged containers require subuids and subgids");
4537 return;
4538 }
4539
4540 ERROR("You must either run as root, or define uid mappings");
4541 ERROR("To pass uid mappings to lxc-create, you could create");
4542 ERROR("~/.config/lxc/default.conf:");
4543 ERROR("lxc.include = %s", LXC_DEFAULT_CONFIG);
4544 ERROR("lxc.id_map = u 0 %u %u", uid, urange);
4545 ERROR("lxc.id_map = g 0 %u %u", gid, grange);
4546
4547 free(gname);
4548 free(uname);
4549 }