2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * this is all just a first shot for experiment. If we go this route, much
26 * should change. bdev should be a directory with per-bdev file. Things which
27 * I'm doing by calling out to userspace should sometimes be done through
28 * libraries like liblvm2
35 #include <sys/mount.h>
38 #include <linux/loop.h>
47 #include "namespace.h"
52 #define BLKGETSIZE64 _IOR(0x12,114,size_t)
55 #ifndef LO_FLAGS_AUTOCLEAR
56 #define LO_FLAGS_AUTOCLEAR 4
59 lxc_log_define(bdev
, lxc
);
61 static int do_rsync(const char *src
, const char *dest
)
72 return wait_for_pid(pid
);
81 execlp("rsync", "rsync", "-a", s
, dest
, (char *)NULL
);
86 * return block size of bdev->src
88 static int blk_getsize(struct bdev
*bdev
, unsigned long *size
)
91 char *path
= bdev
->src
;
93 if (strcmp(bdev
->type
, "loop") == 0)
96 fd
= open(path
, O_RDONLY
);
99 ret
= ioctl(fd
, BLKGETSIZE64
, size
);
105 * These are copied from conf.c. However as conf.c will be moved to using
106 * the callback system, they can be pulled from there eventually, so we
107 * don't need to pollute utils.c with these low level functions
109 static int find_fstype_cb(char* buffer
, void *data
)
119 /* we don't try 'nodev' entries */
120 if (strstr(buffer
, "nodev"))
124 fstype
+= lxc_char_left_gc(fstype
, strlen(fstype
));
125 fstype
[lxc_char_right_gc(fstype
, strlen(fstype
))] = '\0';
127 DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
128 cbarg
->rootfs
, cbarg
->target
, fstype
);
130 if (mount(cbarg
->rootfs
, cbarg
->target
, fstype
, cbarg
->mntopt
, NULL
)) {
131 DEBUG("mount failed with error: %s", strerror(errno
));
135 INFO("mounted '%s' on '%s', with fstype '%s'",
136 cbarg
->rootfs
, cbarg
->target
, fstype
);
141 static int mount_unknow_fs(const char *rootfs
, const char *target
, int mntopt
)
156 * find the filesystem type with brute force:
157 * first we check with /etc/filesystems, in case the modules
158 * are auto-loaded and fall back to the supported kernel fs
165 for (i
= 0; i
< sizeof(fsfile
)/sizeof(fsfile
[0]); i
++) {
169 if (access(fsfile
[i
], F_OK
))
172 ret
= lxc_file_for_each_line(fsfile
[i
], find_fstype_cb
, &cbarg
);
174 ERROR("failed to parse '%s'", fsfile
[i
]);
182 ERROR("failed to determine fs type for '%s'", rootfs
);
186 static int do_mkfs(const char *path
, const char *fstype
)
190 if ((pid
= fork()) < 0) {
191 ERROR("error forking");
195 return wait_for_pid(pid
);
197 // If the file is not a block device, we don't want mkfs to ask
198 // us about whether to proceed.
202 open("/dev/zero", O_RDONLY
);
203 open("/dev/null", O_RDWR
);
204 open("/dev/null", O_RDWR
);
205 execlp("mkfs", "mkfs", "-t", fstype
, path
, NULL
);
209 static char *linkderef(char *path
, char *dest
)
214 ret
= stat(path
, &sbuf
);
217 if (!S_ISLNK(sbuf
.st_mode
))
219 ret
= readlink(path
, dest
, MAXPATHLEN
);
221 SYSERROR("error reading link %s", path
);
223 } else if (ret
>= MAXPATHLEN
) {
224 ERROR("link in %s too long", path
);
232 * Given a bdev (presumably blockdev-based), detect the fstype
233 * by trying to mount it (in a private mntns).
234 * @bdev: bdev to investigate
235 * @type: preallocated char* in which to write the fstype
236 * @len: length of passed in char*
237 * Returns length of fstype, or -1 on error
239 static int detect_fs(struct bdev
*bdev
, char *type
, int len
)
245 char *sp1
, *sp2
, *sp3
, *line
= NULL
;
248 if (!bdev
|| !bdev
->src
|| !bdev
->dest
)
252 if (strcmp(bdev
->type
, "loop") == 0)
253 srcdev
= bdev
->src
+ 5;
257 if ((pid
= fork()) < 0)
262 memset(type
, 0, len
);
263 ret
= read(p
[0], type
, len
-1);
266 SYSERROR("error reading from pipe");
269 } else if (ret
== 0) {
270 ERROR("child exited early - fstype not found");
276 INFO("detected fstype %s for %s", type
, srcdev
);
280 if (unshare(CLONE_NEWNS
) < 0)
283 ret
= mount_unknow_fs(srcdev
, bdev
->dest
, 0);
285 ERROR("failed mounting %s onto %s to detect fstype", srcdev
, bdev
->dest
);
288 // if symlink, get the real dev name
289 char devpath
[MAXPATHLEN
];
290 char *l
= linkderef(srcdev
, devpath
);
293 f
= fopen("/proc/self/mounts", "r");
296 while (getline(&line
, &linelen
, f
) != -1) {
297 sp1
= index(line
, ' ');
303 sp2
= index(sp1
+1, ' ');
307 sp3
= index(sp2
+1, ' ');
312 if (write(p
[1], sp2
, strlen(sp2
)) != strlen(sp2
))
321 struct bdev_ops
*ops
;
324 static int is_dir(const char *path
)
327 int ret
= stat(path
, &statbuf
);
328 if (ret
== 0 && S_ISDIR(statbuf
.st_mode
))
333 static int dir_detect(const char *path
)
335 if (strncmp(path
, "dir:", 4) == 0)
336 return 1; // take their word for it
343 // XXXXXXX plain directory bind mount ops
345 static int dir_mount(struct bdev
*bdev
)
347 if (strcmp(bdev
->type
, "dir"))
349 if (!bdev
->src
|| !bdev
->dest
)
351 return mount(bdev
->src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
, NULL
);
354 static int dir_umount(struct bdev
*bdev
)
356 if (strcmp(bdev
->type
, "dir"))
358 if (!bdev
->src
|| !bdev
->dest
)
360 return umount(bdev
->dest
);
363 /* the bulk of this needs to become a common helper */
364 static char *dir_new_path(char *src
, const char *oldname
, const char *name
,
365 const char *oldpath
, const char *lxcpath
)
370 nlen
= strlen(src
) + 1;
371 l1
= strlen(oldpath
);
373 /* if src starts with oldpath, look for oldname only after
375 if (strncmp(src
, oldpath
, l1
) == 0) {
377 nlen
+= (strlen(lxcpath
) - l1
);
379 l2
= strlen(oldname
);
380 while ((p
= strstr(p
, oldname
)) != NULL
) {
382 nlen
+= strlen(name
) - l2
;
390 if (strncmp(src
, oldpath
, l1
) == 0) {
391 p
+= sprintf(p
, "%s", lxcpath
);
395 while ((p2
= strstr(src
, oldname
)) != NULL
) {
396 strncpy(p
, src
, p2
-src
); // copy text up to oldname
397 p
+= p2
-src
; // move target pointer (p)
398 p
+= sprintf(p
, "%s", name
); // print new name in place of oldname
399 src
= p2
+ l2
; // move src to end of oldname
401 sprintf(p
, "%s", src
); // copy the rest of src
406 * for a simple directory bind mount, we substitute the old container
407 * name and paths for the new
409 static int dir_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
410 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
411 unsigned long newsize
)
416 ERROR("directories cannot be snapshotted. Try overlayfs.");
420 if (!orig
->dest
|| !orig
->src
)
423 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
424 new->src
= malloc(len
);
427 ret
= snprintf(new->src
, len
, "%s/%s/rootfs", lxcpath
, cname
);
428 if (ret
< 0 || ret
>= len
)
430 if ((new->dest
= strdup(new->src
)) == NULL
)
436 static int dir_destroy(struct bdev
*orig
)
438 if (!lxc_rmdir_onedev(orig
->src
))
443 static int dir_create(struct bdev
*bdev
, const char *dest
, const char *n
,
444 struct bdev_specs
*specs
)
446 bdev
->src
= strdup(dest
);
447 bdev
->dest
= strdup(dest
);
448 if (!bdev
->src
|| !bdev
->dest
) {
449 ERROR("Out of memory");
453 if (mkdir_p(bdev
->src
, 0755) < 0) {
454 ERROR("Error creating %s\n", bdev
->src
);
457 if (mkdir_p(bdev
->dest
, 0755) < 0) {
458 ERROR("Error creating %s\n", bdev
->dest
);
465 struct bdev_ops dir_ops
= {
466 .detect
= &dir_detect
,
468 .umount
= &dir_umount
,
469 .clone_paths
= &dir_clonepaths
,
470 .destroy
= &dir_destroy
,
471 .create
= &dir_create
,
477 // There are two ways we could do this. We could always specify the
478 // 'zfs device' (i.e. tank/lxc lxc/container) as rootfs. But instead
479 // (at least right now) we have lxc-create specify $lxcpath/$lxcname/rootfs
480 // as the mountpoint, so that it is always mounted.
482 // That means 'mount' is really never needed and could be noop, but for the
483 // sake of flexibility let's always bind-mount.
486 static int zfs_list_entry(const char *path
, char *output
, size_t inlen
)
491 if ((f
= popen("zfs list 2> /dev/null", "r")) == NULL
) {
492 SYSERROR("popen failed");
495 while (fgets(output
, inlen
, f
)) {
496 if (strstr(output
, path
)) {
506 static int zfs_detect(const char *path
)
508 char *output
= malloc(LXC_LOG_BUFFER_SIZE
);
512 ERROR("out of memory");
515 found
= zfs_list_entry(path
, output
, LXC_LOG_BUFFER_SIZE
);
520 static int zfs_mount(struct bdev
*bdev
)
522 if (strcmp(bdev
->type
, "zfs"))
524 if (!bdev
->src
|| !bdev
->dest
)
526 return mount(bdev
->src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
, NULL
);
529 static int zfs_umount(struct bdev
*bdev
)
531 if (strcmp(bdev
->type
, "zfs"))
533 if (!bdev
->src
|| !bdev
->dest
)
535 return umount(bdev
->dest
);
538 static int zfs_clone(const char *opath
, const char *npath
, const char *oname
,
539 const char *nname
, const char *lxcpath
, int snapshot
)
541 // use the 'zfs list | grep opath' entry to get the zfsroot
542 char output
[MAXPATHLEN
], option
[MAXPATHLEN
], *p
;
543 const char *zfsroot
= output
;
547 if (zfs_list_entry(opath
, output
, MAXPATHLEN
)) {
548 // zfsroot is output up to ' '
549 if ((p
= index(output
, ' ')) == NULL
)
552 if ((p
= strrchr(output
, '/')) == NULL
)
556 zfsroot
= default_zfs_root();
558 ret
= snprintf(option
, MAXPATHLEN
, "-omountpoint=%s/%s/rootfs",
560 if (ret
< 0 || ret
>= MAXPATHLEN
)
563 // zfs create -omountpoint=$lxcpath/$lxcname $zfsroot/$nname
565 if ((pid
= fork()) < 0)
568 char dev
[MAXPATHLEN
];
569 ret
= snprintf(dev
, MAXPATHLEN
, "%s/%s", zfsroot
, nname
);
570 if (ret
< 0 || ret
>= MAXPATHLEN
)
572 execlp("zfs", "zfs", "create", option
, dev
, NULL
);
575 return wait_for_pid(pid
);
578 // 'zfs snapshot zfsroot/oname@nname
579 // zfs clone zfsroot/oname@nname zfsroot/nname
580 char path1
[MAXPATHLEN
], path2
[MAXPATHLEN
];
582 ret
= snprintf(path1
, MAXPATHLEN
, "%s/%s@%s", zfsroot
,
584 if (ret
< 0 || ret
>= MAXPATHLEN
)
586 (void) snprintf(path2
, MAXPATHLEN
, "%s/%s", zfsroot
, nname
);
588 // if the snapshot exists, delete it
589 if ((pid
= fork()) < 0)
592 execlp("zfs", "zfs", "destroy", path1
, NULL
);
595 // it probably doesn't exist so destroy probably will fail.
596 (void) wait_for_pid(pid
);
598 // run first (snapshot) command
599 if ((pid
= fork()) < 0)
602 execlp("zfs", "zfs", "snapshot", path1
, NULL
);
605 if (wait_for_pid(pid
) < 0)
608 // run second (clone) command
609 if ((pid
= fork()) < 0)
612 execlp("zfs", "zfs", "clone", option
, path1
, path2
, NULL
);
615 return wait_for_pid(pid
);
619 static int zfs_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
620 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
621 unsigned long newsize
)
625 if (!orig
->src
|| !orig
->dest
)
628 if (snap
&& strcmp(orig
->type
, "zfs")) {
629 ERROR("zfs snapshot from %s backing store is not supported",
634 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
635 new->src
= malloc(len
);
638 ret
= snprintf(new->src
, len
, "%s/%s/rootfs", lxcpath
, cname
);
639 if (ret
< 0 || ret
>= len
)
641 if ((new->dest
= strdup(new->src
)) == NULL
)
644 return zfs_clone(orig
->src
, new->src
, oldname
, cname
, lxcpath
, snap
);
648 * TODO: detect whether this was a clone, and if so then also delete the
649 * snapshot it was based on, so that we don't hold the original
652 static int zfs_destroy(struct bdev
*orig
)
655 char output
[MAXPATHLEN
], *p
;
657 if ((pid
= fork()) < 0)
660 return wait_for_pid(pid
);
662 if (!zfs_list_entry(orig
->src
, output
, MAXPATHLEN
)) {
663 ERROR("Error: zfs entry for %s not found", orig
->src
);
667 // zfs mount is output up to ' '
668 if ((p
= index(output
, ' ')) == NULL
)
672 execlp("zfs", "zfs", "destroy", output
, NULL
);
676 static int zfs_create(struct bdev
*bdev
, const char *dest
, const char *n
,
677 struct bdev_specs
*specs
)
680 char option
[MAXPATHLEN
];
684 if (!specs
|| !specs
->u
.zfs
.zfsroot
)
685 zfsroot
= default_zfs_root();
687 zfsroot
= specs
->u
.zfs
.zfsroot
;
689 if (!(bdev
->dest
= strdup(dest
))) {
690 ERROR("No mount target specified or out of memory");
693 if (!(bdev
->src
= strdup(bdev
->dest
))) {
694 ERROR("out of memory");
698 ret
= snprintf(option
, MAXPATHLEN
, "-omountpoint=%s", bdev
->dest
);
699 if (ret
< 0 || ret
>= MAXPATHLEN
)
701 if ((pid
= fork()) < 0)
704 return wait_for_pid(pid
);
706 char dev
[MAXPATHLEN
];
707 ret
= snprintf(dev
, MAXPATHLEN
, "%s/%s", zfsroot
, n
);
708 if (ret
< 0 || ret
>= MAXPATHLEN
)
710 execlp("zfs", "zfs", "create", option
, dev
, NULL
);
714 struct bdev_ops zfs_ops
= {
715 .detect
= &zfs_detect
,
717 .umount
= &zfs_umount
,
718 .clone_paths
= &zfs_clonepaths
,
719 .destroy
= &zfs_destroy
,
720 .create
= &zfs_create
,
728 * Look at /sys/dev/block/maj:min/dm/uuid. If it contains the hardcoded LVM
729 * prefix "LVM-", then this is an lvm2 LV
731 static int lvm_detect(const char *path
)
733 char devp
[MAXPATHLEN
], buf
[4];
738 if (strncmp(path
, "lvm:", 4) == 0)
739 return 1; // take their word for it
741 ret
= stat(path
, &statbuf
);
744 if (!S_ISBLK(statbuf
.st_mode
))
747 ret
= snprintf(devp
, MAXPATHLEN
, "/sys/dev/block/%d:%d/dm/uuid",
748 major(statbuf
.st_rdev
), minor(statbuf
.st_rdev
));
749 if (ret
< 0 || ret
>= MAXPATHLEN
) {
750 ERROR("lvm uuid pathname too long");
753 fout
= fopen(devp
, "r");
756 ret
= fread(buf
, 1, 4, fout
);
758 if (ret
!= 4 || strncmp(buf
, "LVM-", 4) != 0)
763 static int lvm_mount(struct bdev
*bdev
)
765 if (strcmp(bdev
->type
, "lvm"))
767 if (!bdev
->src
|| !bdev
->dest
)
769 /* if we might pass in data sometime, then we'll have to enrich
771 return mount_unknow_fs(bdev
->src
, bdev
->dest
, 0);
774 static int lvm_umount(struct bdev
*bdev
)
776 if (strcmp(bdev
->type
, "lvm"))
778 if (!bdev
->src
|| !bdev
->dest
)
780 return umount(bdev
->dest
);
784 * path must be '/dev/$vg/$lv', $vg must be an existing VG, and $lv must
785 * not yet exist. This function will attempt to create /dev/$vg/$lv of
788 static int do_lvm_create(const char *path
, unsigned long size
)
791 char sz
[24], *pathdup
, *vg
, *lv
;
793 if ((pid
= fork()) < 0) {
794 SYSERROR("failed fork");
798 return wait_for_pid(pid
);
800 // lvcreate default size is in M, not bytes.
801 ret
= snprintf(sz
, 24, "%lu", size
/1000000);
802 if (ret
< 0 || ret
>= 24)
805 pathdup
= strdup(path
);
808 lv
= strrchr(pathdup
, '/');
815 vg
= strrchr(pathdup
, '/');
819 execlp("lvcreate", "lvcreate", "-L", sz
, vg
, "-n", lv
, (char *)NULL
);
824 static int lvm_snapshot(const char *orig
, const char *path
, unsigned long size
)
827 char sz
[24], *pathdup
, *lv
;
829 if ((pid
= fork()) < 0) {
830 SYSERROR("failed fork");
834 return wait_for_pid(pid
);
835 // lvcreate default size is in M, not bytes.
836 ret
= snprintf(sz
, 24, "%lu", size
/1000000);
837 if (ret
< 0 || ret
>= 24)
840 pathdup
= strdup(path
);
843 lv
= strrchr(pathdup
, '/');
851 ret
= execlp("lvcreate", "lvcreate", "-s", "-L", sz
, "-n", lv
, orig
, (char *)NULL
);
856 // this will return 1 for physical disks, qemu-nbd, loop, etc
857 // right now only lvm is a block device
858 static int is_blktype(struct bdev
*b
)
860 if (strcmp(b
->type
, "lvm") == 0)
865 static int lvm_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
866 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
867 unsigned long newsize
)
870 unsigned long size
= newsize
;
873 if (!orig
->src
|| !orig
->dest
)
876 if (strcmp(orig
->type
, "lvm")) {
880 ERROR("LVM snapshot from %s backing store is not supported",
884 vg
= default_lvm_vg();
885 len
= strlen("/dev/") + strlen(vg
) + strlen(cname
) + 2;
886 if ((new->src
= malloc(len
)) == NULL
)
888 ret
= snprintf(new->src
, len
, "/dev/%s/%s", vg
, cname
);
889 if (ret
< 0 || ret
>= len
)
892 new->src
= dir_new_path(orig
->src
, oldname
, cname
, oldpath
, lxcpath
);
898 new->data
= strdup(orig
->data
);
903 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
904 new->dest
= malloc(len
);
907 ret
= snprintf(new->dest
, len
, "%s/%s/rootfs", lxcpath
, cname
);
908 if (ret
< 0 || ret
>= len
)
910 if (mkdir_p(new->dest
, 0755) < 0)
913 if (is_blktype(orig
)) {
914 if (!newsize
&& blk_getsize(orig
, &size
) < 0) {
915 ERROR("Error getting size of %s", orig
->src
);
918 if (detect_fs(orig
, fstype
, 100) < 0) {
919 INFO("could not find fstype for %s, using ext3", orig
->src
);
923 sprintf(fstype
, "ext3");
925 size
= 1000000000; // default to 1G
929 if (lvm_snapshot(orig
->src
, new->src
, size
) < 0) {
930 ERROR("could not create %s snapshot of %s", new->src
, orig
->src
);
934 if (do_lvm_create(new->src
, size
) < 0) {
935 ERROR("Error creating new lvm blockdev");
938 if (do_mkfs(new->src
, fstype
) < 0) {
939 ERROR("Error creating filesystem type %s on %s", fstype
,
948 static int lvm_destroy(struct bdev
*orig
)
952 if ((pid
= fork()) < 0)
955 execlp("lvremove", "lvremove", "-f", orig
->src
, NULL
);
958 return wait_for_pid(pid
);
961 #define DEFAULT_FS_SIZE 1024000000
962 #define DEFAULT_FSTYPE "ext3"
963 static int lvm_create(struct bdev
*bdev
, const char *dest
, const char *n
,
964 struct bdev_specs
*specs
)
966 const char *vg
, *fstype
, *lv
= n
;
973 vg
= specs
->u
.lvm
.vg
;
975 vg
= default_lvm_vg();
979 lv
= specs
->u
.lvm
.lv
;
980 len
= strlen(vg
) + strlen(lv
) + 7;
981 bdev
->src
= malloc(len
);
985 ret
= snprintf(bdev
->src
, len
, "/dev/%s/%s", vg
, lv
);
986 if (ret
< 0 || ret
>= len
)
989 // lvm.fssize is in bytes.
990 sz
= specs
->u
.lvm
.fssize
;
992 sz
= DEFAULT_FS_SIZE
;
994 INFO("Error creating new lvm blockdev %s size %lu", bdev
->src
, sz
);
995 if (do_lvm_create(bdev
->src
, sz
) < 0) {
996 ERROR("Error creating new lvm blockdev %s size %lu", bdev
->src
, sz
);
1000 fstype
= specs
->u
.lvm
.fstype
;
1002 fstype
= DEFAULT_FSTYPE
;
1003 if (do_mkfs(bdev
->src
, fstype
) < 0) {
1004 ERROR("Error creating filesystem type %s on %s", fstype
,
1008 if (!(bdev
->dest
= strdup(dest
)))
1011 if (mkdir_p(bdev
->dest
, 0755) < 0) {
1012 ERROR("Error creating %s\n", bdev
->dest
);
1019 struct bdev_ops lvm_ops
= {
1020 .detect
= &lvm_detect
,
1021 .mount
= &lvm_mount
,
1022 .umount
= &lvm_umount
,
1023 .clone_paths
= &lvm_clonepaths
,
1024 .destroy
= &lvm_destroy
,
1025 .create
= &lvm_create
,
1032 struct btrfs_ioctl_space_info
{
1033 unsigned long long flags
;
1034 unsigned long long total_bytes
;
1035 unsigned long long used_bytes
;
1038 struct btrfs_ioctl_space_args
{
1039 unsigned long long space_slots
;
1040 unsigned long long total_spaces
;
1041 struct btrfs_ioctl_space_info spaces
[0];
1044 #define BTRFS_IOCTL_MAGIC 0x94
1045 #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, unsigned long long)
1046 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
1047 struct btrfs_ioctl_space_args)
1049 static bool is_btrfs_fs(const char *path
)
1052 struct btrfs_ioctl_space_args sargs
;
1054 // make sure this is a btrfs filesystem
1055 fd
= open(path
, O_RDONLY
);
1058 sargs
.space_slots
= 0;
1059 sargs
.total_spaces
= 0;
1060 ret
= ioctl(fd
, BTRFS_IOC_SPACE_INFO
, &sargs
);
1068 static int btrfs_detect(const char *path
)
1073 if (!is_btrfs_fs(path
))
1076 // and make sure it's a subvolume.
1077 ret
= stat(path
, &st
);
1081 if (st
.st_ino
== 256 && S_ISDIR(st
.st_mode
))
1087 static int btrfs_mount(struct bdev
*bdev
)
1089 if (strcmp(bdev
->type
, "btrfs"))
1091 if (!bdev
->src
|| !bdev
->dest
)
1093 return mount(bdev
->src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
, NULL
);
1096 static int btrfs_umount(struct bdev
*bdev
)
1098 if (strcmp(bdev
->type
, "btrfs"))
1100 if (!bdev
->src
|| !bdev
->dest
)
1102 return umount(bdev
->dest
);
1105 #define BTRFS_SUBVOL_NAME_MAX 4039
1106 #define BTRFS_PATH_NAME_MAX 4087
1108 struct btrfs_ioctl_vol_args
{
1109 signed long long fd
;
1110 char name
[BTRFS_PATH_NAME_MAX
+ 1];
1113 #define BTRFS_IOCTL_MAGIC 0x94
1114 #define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
1115 struct btrfs_ioctl_vol_args_v2)
1116 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
1117 struct btrfs_ioctl_vol_args_v2)
1118 #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
1119 struct btrfs_ioctl_vol_args)
1120 #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
1121 struct btrfs_ioctl_vol_args)
1123 #define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
1125 struct btrfs_ioctl_vol_args_v2
{
1126 signed long long fd
;
1127 unsigned long long transid
;
1128 unsigned long long flags
;
1131 unsigned long long size
;
1132 //struct btrfs_qgroup_inherit *qgroup_inherit;
1133 void *qgroup_inherit
;
1135 unsigned long long unused
[4];
1137 char name
[BTRFS_SUBVOL_NAME_MAX
+ 1];
1140 static int btrfs_subvolume_create(const char *path
)
1143 struct btrfs_ioctl_vol_args args
;
1144 char *p
, *newfull
= strdup(path
);
1147 ERROR("Error: out of memory");
1151 p
= strrchr(newfull
, '/');
1153 ERROR("bad path: %s", path
);
1158 if ((fd
= open(newfull
, O_RDONLY
)) < 0) {
1159 ERROR("Error opening %s", newfull
);
1164 memset(&args
, 0, sizeof(args
));
1165 strncpy(args
.name
, p
+1, BTRFS_SUBVOL_NAME_MAX
);
1166 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
1167 ret
= ioctl(fd
, BTRFS_IOC_SUBVOL_CREATE
, &args
);
1168 INFO("btrfs: snapshot create ioctl returned %d", ret
);
1175 static int btrfs_snapshot(const char *orig
, const char *new)
1177 int fd
= -1, fddst
= -1, ret
= -1;
1178 struct btrfs_ioctl_vol_args_v2 args
;
1179 char *newdir
, *newname
, *newfull
= NULL
;
1181 newfull
= strdup(new);
1183 ERROR("Error: out of memory");
1186 // make sure the directory doesn't already exist
1187 if (rmdir(newfull
) < 0 && errno
!= -ENOENT
) {
1188 SYSERROR("Error removing empty new rootfs");
1191 newname
= basename(newfull
);
1192 newdir
= dirname(newfull
);
1193 fd
= open(orig
, O_RDONLY
);
1195 SYSERROR("Error opening original rootfs %s", orig
);
1198 fddst
= open(newdir
, O_RDONLY
);
1200 SYSERROR("Error opening new container dir %s", newdir
);
1204 memset(&args
, 0, sizeof(args
));
1206 strncpy(args
.name
, newname
, BTRFS_SUBVOL_NAME_MAX
);
1207 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
1208 ret
= ioctl(fddst
, BTRFS_IOC_SNAP_CREATE_V2
, &args
);
1209 INFO("btrfs: snapshot create ioctl returned %d", ret
);
1221 static int btrfs_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1222 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1223 unsigned long newsize
)
1225 if (!orig
->dest
|| !orig
->src
)
1228 if (strcmp(orig
->type
, "btrfs")) {
1231 ERROR("btrfs snapshot from %s backing store is not supported",
1235 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
1236 new->src
= malloc(len
);
1239 ret
= snprintf(new->src
, len
, "%s/%s/rootfs", lxcpath
, cname
);
1240 if (ret
< 0 || ret
>= len
)
1243 // in case rootfs is in custom path, reuse it
1244 if ((new->src
= dir_new_path(orig
->src
, oldname
, cname
, oldpath
, lxcpath
)) == NULL
)
1249 if ((new->dest
= strdup(new->src
)) == NULL
)
1252 if (orig
->data
&& (new->data
= strdup(orig
->data
)) == NULL
)
1256 return btrfs_snapshot(orig
->dest
, new->dest
);
1258 if (rmdir(new->dest
) < 0 && errno
!= -ENOENT
) {
1259 SYSERROR("removing %s\n", new->dest
);
1263 return btrfs_subvolume_create(new->dest
);
1266 static int btrfs_destroy(struct bdev
*orig
)
1269 struct btrfs_ioctl_vol_args args
;
1270 char *path
= orig
->src
;
1271 char *p
, *newfull
= strdup(path
);
1274 ERROR("Error: out of memory");
1278 p
= strrchr(newfull
, '/');
1280 ERROR("bad path: %s", path
);
1285 if ((fd
= open(newfull
, O_RDONLY
)) < 0) {
1286 ERROR("Error opening %s", newfull
);
1291 memset(&args
, 0, sizeof(args
));
1292 strncpy(args
.name
, p
+1, BTRFS_SUBVOL_NAME_MAX
);
1293 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
1294 ret
= ioctl(fd
, BTRFS_IOC_SNAP_DESTROY
, &args
);
1295 INFO("btrfs: snapshot create ioctl returned %d", ret
);
1302 static int btrfs_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1303 struct bdev_specs
*specs
)
1305 bdev
->src
= strdup(dest
);
1306 bdev
->dest
= strdup(dest
);
1307 if (!bdev
->src
|| !bdev
->dest
)
1309 return btrfs_subvolume_create(bdev
->dest
);
1312 struct bdev_ops btrfs_ops
= {
1313 .detect
= &btrfs_detect
,
1314 .mount
= &btrfs_mount
,
1315 .umount
= &btrfs_umount
,
1316 .clone_paths
= &btrfs_clonepaths
,
1317 .destroy
= &btrfs_destroy
,
1318 .create
= &btrfs_create
,
1324 static int loop_detect(const char *path
)
1326 if (strncmp(path
, "loop:", 5) == 0)
1331 static int find_free_loopdev(int *retfd
, char *namep
)
1333 struct dirent dirent
, *direntp
;
1334 struct loop_info64 lo
;
1338 if (!(dir
= opendir("/dev"))) {
1339 SYSERROR("Error opening /dev");
1342 while (!readdir_r(dir
, &dirent
, &direntp
)) {
1346 if (strncmp(direntp
->d_name
, "loop", 4) != 0)
1348 if ((fd
= openat(dirfd(dir
), direntp
->d_name
, O_RDWR
)) < 0)
1350 if (ioctl(fd
, LOOP_GET_STATUS64
, &lo
) == 0 || errno
!= ENXIO
) {
1355 // We can use this fd
1356 snprintf(namep
, 100, "/dev/%s", direntp
->d_name
);
1361 ERROR("No loop device found");
1369 static int loop_mount(struct bdev
*bdev
)
1371 int lfd
, ffd
= -1, ret
= -1;
1372 struct loop_info64 lo
;
1375 if (strcmp(bdev
->type
, "loop"))
1377 if (!bdev
->src
|| !bdev
->dest
)
1379 if (find_free_loopdev(&lfd
, loname
) < 0)
1382 if ((ffd
= open(bdev
->src
+ 5, O_RDWR
)) < 0) {
1383 SYSERROR("Error opening backing file %s\n", bdev
->src
);
1387 if (ioctl(lfd
, LOOP_SET_FD
, ffd
) < 0) {
1388 SYSERROR("Error attaching backing file to loop dev");
1391 memset(&lo
, 0, sizeof(lo
));
1392 lo
.lo_flags
= LO_FLAGS_AUTOCLEAR
;
1393 if (ioctl(lfd
, LOOP_SET_STATUS64
, &lo
) < 0) {
1394 SYSERROR("Error setting autoclear on loop dev\n");
1398 ret
= mount_unknow_fs(loname
, bdev
->dest
, 0);
1400 ERROR("Error mounting %s\n", bdev
->src
);
1414 static int loop_umount(struct bdev
*bdev
)
1418 if (strcmp(bdev
->type
, "loop"))
1420 if (!bdev
->src
|| !bdev
->dest
)
1422 ret
= umount(bdev
->dest
);
1423 if (bdev
->lofd
>= 0) {
1430 static int do_loop_create(const char *path
, unsigned long size
, const char *fstype
)
1433 // create the new loopback file.
1434 fd
= creat(path
, S_IRUSR
|S_IWUSR
);
1437 if (lseek(fd
, size
, SEEK_SET
) < 0) {
1438 SYSERROR("Error seeking to set new loop file size");
1442 if (write(fd
, "1", 1) != 1) {
1443 SYSERROR("Error creating new loop file");
1447 if (close(fd
) < 0) {
1448 SYSERROR("Error closing new loop file");
1452 // create an fs in the loopback file
1453 if (do_mkfs(path
, fstype
) < 0) {
1454 ERROR("Error creating filesystem type %s on %s", fstype
,
1463 * No idea what the original blockdev will be called, but the copy will be
1464 * called $lxcpath/$lxcname/rootdev
1466 static int loop_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1467 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1468 unsigned long newsize
)
1471 unsigned long size
= newsize
;
1476 ERROR("loop devices cannot be snapshotted.");
1480 if (!orig
->dest
|| !orig
->src
)
1483 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootdev") + 3;
1484 srcdev
= alloca(len
);
1485 ret
= snprintf(srcdev
, len
, "%s/%s/rootdev", lxcpath
, cname
);
1486 if (ret
< 0 || ret
>= len
)
1489 new->src
= malloc(len
+ 5);
1492 ret
= snprintf(new->src
, len
+ 5, "loop:%s", srcdev
);
1493 if (ret
< 0 || ret
>= len
+ 5)
1496 new->dest
= malloc(len
);
1499 ret
= snprintf(new->dest
, len
, "%s/%s/rootfs", lxcpath
, cname
);
1500 if (ret
< 0 || ret
>= len
)
1503 // it's tempting to say: if orig->src == loopback and !newsize, then
1504 // copy the loopback file. However, we'd have to make sure to
1505 // correctly keep holes! So punt for now.
1507 if (is_blktype(orig
)) {
1508 if (!newsize
&& blk_getsize(orig
, &size
) < 0) {
1509 ERROR("Error getting size of %s", orig
->src
);
1512 if (detect_fs(orig
, fstype
, 100) < 0) {
1513 INFO("could not find fstype for %s, using %s", orig
->src
,
1518 sprintf(fstype
, "%s", DEFAULT_FSTYPE
);
1520 size
= DEFAULT_FS_SIZE
; // default to 1G
1522 return do_loop_create(srcdev
, size
, fstype
);
1525 static int loop_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1526 struct bdev_specs
*specs
)
1536 // dest is passed in as $lxcpath / $lxcname / rootfs
1537 // srcdev will be: $lxcpath / $lxcname / rootdev
1538 // src will be 'loop:$srcdev'
1539 len
= strlen(dest
) + 2;
1540 srcdev
= alloca(len
);
1542 ret
= snprintf(srcdev
, len
, "%s", dest
);
1543 if (ret
< 0 || ret
>= len
)
1545 sprintf(srcdev
+ len
- 4, "dev");
1547 bdev
->src
= malloc(len
+ 5);
1550 ret
= snprintf(bdev
->src
, len
+ 5, "loop:%s", srcdev
);
1551 if (ret
< 0 || ret
>= len
+ 5)
1554 sz
= specs
->u
.loop
.fssize
;
1556 sz
= DEFAULT_FS_SIZE
;
1558 fstype
= specs
->u
.loop
.fstype
;
1560 fstype
= DEFAULT_FSTYPE
;
1562 if (!(bdev
->dest
= strdup(dest
)))
1565 if (mkdir_p(bdev
->dest
, 0755) < 0) {
1566 ERROR("Error creating %s\n", bdev
->dest
);
1570 return do_loop_create(srcdev
, sz
, fstype
);
1573 static int loop_destroy(struct bdev
*orig
)
1575 return unlink(orig
->src
+ 5);
1578 struct bdev_ops loop_ops
= {
1579 .detect
= &loop_detect
,
1580 .mount
= &loop_mount
,
1581 .umount
= &loop_umount
,
1582 .clone_paths
= &loop_clonepaths
,
1583 .destroy
= &loop_destroy
,
1584 .create
= &loop_create
,
1591 static int overlayfs_detect(const char *path
)
1593 if (strncmp(path
, "overlayfs:", 10) == 0)
1594 return 1; // take their word for it
1599 // XXXXXXX overlayfs mount ops
1601 static int overlayfs_mount(struct bdev
*bdev
)
1603 char *options
, *dup
, *lower
, *upper
;
1607 if (strcmp(bdev
->type
, "overlayfs"))
1609 if (!bdev
->src
|| !bdev
->dest
)
1612 // separately mount it first
1613 // mount -t overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest
1614 dup
= alloca(strlen(bdev
->src
)+1);
1615 strcpy(dup
, bdev
->src
);
1616 if (!(lower
= index(dup
, ':')))
1618 if (!(upper
= index(++lower
, ':')))
1623 // TODO We should check whether bdev->src is a blockdev, and if so
1624 // but for now, only support overlays of a basic directory
1626 len
= strlen(lower
) + strlen(upper
) + strlen("upperdir=,lowerdir=") + 1;
1627 options
= alloca(len
);
1628 ret
= snprintf(options
, len
, "upperdir=%s,lowerdir=%s", upper
, lower
);
1629 if (ret
< 0 || ret
>= len
)
1631 ret
= mount(lower
, bdev
->dest
, "overlayfs", MS_MGC_VAL
, options
);
1633 SYSERROR("overlayfs: error mounting %s onto %s options %s",
1634 lower
, bdev
->dest
, options
);
1636 INFO("overlayfs: mounted %s onto %s options %s",
1637 lower
, bdev
->dest
, options
);
1641 static int overlayfs_umount(struct bdev
*bdev
)
1643 if (strcmp(bdev
->type
, "overlayfs"))
1645 if (!bdev
->src
|| !bdev
->dest
)
1647 return umount(bdev
->dest
);
1650 static int overlayfs_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1651 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1652 unsigned long newsize
)
1655 ERROR("overlayfs is only for snapshot clones");
1659 if (!orig
->src
|| !orig
->dest
)
1662 new->dest
= dir_new_path(orig
->dest
, oldname
, cname
, oldpath
, lxcpath
);
1665 if (mkdir_p(new->dest
, 0755) < 0)
1668 if (strcmp(orig
->type
, "dir") == 0) {
1672 // if we have /var/lib/lxc/c2/rootfs, then delta will be
1673 // /var/lib/lxc/c2/delta0
1674 delta
= strdup(new->dest
);
1678 if (strlen(delta
) < 6) {
1682 strcpy(&delta
[strlen(delta
)-6], "delta0");
1683 if ((ret
= mkdir(delta
, 0755)) < 0) {
1684 SYSERROR("error: mkdir %s", delta
);
1689 // the src will be 'overlayfs:lowerdir:upperdir'
1690 len
= strlen(delta
) + strlen(orig
->src
) + 12;
1691 new->src
= malloc(len
);
1696 ret
= snprintf(new->src
, len
, "overlayfs:%s:%s", orig
->src
, delta
);
1698 if (ret
< 0 || ret
>= len
)
1700 } else if (strcmp(orig
->type
, "overlayfs") == 0) {
1701 // What exactly do we want to do here?
1702 // I think we want to use the original lowerdir, with a
1703 // private delta which is originally rsynced from the
1705 char *osrc
, *odelta
, *nsrc
, *ndelta
;
1707 if (!(osrc
= strdup(orig
->src
)))
1709 nsrc
= index(osrc
, ':') + 1;
1710 if (nsrc
!= osrc
+ 10 || (odelta
= index(nsrc
, ':')) == NULL
) {
1716 ndelta
= dir_new_path(odelta
, oldname
, cname
, oldpath
, lxcpath
);
1721 if (do_rsync(odelta
, ndelta
) < 0) {
1724 ERROR("copying overlayfs delta");
1727 len
= strlen(nsrc
) + strlen(ndelta
) + 12;
1728 new->src
= malloc(len
);
1734 ret
= snprintf(new->src
, len
, "overlayfs:%s:%s", nsrc
, ndelta
);
1737 if (ret
< 0 || ret
>= len
)
1740 ERROR("overlayfs clone of %s container is not yet supported",
1742 // Note, supporting this will require overlayfs_mount supporting
1743 // mounting of the underlay. No big deal, just needs to be done.
1750 int overlayfs_destroy(struct bdev
*orig
)
1754 if (strncmp(orig
->src
, "overlayfs:", 10) != 0)
1756 upper
= index(orig
->src
+ 10, ':');
1760 return lxc_rmdir_onedev(upper
);
1764 * to say 'lxc-create -t ubuntu -n o1 -B overlayfs' means you want
1765 * $lxcpath/$lxcname/rootfs to have the created container, while all
1766 * changes after starting the container are written to
1767 * $lxcpath/$lxcname/delta0
1769 static int overlayfs_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1770 struct bdev_specs
*specs
)
1773 int ret
, len
= strlen(dest
), newlen
;
1775 if (len
< 8 || strcmp(dest
+len
-7, "/rootfs") != 0)
1778 if (!(bdev
->dest
= strdup(dest
))) {
1779 ERROR("Out of memory");
1783 delta
= alloca(strlen(dest
)+1);
1784 strcpy(delta
, dest
);
1785 strcpy(delta
+len
-6, "delta0");
1787 if (mkdir_p(delta
, 0755) < 0) {
1788 ERROR("Error creating %s\n", delta
);
1792 /* overlayfs:lower:upper */
1793 newlen
= (2 * len
) + strlen("overlayfs:") + 2;
1794 bdev
->src
= malloc(newlen
);
1796 ERROR("Out of memory");
1799 ret
= snprintf(bdev
->src
, newlen
, "overlayfs:%s:%s", dest
, delta
);
1800 if (ret
< 0 || ret
>= newlen
)
1803 if (mkdir_p(bdev
->dest
, 0755) < 0) {
1804 ERROR("Error creating %s\n", bdev
->dest
);
1811 struct bdev_ops overlayfs_ops
= {
1812 .detect
= &overlayfs_detect
,
1813 .mount
= &overlayfs_mount
,
1814 .umount
= &overlayfs_umount
,
1815 .clone_paths
= &overlayfs_clonepaths
,
1816 .destroy
= &overlayfs_destroy
,
1817 .create
= &overlayfs_create
,
1820 struct bdev_type bdevs
[] = {
1821 {.name
= "zfs", .ops
= &zfs_ops
,},
1822 {.name
= "lvm", .ops
= &lvm_ops
,},
1823 {.name
= "btrfs", .ops
= &btrfs_ops
,},
1824 {.name
= "dir", .ops
= &dir_ops
,},
1825 {.name
= "overlayfs", .ops
= &overlayfs_ops
,},
1826 {.name
= "loop", .ops
= &loop_ops
,},
1829 static const size_t numbdevs
= sizeof(bdevs
) / sizeof(struct bdev_type
);
1831 void bdev_put(struct bdev
*bdev
)
1842 struct bdev
*bdev_get(const char *type
)
1847 for (i
=0; i
<numbdevs
; i
++) {
1848 if (strcmp(bdevs
[i
].name
, type
) == 0)
1853 bdev
= malloc(sizeof(struct bdev
));
1856 memset(bdev
, 0, sizeof(struct bdev
));
1857 bdev
->ops
= bdevs
[i
].ops
;
1858 bdev
->type
= bdevs
[i
].name
;
1862 struct bdev
*bdev_init(const char *src
, const char *dst
, const char *data
)
1867 for (i
=0; i
<numbdevs
; i
++) {
1869 r
= bdevs
[i
].ops
->detect(src
);
1876 bdev
= malloc(sizeof(struct bdev
));
1879 memset(bdev
, 0, sizeof(struct bdev
));
1880 bdev
->ops
= bdevs
[i
].ops
;
1881 bdev
->type
= bdevs
[i
].name
;
1883 bdev
->data
= strdup(data
);
1885 bdev
->src
= strdup(src
);
1887 bdev
->dest
= strdup(dst
);
1893 * If we're not snapshotting, then bdev_copy becomes a simple case of mounting
1894 * the original, mounting the new, and rsyncing the contents.
1896 struct bdev
*bdev_copy(const char *src
, const char *oldname
, const char *cname
,
1897 const char *oldpath
, const char *lxcpath
, const char *bdevtype
,
1898 int snap
, const char *bdevdata
, unsigned long newsize
,
1901 struct bdev
*orig
, *new;
1904 /* if the container name doesn't show up in the rootfs path, then
1905 * we don't know how to come up with a new name
1907 if (strstr(src
, oldname
) == NULL
) {
1908 ERROR("original rootfs path %s doesn't include container name %s",
1913 orig
= bdev_init(src
, NULL
, NULL
);
1915 ERROR("failed to detect blockdev type for %s\n", src
);
1921 orig
->dest
= malloc(MAXPATHLEN
);
1923 ERROR("out of memory");
1927 ret
= snprintf(orig
->dest
, MAXPATHLEN
, "%s/%s/rootfs", oldpath
, oldname
);
1928 if (ret
< 0 || ret
>= MAXPATHLEN
) {
1929 ERROR("rootfs path too long");
1936 * If bdevtype is NULL, snap is set and the original is a "dir", then use overlayfs
1938 if (!bdevtype
&& snap
&& strcmp(orig
->type
, "dir") == 0)
1939 bdevtype
= "overlayfs";
1942 if (strcmp(orig
->type
, "dir") == 0 &&
1943 strcmp(bdevtype
, "overlayfs") == 0)
1946 new = bdev_get(bdevtype
? bdevtype
: orig
->type
);
1948 ERROR("no such block device type: %s", bdevtype
? bdevtype
: orig
->type
);
1953 if (new->ops
->clone_paths(orig
, new, oldname
, cname
, oldpath
, lxcpath
, snap
, newsize
) < 0) {
1954 ERROR("failed getting pathnames for cloned storage: %s\n", src
);
1969 int ret
= wait_for_pid(pid
);
1978 if (unshare(CLONE_NEWNS
) < 0) {
1979 SYSERROR("unshare CLONE_NEWNS");
1985 // If not a snapshot, copy the fs.
1986 if (orig
->ops
->mount(orig
) < 0) {
1987 ERROR("failed mounting %s onto %s\n", src
, orig
->dest
);
1990 if (new->ops
->mount(new) < 0) {
1991 ERROR("failed mounting %s onto %s\n", new->src
, new->dest
);
1994 if (do_rsync(orig
->dest
, new->dest
) < 0) {
1995 ERROR("rsyncing %s to %s\n", orig
->src
, new->src
);
1998 // don't bother umounting, ns exit will do that
2003 static struct bdev
* do_bdev_create(const char *dest
, const char *type
,
2004 const char *cname
, struct bdev_specs
*specs
)
2006 struct bdev
*bdev
= bdev_get(type
);
2011 if (bdev
->ops
->create(bdev
, dest
, cname
, specs
) < 0) {
2021 * Create a backing store for a container.
2022 * If successful, return a struct bdev *, with the bdev mounted and ready
2023 * for use. Before completing, the caller will need to call the
2024 * umount operation and bdev_put().
2025 * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
2026 * @type: the bdevtype (dir, btrfs, zfs, etc)
2027 * @cname: the container name
2028 * @specs: details about the backing store to create, like fstype
2030 struct bdev
*bdev_create(const char *dest
, const char *type
,
2031 const char *cname
, struct bdev_specs
*specs
)
2034 char *best_options
[] = {"btrfs", "zfs", "lvm", "dir", NULL
};
2037 return do_bdev_create(dest
, "dir", cname
, specs
);
2039 if (strcmp(type
, "best") == 0) {
2041 // try for the best backing store type, according to our
2042 // opinionated preferences
2043 for (i
=0; best_options
[i
]; i
++) {
2044 if ((bdev
= do_bdev_create(dest
, best_options
[i
], cname
, specs
)))
2047 return NULL
; // 'dir' should never fail, so this shouldn't happen
2051 if (index(type
, ',') != NULL
) {
2052 char *dup
= alloca(strlen(type
)+1), *saveptr
, *token
;
2054 for (token
= strtok_r(dup
, ",", &saveptr
); token
;
2055 token
= strtok_r(NULL
, ",", &saveptr
)) {
2056 if ((bdev
= do_bdev_create(dest
, token
, cname
, specs
)))
2061 return do_bdev_create(dest
, type
, cname
, specs
);
2064 char *overlayfs_getlower(char *p
)
2066 char *p1
= index(p
, ':');