2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * this is all just a first shot for experiment. If we go this route, much
26 * should change. bdev should be a directory with per-bdev file. Things which
27 * I'm doing by calling out to userspace should sometimes be done through
28 * libraries like liblvm2
34 #include <sys/types.h>
39 #include <sys/mount.h>
42 #include <linux/loop.h>
52 #include "namespace.h"
57 #define BLKGETSIZE64 _IOR(0x12,114,size_t)
60 #ifndef LO_FLAGS_AUTOCLEAR
61 #define LO_FLAGS_AUTOCLEAR 4
64 #define DEFAULT_FS_SIZE 1073741824
65 #define DEFAULT_FSTYPE "ext3"
67 lxc_log_define(bdev
, lxc
);
69 static int do_rsync(const char *src
, const char *dest
)
80 return wait_for_pid(pid
);
90 execlp("rsync", "rsync", "-a", s
, dest
, (char *)NULL
);
95 * get the size of the block device at bdev->src, in bytes (stored in *size)
97 static int blk_getsize(struct bdev
*bdev
, uint64_t *size
)
100 char *path
= bdev
->src
;
102 if (strcmp(bdev
->type
, "loop") == 0)
103 path
= bdev
->src
+ 5;
105 fd
= open(path
, O_RDONLY
);
109 ret
= ioctl(fd
, BLKGETSIZE64
, size
); // size of device in bytes
115 * These are copied from conf.c. However as conf.c will be moved to using
116 * the callback system, they can be pulled from there eventually, so we
117 * don't need to pollute utils.c with these low level functions
119 static int find_fstype_cb(char* buffer
, void *data
)
127 unsigned long mntflags
;
131 /* we don't try 'nodev' entries */
132 if (strstr(buffer
, "nodev"))
136 fstype
+= lxc_char_left_gc(fstype
, strlen(fstype
));
137 fstype
[lxc_char_right_gc(fstype
, strlen(fstype
))] = '\0';
139 DEBUG("trying to mount '%s'->'%s' with fstype '%s'",
140 cbarg
->rootfs
, cbarg
->target
, fstype
);
142 if (parse_mntopts(cbarg
->options
, &mntflags
, &mntdata
) < 0) {
147 if (mount(cbarg
->rootfs
, cbarg
->target
, fstype
, mntflags
, mntdata
)) {
148 DEBUG("mount failed with error: %s", strerror(errno
));
155 INFO("mounted '%s' on '%s', with fstype '%s'",
156 cbarg
->rootfs
, cbarg
->target
, fstype
);
161 static int mount_unknown_fs(const char *rootfs
, const char *target
,
177 * find the filesystem type with brute force:
178 * first we check with /etc/filesystems, in case the modules
179 * are auto-loaded and fall back to the supported kernel fs
186 for (i
= 0; i
< sizeof(fsfile
)/sizeof(fsfile
[0]); i
++) {
190 if (access(fsfile
[i
], F_OK
))
193 ret
= lxc_file_for_each_line(fsfile
[i
], find_fstype_cb
, &cbarg
);
195 ERROR("failed to parse '%s'", fsfile
[i
]);
203 ERROR("failed to determine fs type for '%s'", rootfs
);
207 static int do_mkfs(const char *path
, const char *fstype
)
211 if ((pid
= fork()) < 0) {
212 ERROR("error forking");
216 return wait_for_pid(pid
);
218 // If the file is not a block device, we don't want mkfs to ask
219 // us about whether to proceed.
223 open("/dev/zero", O_RDONLY
);
224 open("/dev/null", O_RDWR
);
225 open("/dev/null", O_RDWR
);
226 execlp("mkfs", "mkfs", "-t", fstype
, path
, NULL
);
230 static char *linkderef(char *path
, char *dest
)
235 ret
= stat(path
, &sbuf
);
238 if (!S_ISLNK(sbuf
.st_mode
))
240 ret
= readlink(path
, dest
, MAXPATHLEN
);
242 SYSERROR("error reading link %s", path
);
244 } else if (ret
>= MAXPATHLEN
) {
245 ERROR("link in %s too long", path
);
253 * Given a bdev (presumably blockdev-based), detect the fstype
254 * by trying to mount it (in a private mntns).
255 * @bdev: bdev to investigate
256 * @type: preallocated char* in which to write the fstype
257 * @len: length of passed in char*
258 * Returns length of fstype, or -1 on error
260 static int detect_fs(struct bdev
*bdev
, char *type
, int len
)
266 char *sp1
, *sp2
, *sp3
, *line
= NULL
;
269 if (!bdev
|| !bdev
->src
|| !bdev
->dest
)
273 if (strcmp(bdev
->type
, "loop") == 0)
274 srcdev
= bdev
->src
+ 5;
279 if ((pid
= fork()) < 0)
284 memset(type
, 0, len
);
285 ret
= read(p
[0], type
, len
-1);
288 SYSERROR("error reading from pipe");
291 } else if (ret
== 0) {
292 ERROR("child exited early - fstype not found");
298 INFO("detected fstype %s for %s", type
, srcdev
);
302 if (unshare(CLONE_NEWNS
) < 0)
305 ret
= mount_unknown_fs(srcdev
, bdev
->dest
, bdev
->mntopts
);
307 ERROR("failed mounting %s onto %s to detect fstype", srcdev
, bdev
->dest
);
310 // if symlink, get the real dev name
311 char devpath
[MAXPATHLEN
];
312 char *l
= linkderef(srcdev
, devpath
);
315 f
= fopen("/proc/self/mounts", "r");
318 while (getline(&line
, &linelen
, f
) != -1) {
319 sp1
= index(line
, ' ');
325 sp2
= index(sp1
+1, ' ');
329 sp3
= index(sp2
+1, ' ');
334 if (write(p
[1], sp2
, strlen(sp2
)) != strlen(sp2
))
343 const struct bdev_ops
*ops
;
346 static int is_dir(const char *path
)
349 int ret
= stat(path
, &statbuf
);
350 if (ret
== 0 && S_ISDIR(statbuf
.st_mode
))
355 static int dir_detect(const char *path
)
357 if (strncmp(path
, "dir:", 4) == 0)
358 return 1; // take their word for it
365 // XXXXXXX plain directory bind mount ops
367 static int dir_mount(struct bdev
*bdev
)
369 unsigned long mntflags
;
373 if (strcmp(bdev
->type
, "dir"))
375 if (!bdev
->src
|| !bdev
->dest
)
378 if (parse_mntopts(bdev
->mntopts
, &mntflags
, &mntdata
) < 0) {
383 ret
= mount(bdev
->src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
| mntflags
, mntdata
);
388 static int dir_umount(struct bdev
*bdev
)
390 if (strcmp(bdev
->type
, "dir"))
392 if (!bdev
->src
|| !bdev
->dest
)
394 return umount(bdev
->dest
);
397 /* the bulk of this needs to become a common helper */
398 static char *dir_new_path(char *src
, const char *oldname
, const char *name
,
399 const char *oldpath
, const char *lxcpath
)
404 nlen
= strlen(src
) + 1;
405 l1
= strlen(oldpath
);
407 /* if src starts with oldpath, look for oldname only after
409 if (strncmp(src
, oldpath
, l1
) == 0) {
411 nlen
+= (strlen(lxcpath
) - l1
);
413 l2
= strlen(oldname
);
414 while ((p
= strstr(p
, oldname
)) != NULL
) {
416 nlen
+= strlen(name
) - l2
;
424 if (strncmp(src
, oldpath
, l1
) == 0) {
425 p
+= sprintf(p
, "%s", lxcpath
);
429 while ((p2
= strstr(src
, oldname
)) != NULL
) {
430 strncpy(p
, src
, p2
-src
); // copy text up to oldname
431 p
+= p2
-src
; // move target pointer (p)
432 p
+= sprintf(p
, "%s", name
); // print new name in place of oldname
433 src
= p2
+ l2
; // move src to end of oldname
435 sprintf(p
, "%s", src
); // copy the rest of src
440 * for a simple directory bind mount, we substitute the old container
441 * name and paths for the new
443 static int dir_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
444 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
450 ERROR("directories cannot be snapshotted. Try overlayfs.");
454 if (!orig
->dest
|| !orig
->src
)
457 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
458 new->src
= malloc(len
);
461 ret
= snprintf(new->src
, len
, "%s/%s/rootfs", lxcpath
, cname
);
462 if (ret
< 0 || ret
>= len
)
464 if ((new->dest
= strdup(new->src
)) == NULL
)
470 static int dir_destroy(struct bdev
*orig
)
472 if (lxc_rmdir_onedev(orig
->src
) < 0)
477 static int dir_create(struct bdev
*bdev
, const char *dest
, const char *n
,
478 struct bdev_specs
*specs
)
480 bdev
->src
= strdup(dest
);
481 bdev
->dest
= strdup(dest
);
482 if (!bdev
->src
|| !bdev
->dest
) {
483 ERROR("Out of memory");
487 if (mkdir_p(bdev
->src
, 0755) < 0) {
488 ERROR("Error creating %s", bdev
->src
);
491 if (mkdir_p(bdev
->dest
, 0755) < 0) {
492 ERROR("Error creating %s", bdev
->dest
);
499 static const struct bdev_ops dir_ops
= {
500 .detect
= &dir_detect
,
502 .umount
= &dir_umount
,
503 .clone_paths
= &dir_clonepaths
,
504 .destroy
= &dir_destroy
,
505 .create
= &dir_create
,
506 .can_snapshot
= false,
512 // There are two ways we could do this. We could always specify the
513 // 'zfs device' (i.e. tank/lxc lxc/container) as rootfs. But instead
514 // (at least right now) we have lxc-create specify $lxcpath/$lxcname/rootfs
515 // as the mountpoint, so that it is always mounted.
517 // That means 'mount' is really never needed and could be noop, but for the
518 // sake of flexibility let's always bind-mount.
521 static int zfs_list_entry(const char *path
, char *output
, size_t inlen
)
523 struct lxc_popen_FILE
*f
;
526 f
= lxc_popen("zfs list 2> /dev/null");
528 SYSERROR("popen failed");
531 while (fgets(output
, inlen
, f
->f
)) {
532 if (strstr(output
, path
)) {
537 (void) lxc_pclose(f
);
542 static int zfs_detect(const char *path
)
544 char *output
= malloc(LXC_LOG_BUFFER_SIZE
);
548 ERROR("out of memory");
551 found
= zfs_list_entry(path
, output
, LXC_LOG_BUFFER_SIZE
);
556 static int zfs_mount(struct bdev
*bdev
)
558 unsigned long mntflags
;
562 if (strcmp(bdev
->type
, "zfs"))
564 if (!bdev
->src
|| !bdev
->dest
)
567 if (parse_mntopts(bdev
->mntopts
, &mntflags
, &mntdata
) < 0) {
572 ret
= mount(bdev
->src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
| mntflags
, mntdata
);
577 static int zfs_umount(struct bdev
*bdev
)
579 if (strcmp(bdev
->type
, "zfs"))
581 if (!bdev
->src
|| !bdev
->dest
)
583 return umount(bdev
->dest
);
586 static int zfs_clone(const char *opath
, const char *npath
, const char *oname
,
587 const char *nname
, const char *lxcpath
, int snapshot
)
589 // use the 'zfs list | grep opath' entry to get the zfsroot
590 char output
[MAXPATHLEN
], option
[MAXPATHLEN
], *p
;
591 const char *zfsroot
= output
;
595 if (zfs_list_entry(opath
, output
, MAXPATHLEN
)) {
596 // zfsroot is output up to ' '
597 if ((p
= index(output
, ' ')) == NULL
)
600 if ((p
= strrchr(output
, '/')) == NULL
)
604 zfsroot
= lxc_global_config_value("lxc.bdev.zfs.root");
606 ret
= snprintf(option
, MAXPATHLEN
, "-omountpoint=%s/%s/rootfs",
608 if (ret
< 0 || ret
>= MAXPATHLEN
)
611 // zfs create -omountpoint=$lxcpath/$lxcname $zfsroot/$nname
613 if ((pid
= fork()) < 0)
616 char dev
[MAXPATHLEN
];
618 ret
= snprintf(dev
, MAXPATHLEN
, "%s/%s", zfsroot
, nname
);
619 if (ret
< 0 || ret
>= MAXPATHLEN
)
621 execlp("zfs", "zfs", "create", option
, dev
, NULL
);
624 return wait_for_pid(pid
);
627 // 'zfs snapshot zfsroot/oname@nname
628 // zfs clone zfsroot/oname@nname zfsroot/nname
629 char path1
[MAXPATHLEN
], path2
[MAXPATHLEN
];
631 ret
= snprintf(path1
, MAXPATHLEN
, "%s/%s@%s", zfsroot
,
633 if (ret
< 0 || ret
>= MAXPATHLEN
)
635 (void) snprintf(path2
, MAXPATHLEN
, "%s/%s", zfsroot
, nname
);
637 // if the snapshot exists, delete it
638 if ((pid
= fork()) < 0)
641 execlp("zfs", "zfs", "destroy", path1
, NULL
);
644 // it probably doesn't exist so destroy probably will fail.
645 (void) wait_for_pid(pid
);
647 // run first (snapshot) command
648 if ((pid
= fork()) < 0)
651 execlp("zfs", "zfs", "snapshot", path1
, NULL
);
654 if (wait_for_pid(pid
) < 0)
657 // run second (clone) command
658 if ((pid
= fork()) < 0)
661 execlp("zfs", "zfs", "clone", option
, path1
, path2
, NULL
);
664 return wait_for_pid(pid
);
668 static int zfs_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
669 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
674 if (!orig
->src
|| !orig
->dest
)
677 if (snap
&& strcmp(orig
->type
, "zfs")) {
678 ERROR("zfs snapshot from %s backing store is not supported",
683 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
684 new->src
= malloc(len
);
687 ret
= snprintf(new->src
, len
, "%s/%s/rootfs", lxcpath
, cname
);
688 if (ret
< 0 || ret
>= len
)
690 if ((new->dest
= strdup(new->src
)) == NULL
)
693 return zfs_clone(orig
->src
, new->src
, oldname
, cname
, lxcpath
, snap
);
697 * TODO: detect whether this was a clone, and if so then also delete the
698 * snapshot it was based on, so that we don't hold the original
701 static int zfs_destroy(struct bdev
*orig
)
704 char output
[MAXPATHLEN
], *p
;
706 if ((pid
= fork()) < 0)
709 return wait_for_pid(pid
);
711 if (!zfs_list_entry(orig
->src
, output
, MAXPATHLEN
)) {
712 ERROR("Error: zfs entry for %s not found", orig
->src
);
716 // zfs mount is output up to ' '
717 if ((p
= index(output
, ' ')) == NULL
)
721 execlp("zfs", "zfs", "destroy", output
, NULL
);
725 static int zfs_create(struct bdev
*bdev
, const char *dest
, const char *n
,
726 struct bdev_specs
*specs
)
729 char option
[MAXPATHLEN
];
733 if (!specs
|| !specs
->zfs
.zfsroot
)
734 zfsroot
= lxc_global_config_value("lxc.bdev.zfs.root");
736 zfsroot
= specs
->zfs
.zfsroot
;
738 if (!(bdev
->dest
= strdup(dest
))) {
739 ERROR("No mount target specified or out of memory");
742 if (!(bdev
->src
= strdup(bdev
->dest
))) {
743 ERROR("out of memory");
747 ret
= snprintf(option
, MAXPATHLEN
, "-omountpoint=%s", bdev
->dest
);
748 if (ret
< 0 || ret
>= MAXPATHLEN
)
750 if ((pid
= fork()) < 0)
753 return wait_for_pid(pid
);
755 char dev
[MAXPATHLEN
];
756 ret
= snprintf(dev
, MAXPATHLEN
, "%s/%s", zfsroot
, n
);
757 if (ret
< 0 || ret
>= MAXPATHLEN
)
759 execlp("zfs", "zfs", "create", option
, dev
, NULL
);
763 static const struct bdev_ops zfs_ops
= {
764 .detect
= &zfs_detect
,
766 .umount
= &zfs_umount
,
767 .clone_paths
= &zfs_clonepaths
,
768 .destroy
= &zfs_destroy
,
769 .create
= &zfs_create
,
770 .can_snapshot
= true,
778 * Look at /sys/dev/block/maj:min/dm/uuid. If it contains the hardcoded LVM
779 * prefix "LVM-", then this is an lvm2 LV
781 static int lvm_detect(const char *path
)
783 char devp
[MAXPATHLEN
], buf
[4];
788 if (strncmp(path
, "lvm:", 4) == 0)
789 return 1; // take their word for it
791 ret
= stat(path
, &statbuf
);
794 if (!S_ISBLK(statbuf
.st_mode
))
797 ret
= snprintf(devp
, MAXPATHLEN
, "/sys/dev/block/%d:%d/dm/uuid",
798 major(statbuf
.st_rdev
), minor(statbuf
.st_rdev
));
799 if (ret
< 0 || ret
>= MAXPATHLEN
) {
800 ERROR("lvm uuid pathname too long");
803 fout
= fopen(devp
, "r");
806 ret
= fread(buf
, 1, 4, fout
);
808 if (ret
!= 4 || strncmp(buf
, "LVM-", 4) != 0)
813 static int lvm_mount(struct bdev
*bdev
)
815 if (strcmp(bdev
->type
, "lvm"))
817 if (!bdev
->src
|| !bdev
->dest
)
819 /* if we might pass in data sometime, then we'll have to enrich
820 * mount_unknown_fs */
821 return mount_unknown_fs(bdev
->src
, bdev
->dest
, bdev
->mntopts
);
824 static int lvm_umount(struct bdev
*bdev
)
826 if (strcmp(bdev
->type
, "lvm"))
828 if (!bdev
->src
|| !bdev
->dest
)
830 return umount(bdev
->dest
);
833 static int lvm_compare_lv_attr(const char *path
, int pos
, const char expected
) {
834 struct lxc_popen_FILE
*f
;
835 int ret
, len
, status
, start
=0;
836 char *cmd
, output
[12];
837 const char *lvscmd
= "lvs --unbuffered --noheadings -o lv_attr %s 2>/dev/null";
839 len
= strlen(lvscmd
) + strlen(path
) - 1;
842 ret
= snprintf(cmd
, len
, lvscmd
, path
);
843 if (ret
< 0 || ret
>= len
)
849 SYSERROR("popen failed");
853 ret
= fgets(output
, 12, f
->f
) == NULL
;
855 status
= lxc_pclose(f
);
857 if (ret
|| WEXITSTATUS(status
))
858 // Assume either vg or lvs do not exist, default
859 // comparison to false.
862 len
= strlen(output
);
863 while(start
< len
&& output
[start
] == ' ') start
++;
865 if (start
+ pos
< len
&& output
[start
+ pos
] == expected
)
871 static int lvm_is_thin_volume(const char *path
)
873 return lvm_compare_lv_attr(path
, 6, 't');
876 static int lvm_is_thin_pool(const char *path
)
878 return lvm_compare_lv_attr(path
, 0, 't');
882 * path must be '/dev/$vg/$lv', $vg must be an existing VG, and $lv must not
883 * yet exist. This function will attempt to create /dev/$vg/$lv of size
884 * $size. If thinpool is specified, we'll check for its existence and if it's
885 * a valid thin pool, and if so, we'll create the requested lv from that thin
888 static int do_lvm_create(const char *path
, uint64_t size
, const char *thinpool
)
891 char sz
[24], *pathdup
, *vg
, *lv
, *tp
= NULL
;
893 if ((pid
= fork()) < 0) {
894 SYSERROR("failed fork");
898 return wait_for_pid(pid
);
900 // specify bytes to lvcreate
901 ret
= snprintf(sz
, 24, "%"PRIu64
"b", size
);
902 if (ret
< 0 || ret
>= 24)
905 pathdup
= strdup(path
);
909 lv
= strrchr(pathdup
, '/');
916 vg
= strrchr(pathdup
, '/');
922 len
= strlen(pathdup
) + strlen(thinpool
) + 2;
925 ret
= snprintf(tp
, len
, "%s/%s", pathdup
, thinpool
);
926 if (ret
< 0 || ret
>= len
)
929 ret
= lvm_is_thin_pool(tp
);
930 INFO("got %d for thin pool at path: %s", ret
, tp
);
939 execlp("lvcreate", "lvcreate", "-L", sz
, vg
, "-n", lv
, (char *)NULL
);
941 execlp("lvcreate", "lvcreate", "--thinpool", tp
, "-V", sz
, vg
, "-n", lv
, (char *)NULL
);
947 static int lvm_snapshot(const char *orig
, const char *path
, uint64_t size
)
950 char sz
[24], *pathdup
, *lv
;
952 if ((pid
= fork()) < 0) {
953 SYSERROR("failed fork");
957 return wait_for_pid(pid
);
959 // specify bytes to lvcreate
960 ret
= snprintf(sz
, 24, "%"PRIu64
"b", size
);
961 if (ret
< 0 || ret
>= 24)
964 pathdup
= strdup(path
);
967 lv
= strrchr(pathdup
, '/');
975 // check if the original lv is backed by a thin pool, in which case we
976 // cannot specify a size that's different from the original size.
977 ret
= lvm_is_thin_volume(orig
);
984 ret
= execlp("lvcreate", "lvcreate", "-s", "-L", sz
, "-n", lv
, orig
, (char *)NULL
);
986 ret
= execlp("lvcreate", "lvcreate", "-s", "-n", lv
, orig
, (char *)NULL
);
993 // this will return 1 for physical disks, qemu-nbd, loop, etc
994 // right now only lvm is a block device
995 static int is_blktype(struct bdev
*b
)
997 if (strcmp(b
->type
, "lvm") == 0)
1002 static int lvm_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1003 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1007 uint64_t size
= newsize
;
1010 if (!orig
->src
|| !orig
->dest
)
1013 if (strcmp(orig
->type
, "lvm")) {
1017 ERROR("LVM snapshot from %s backing store is not supported",
1021 vg
= lxc_global_config_value("lxc.bdev.lvm.vg");
1022 len
= strlen("/dev/") + strlen(vg
) + strlen(cname
) + 2;
1023 if ((new->src
= malloc(len
)) == NULL
)
1025 ret
= snprintf(new->src
, len
, "/dev/%s/%s", vg
, cname
);
1026 if (ret
< 0 || ret
>= len
)
1029 new->src
= dir_new_path(orig
->src
, oldname
, cname
, oldpath
, lxcpath
);
1034 if (orig
->mntopts
) {
1035 new->mntopts
= strdup(orig
->mntopts
);
1040 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
1041 new->dest
= malloc(len
);
1044 ret
= snprintf(new->dest
, len
, "%s/%s/rootfs", lxcpath
, cname
);
1045 if (ret
< 0 || ret
>= len
)
1047 if (mkdir_p(new->dest
, 0755) < 0)
1050 if (is_blktype(orig
)) {
1051 if (!newsize
&& blk_getsize(orig
, &size
) < 0) {
1052 ERROR("Error getting size of %s", orig
->src
);
1055 if (detect_fs(orig
, fstype
, 100) < 0) {
1056 INFO("could not find fstype for %s, using ext3", orig
->src
);
1060 sprintf(fstype
, "ext3");
1062 size
= DEFAULT_FS_SIZE
;
1066 if (lvm_snapshot(orig
->src
, new->src
, size
) < 0) {
1067 ERROR("could not create %s snapshot of %s", new->src
, orig
->src
);
1071 if (do_lvm_create(new->src
, size
, lxc_global_config_value("lxc.bdev.lvm.thin_pool")) < 0) {
1072 ERROR("Error creating new lvm blockdev");
1075 if (do_mkfs(new->src
, fstype
) < 0) {
1076 ERROR("Error creating filesystem type %s on %s", fstype
,
1085 static int lvm_destroy(struct bdev
*orig
)
1089 if ((pid
= fork()) < 0)
1092 execlp("lvremove", "lvremove", "-f", orig
->src
, NULL
);
1095 return wait_for_pid(pid
);
1098 static int lvm_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1099 struct bdev_specs
*specs
)
1101 const char *vg
, *thinpool
, *fstype
, *lv
= n
;
1110 vg
= lxc_global_config_value("lxc.bdev.lvm.vg");
1112 thinpool
= specs
->lvm
.thinpool
;
1114 thinpool
= lxc_global_config_value("lxc.bdev.lvm.thin_pool");
1120 len
= strlen(vg
) + strlen(lv
) + 7;
1121 bdev
->src
= malloc(len
);
1125 ret
= snprintf(bdev
->src
, len
, "/dev/%s/%s", vg
, lv
);
1126 if (ret
< 0 || ret
>= len
)
1129 // fssize is in bytes.
1132 sz
= DEFAULT_FS_SIZE
;
1134 if (do_lvm_create(bdev
->src
, sz
, thinpool
) < 0) {
1135 ERROR("Error creating new lvm blockdev %s size %"PRIu64
" bytes", bdev
->src
, sz
);
1139 fstype
= specs
->fstype
;
1141 fstype
= DEFAULT_FSTYPE
;
1142 if (do_mkfs(bdev
->src
, fstype
) < 0) {
1143 ERROR("Error creating filesystem type %s on %s", fstype
,
1147 if (!(bdev
->dest
= strdup(dest
)))
1150 if (mkdir_p(bdev
->dest
, 0755) < 0) {
1151 ERROR("Error creating %s", bdev
->dest
);
1158 static const struct bdev_ops lvm_ops
= {
1159 .detect
= &lvm_detect
,
1160 .mount
= &lvm_mount
,
1161 .umount
= &lvm_umount
,
1162 .clone_paths
= &lvm_clonepaths
,
1163 .destroy
= &lvm_destroy
,
1164 .create
= &lvm_create
,
1165 .can_snapshot
= true,
1172 struct btrfs_ioctl_space_info
{
1173 unsigned long long flags
;
1174 unsigned long long total_bytes
;
1175 unsigned long long used_bytes
;
1178 struct btrfs_ioctl_space_args
{
1179 unsigned long long space_slots
;
1180 unsigned long long total_spaces
;
1181 struct btrfs_ioctl_space_info spaces
[0];
1184 #define BTRFS_IOCTL_MAGIC 0x94
1185 #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, unsigned long long)
1186 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
1187 struct btrfs_ioctl_space_args)
1189 static bool is_btrfs_fs(const char *path
)
1192 struct btrfs_ioctl_space_args sargs
;
1194 // make sure this is a btrfs filesystem
1195 fd
= open(path
, O_RDONLY
);
1198 sargs
.space_slots
= 0;
1199 sargs
.total_spaces
= 0;
1200 ret
= ioctl(fd
, BTRFS_IOC_SPACE_INFO
, &sargs
);
1208 static int btrfs_detect(const char *path
)
1213 if (!is_btrfs_fs(path
))
1216 // and make sure it's a subvolume.
1217 ret
= stat(path
, &st
);
1221 if (st
.st_ino
== 256 && S_ISDIR(st
.st_mode
))
1227 static int btrfs_mount(struct bdev
*bdev
)
1229 unsigned long mntflags
;
1233 if (strcmp(bdev
->type
, "btrfs"))
1235 if (!bdev
->src
|| !bdev
->dest
)
1238 if (parse_mntopts(bdev
->mntopts
, &mntflags
, &mntdata
) < 0) {
1243 ret
= mount(bdev
->src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
| mntflags
, mntdata
);
1248 static int btrfs_umount(struct bdev
*bdev
)
1250 if (strcmp(bdev
->type
, "btrfs"))
1252 if (!bdev
->src
|| !bdev
->dest
)
1254 return umount(bdev
->dest
);
1257 #define BTRFS_SUBVOL_NAME_MAX 4039
1258 #define BTRFS_PATH_NAME_MAX 4087
1260 struct btrfs_ioctl_vol_args
{
1261 signed long long fd
;
1262 char name
[BTRFS_PATH_NAME_MAX
+ 1];
1265 #define BTRFS_IOCTL_MAGIC 0x94
1266 #define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
1267 struct btrfs_ioctl_vol_args_v2)
1268 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
1269 struct btrfs_ioctl_vol_args_v2)
1270 #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
1271 struct btrfs_ioctl_vol_args)
1272 #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
1273 struct btrfs_ioctl_vol_args)
1275 #define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
1277 struct btrfs_ioctl_vol_args_v2
{
1278 signed long long fd
;
1279 unsigned long long transid
;
1280 unsigned long long flags
;
1283 unsigned long long size
;
1284 //struct btrfs_qgroup_inherit *qgroup_inherit;
1285 void *qgroup_inherit
;
1287 unsigned long long unused
[4];
1289 char name
[BTRFS_SUBVOL_NAME_MAX
+ 1];
1292 static int btrfs_subvolume_create(const char *path
)
1295 struct btrfs_ioctl_vol_args args
;
1296 char *p
, *newfull
= strdup(path
);
1299 ERROR("Error: out of memory");
1303 p
= strrchr(newfull
, '/');
1305 ERROR("bad path: %s", path
);
1311 fd
= open(newfull
, O_RDONLY
);
1313 ERROR("Error opening %s", newfull
);
1318 memset(&args
, 0, sizeof(args
));
1319 strncpy(args
.name
, p
+1, BTRFS_SUBVOL_NAME_MAX
);
1320 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
1321 ret
= ioctl(fd
, BTRFS_IOC_SUBVOL_CREATE
, &args
);
1322 INFO("btrfs: snapshot create ioctl returned %d", ret
);
1329 static int btrfs_snapshot(const char *orig
, const char *new)
1331 int fd
= -1, fddst
= -1, ret
= -1;
1332 struct btrfs_ioctl_vol_args_v2 args
;
1333 char *newdir
, *newname
, *newfull
= NULL
;
1335 newfull
= strdup(new);
1337 ERROR("Error: out of memory");
1340 // make sure the directory doesn't already exist
1341 if (rmdir(newfull
) < 0 && errno
!= -ENOENT
) {
1342 SYSERROR("Error removing empty new rootfs");
1345 newname
= basename(newfull
);
1346 newdir
= dirname(newfull
);
1347 fd
= open(orig
, O_RDONLY
);
1349 SYSERROR("Error opening original rootfs %s", orig
);
1352 fddst
= open(newdir
, O_RDONLY
);
1354 SYSERROR("Error opening new container dir %s", newdir
);
1358 memset(&args
, 0, sizeof(args
));
1360 strncpy(args
.name
, newname
, BTRFS_SUBVOL_NAME_MAX
);
1361 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
1362 ret
= ioctl(fddst
, BTRFS_IOC_SNAP_CREATE_V2
, &args
);
1363 INFO("btrfs: snapshot create ioctl returned %d", ret
);
1375 static int btrfs_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1376 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1379 if (!orig
->dest
|| !orig
->src
)
1382 if (strcmp(orig
->type
, "btrfs")) {
1385 ERROR("btrfs snapshot from %s backing store is not supported",
1389 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootfs") + 3;
1390 new->src
= malloc(len
);
1393 ret
= snprintf(new->src
, len
, "%s/%s/rootfs", lxcpath
, cname
);
1394 if (ret
< 0 || ret
>= len
)
1397 // in case rootfs is in custom path, reuse it
1398 if ((new->src
= dir_new_path(orig
->src
, oldname
, cname
, oldpath
, lxcpath
)) == NULL
)
1403 if ((new->dest
= strdup(new->src
)) == NULL
)
1406 if (orig
->mntopts
&& (new->mntopts
= strdup(orig
->mntopts
)) == NULL
)
1410 return btrfs_snapshot(orig
->dest
, new->dest
);
1412 if (rmdir(new->dest
) < 0 && errno
!= -ENOENT
) {
1413 SYSERROR("removing %s", new->dest
);
1417 return btrfs_subvolume_create(new->dest
);
1420 static int btrfs_destroy(struct bdev
*orig
)
1423 struct btrfs_ioctl_vol_args args
;
1424 char *path
= orig
->src
;
1425 char *p
, *newfull
= strdup(path
);
1428 ERROR("Error: out of memory");
1432 p
= strrchr(newfull
, '/');
1434 ERROR("bad path: %s", path
);
1440 fd
= open(newfull
, O_RDONLY
);
1442 ERROR("Error opening %s", newfull
);
1447 memset(&args
, 0, sizeof(args
));
1448 strncpy(args
.name
, p
+1, BTRFS_SUBVOL_NAME_MAX
);
1449 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
1450 ret
= ioctl(fd
, BTRFS_IOC_SNAP_DESTROY
, &args
);
1451 INFO("btrfs: snapshot create ioctl returned %d", ret
);
1458 static int btrfs_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1459 struct bdev_specs
*specs
)
1461 bdev
->src
= strdup(dest
);
1462 bdev
->dest
= strdup(dest
);
1463 if (!bdev
->src
|| !bdev
->dest
)
1465 return btrfs_subvolume_create(bdev
->dest
);
1468 static const struct bdev_ops btrfs_ops
= {
1469 .detect
= &btrfs_detect
,
1470 .mount
= &btrfs_mount
,
1471 .umount
= &btrfs_umount
,
1472 .clone_paths
= &btrfs_clonepaths
,
1473 .destroy
= &btrfs_destroy
,
1474 .create
= &btrfs_create
,
1475 .can_snapshot
= true,
1481 static int loop_detect(const char *path
)
1483 if (strncmp(path
, "loop:", 5) == 0)
1488 static int find_free_loopdev(int *retfd
, char *namep
)
1490 struct dirent dirent
, *direntp
;
1491 struct loop_info64 lo
;
1495 dir
= opendir("/dev");
1497 SYSERROR("Error opening /dev");
1500 while (!readdir_r(dir
, &dirent
, &direntp
)) {
1504 if (strncmp(direntp
->d_name
, "loop", 4) != 0)
1506 fd
= openat(dirfd(dir
), direntp
->d_name
, O_RDWR
);
1509 if (ioctl(fd
, LOOP_GET_STATUS64
, &lo
) == 0 || errno
!= ENXIO
) {
1514 // We can use this fd
1515 snprintf(namep
, 100, "/dev/%s", direntp
->d_name
);
1520 ERROR("No loop device found");
1528 static int loop_mount(struct bdev
*bdev
)
1530 int lfd
, ffd
= -1, ret
= -1;
1531 struct loop_info64 lo
;
1534 if (strcmp(bdev
->type
, "loop"))
1536 if (!bdev
->src
|| !bdev
->dest
)
1538 if (find_free_loopdev(&lfd
, loname
) < 0)
1541 ffd
= open(bdev
->src
+ 5, O_RDWR
);
1543 SYSERROR("Error opening backing file %s", bdev
->src
);
1547 if (ioctl(lfd
, LOOP_SET_FD
, ffd
) < 0) {
1548 SYSERROR("Error attaching backing file to loop dev");
1551 memset(&lo
, 0, sizeof(lo
));
1552 lo
.lo_flags
= LO_FLAGS_AUTOCLEAR
;
1553 if (ioctl(lfd
, LOOP_SET_STATUS64
, &lo
) < 0) {
1554 SYSERROR("Error setting autoclear on loop dev");
1558 ret
= mount_unknown_fs(loname
, bdev
->dest
, bdev
->mntopts
);
1560 ERROR("Error mounting %s", bdev
->src
);
1574 static int loop_umount(struct bdev
*bdev
)
1578 if (strcmp(bdev
->type
, "loop"))
1580 if (!bdev
->src
|| !bdev
->dest
)
1582 ret
= umount(bdev
->dest
);
1583 if (bdev
->lofd
>= 0) {
1590 static int do_loop_create(const char *path
, uint64_t size
, const char *fstype
)
1593 // create the new loopback file.
1594 fd
= creat(path
, S_IRUSR
|S_IWUSR
);
1597 if (lseek(fd
, size
, SEEK_SET
) < 0) {
1598 SYSERROR("Error seeking to set new loop file size");
1602 if (write(fd
, "1", 1) != 1) {
1603 SYSERROR("Error creating new loop file");
1609 SYSERROR("Error closing new loop file");
1613 // create an fs in the loopback file
1614 if (do_mkfs(path
, fstype
) < 0) {
1615 ERROR("Error creating filesystem type %s on %s", fstype
,
1624 * No idea what the original blockdev will be called, but the copy will be
1625 * called $lxcpath/$lxcname/rootdev
1627 static int loop_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1628 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1632 uint64_t size
= newsize
;
1637 ERROR("loop devices cannot be snapshotted.");
1641 if (!orig
->dest
|| !orig
->src
)
1644 len
= strlen(lxcpath
) + strlen(cname
) + strlen("rootdev") + 3;
1645 srcdev
= alloca(len
);
1646 ret
= snprintf(srcdev
, len
, "%s/%s/rootdev", lxcpath
, cname
);
1647 if (ret
< 0 || ret
>= len
)
1650 new->src
= malloc(len
+ 5);
1653 ret
= snprintf(new->src
, len
+ 5, "loop:%s", srcdev
);
1654 if (ret
< 0 || ret
>= len
+ 5)
1657 new->dest
= malloc(len
);
1660 ret
= snprintf(new->dest
, len
, "%s/%s/rootfs", lxcpath
, cname
);
1661 if (ret
< 0 || ret
>= len
)
1664 // it's tempting to say: if orig->src == loopback and !newsize, then
1665 // copy the loopback file. However, we'd have to make sure to
1666 // correctly keep holes! So punt for now.
1668 if (is_blktype(orig
)) {
1669 if (!newsize
&& blk_getsize(orig
, &size
) < 0) {
1670 ERROR("Error getting size of %s", orig
->src
);
1673 if (detect_fs(orig
, fstype
, 100) < 0) {
1674 INFO("could not find fstype for %s, using %s", orig
->src
,
1679 sprintf(fstype
, "%s", DEFAULT_FSTYPE
);
1681 size
= DEFAULT_FS_SIZE
;
1683 return do_loop_create(srcdev
, size
, fstype
);
1686 static int loop_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1687 struct bdev_specs
*specs
)
1697 // dest is passed in as $lxcpath / $lxcname / rootfs
1698 // srcdev will be: $lxcpath / $lxcname / rootdev
1699 // src will be 'loop:$srcdev'
1700 len
= strlen(dest
) + 2;
1701 srcdev
= alloca(len
);
1703 ret
= snprintf(srcdev
, len
, "%s", dest
);
1704 if (ret
< 0 || ret
>= len
)
1706 sprintf(srcdev
+ len
- 4, "dev");
1708 bdev
->src
= malloc(len
+ 5);
1711 ret
= snprintf(bdev
->src
, len
+ 5, "loop:%s", srcdev
);
1712 if (ret
< 0 || ret
>= len
+ 5)
1717 sz
= DEFAULT_FS_SIZE
;
1719 fstype
= specs
->fstype
;
1721 fstype
= DEFAULT_FSTYPE
;
1723 if (!(bdev
->dest
= strdup(dest
)))
1726 if (mkdir_p(bdev
->dest
, 0755) < 0) {
1727 ERROR("Error creating %s", bdev
->dest
);
1731 return do_loop_create(srcdev
, sz
, fstype
);
1734 static int loop_destroy(struct bdev
*orig
)
1736 return unlink(orig
->src
+ 5);
/*
 * Operation table for the "loop" backing store. Snapshots are not
 * supported by this type (can_snapshot is false).
 */
1739 static const struct bdev_ops loop_ops
= {
1740 .detect
= &loop_detect
,
1741 .mount
= &loop_mount
,
1742 .umount
= &loop_umount
,
1743 .clone_paths
= &loop_clonepaths
,
1744 .destroy
= &loop_destroy
,
1745 .create
= &loop_create
,
1746 .can_snapshot
= false,
/*
 * overlayfs_detect: report whether @path names an overlayfs store.
 *
 * Returns 1 when @path begins with the 10-character prefix "overlayfs:".
 * Only the prefix is examined; nothing on disk is inspected. The return
 * for a non-matching path is outside the lines visible here.
 */
1753 static int overlayfs_detect(const char *path
)
1755 if (strncmp(path
, "overlayfs:", 10) == 0)
1756 return 1; // take their word for it
1761 // overlayfs mount/umount/clone/destroy/create ops
/*
 * overlayfs_mount: mount an overlayfs bdev onto bdev->dest.
 *
 * bdev->src is expected in the form "overlayfs:<lower>:<upper>". A stack
 * copy of it is scanned for the two ':' separators to find the lower and
 * upper directories, bdev->mntopts is run through parse_mntopts() to get
 * mount flags and extra mount data, and mount(2) is called with filesystem
 * type "overlayfs" and the option string
 * "upperdir=<upper>,lowerdir=<lower>[,<mntdata>]".
 *
 * The result of mount() is logged via SYSERROR or INFO. Several statements
 * (early returns, the handling of `upper` after the second ':' is found,
 * the branch condition around the two option formats) are not visible in
 * this excerpt.
 */
1763 static int overlayfs_mount(struct bdev
*bdev
)
1765 char *options
, *dup
, *lower
, *upper
;
1767 unsigned long mntflags
;
1771 if (strcmp(bdev
->type
, "overlayfs"))
1773 if (!bdev
->src
|| !bdev
->dest
)
1776 // separately mount it first
1777 // mount -t overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest
// parse a stack copy of bdev->src
1778 dup
= alloca(strlen(bdev
->src
)+1);
1779 strcpy(dup
, bdev
->src
);
// the first ':' ends the "overlayfs" tag
1780 if (!(lower
= index(dup
, ':')))
// ++lower steps onto the lower dir; the next ':' precedes the upper dir
1782 if (!(upper
= index(++lower
, ':')))
1787 if (parse_mntopts(bdev
->mntopts
, &mntflags
, &mntdata
) < 0) {
1792 // TODO We should check whether bdev->src is a blockdev, and if so
1793 // but for now, only support overlays of a basic directory
// option string with the extra mount data appended (presumably the case
// where mntdata is non-NULL; the guarding condition is not visible here)
1796 len
= strlen(lower
) + strlen(upper
) + strlen("upperdir=,lowerdir=,") + strlen(mntdata
) + 1;
1797 options
= alloca(len
);
1798 ret
= snprintf(options
, len
, "upperdir=%s,lowerdir=%s,%s", upper
, lower
, mntdata
);
// option string without extra mount data
1801 len
= strlen(lower
) + strlen(upper
) + strlen("upperdir=,lowerdir=") + 1;
1802 options
= alloca(len
);
1803 ret
= snprintf(options
, len
, "upperdir=%s,lowerdir=%s", upper
, lower
);
// snprintf failure or truncation
1805 if (ret
< 0 || ret
>= len
) {
// the lower dir doubles as the mount source argument
1810 ret
= mount(lower
, bdev
->dest
, "overlayfs", MS_MGC_VAL
| mntflags
, options
);
1812 SYSERROR("overlayfs: error mounting %s onto %s options %s",
1813 lower
, bdev
->dest
, options
);
1815 INFO("overlayfs: mounted %s onto %s options %s",
1816 lower
, bdev
->dest
, options
);
/*
 * overlayfs_umount: unmount bdev->dest with umount(2) and return its result.
 *
 * Guarded by checks that bdev->type is "overlayfs" and that both src and
 * dest are set; the values returned when those checks fail are outside the
 * lines visible here.
 */
1820 static int overlayfs_umount(struct bdev
*bdev
)
1822 if (strcmp(bdev
->type
, "overlayfs"))
1824 if (!bdev
->src
|| !bdev
->dest
)
1826 return umount(bdev
->dest
);
/*
 * overlayfs_clonepaths: fill in new->dest and new->src for an overlayfs
 * clone of @orig.
 *
 * new->dest is derived from orig->dest with dir_new_path() and created with
 * mkdir_p(). The rest depends on the type of the original:
 *   - "dir":       a "delta0" directory is created beside the new rootfs and
 *                  new->src becomes "overlayfs:<orig->src>:<delta0 path>".
 *   - "overlayfs": the original delta path is mapped to a new location with
 *                  dir_new_path(), filled using do_rsync(), and new->src
 *                  becomes "overlayfs:<original lower>:<new delta>".
 *   - otherwise:   an error is logged saying the clone is not yet supported.
 *
 * The error string near the top indicates that non-snapshot requests are
 * rejected; the condition itself, the remaining parameter(s) of the
 * signature, and the cleanup/return statements are not visible in this
 * excerpt.
 */
1829 static int overlayfs_clonepaths(struct bdev
*orig
, struct bdev
*new, const char *oldname
,
1830 const char *cname
, const char *oldpath
, const char *lxcpath
, int snap
,
1834 ERROR("overlayfs is only for snapshot clones");
1838 if (!orig
->src
|| !orig
->dest
)
1841 new->dest
= dir_new_path(orig
->dest
, oldname
, cname
, oldpath
, lxcpath
);
1844 if (mkdir_p(new->dest
, 0755) < 0)
1847 if (strcmp(orig
->type
, "dir") == 0) {
1851 // if we have /var/lib/lxc/c2/rootfs, then delta will be
1852 // /var/lib/lxc/c2/delta0
1853 delta
= strdup(new->dest
);
// need at least six characters to overwrite below
1857 if (strlen(delta
) < 6) {
// replace the last six characters ("rootfs" in the example above)
1861 strcpy(&delta
[strlen(delta
)-6], "delta0");
1862 if ((ret
= mkdir(delta
, 0755)) < 0) {
1863 SYSERROR("error: mkdir %s", delta
);
1868 // the src will be 'overlayfs:lowerdir:upperdir'
// +12: "overlayfs:" (10) + ':' separator (1) + NUL (1)
1869 len
= strlen(delta
) + strlen(orig
->src
) + 12;
1870 new->src
= malloc(len
);
1875 ret
= snprintf(new->src
, len
, "overlayfs:%s:%s", orig
->src
, delta
);
1877 if (ret
< 0 || ret
>= len
)
1879 } else if (strcmp(orig
->type
, "overlayfs") == 0) {
1880 // What exactly do we want to do here?
1881 // I think we want to use the original lowerdir, with a
1882 // private delta which is originally rsynced from the
1884 char *osrc
, *odelta
, *nsrc
, *ndelta
;
1886 if (!(osrc
= strdup(orig
->src
)))
1888 nsrc
= index(osrc
, ':') + 1;
// the first ':' must come right after the 9-character "overlayfs" tag
// (nsrc == osrc + 10), and a second ':' must introduce the delta
1889 if (nsrc
!= osrc
+ 10 || (odelta
= index(nsrc
, ':')) == NULL
) {
1895 ndelta
= dir_new_path(odelta
, oldname
, cname
, oldpath
, lxcpath
);
1900 if (do_rsync(odelta
, ndelta
) < 0) {
1903 ERROR("copying overlayfs delta");
// +12: "overlayfs:" (10) + ':' separator (1) + NUL (1)
1906 len
= strlen(nsrc
) + strlen(ndelta
) + 12;
1907 new->src
= malloc(len
);
1913 ret
= snprintf(new->src
, len
, "overlayfs:%s:%s", nsrc
, ndelta
);
1916 if (ret
< 0 || ret
>= len
)
1919 ERROR("overlayfs clone of %s container is not yet supported",
1921 // Note, supporting this will require overlayfs_mount supporting
1922 // mounting of the underlay. No big deal, just needs to be done.
/*
 * overlayfs_destroy: remove the upper (delta) directory of an overlayfs
 * bdev with lxc_rmdir_onedev() and return its result.
 *
 * orig->src must start with "overlayfs:"; the ':' that follows the lower
 * dir is then located to find the upper dir.
 *
 * NOTE(review): index() leaves `upper` pointing at the ':' itself. The
 * lines between the lookup and the lxc_rmdir_onedev() call (presumably a
 * NULL check and an increment past the ':') are not visible here — confirm.
 */
1929 static int overlayfs_destroy(struct bdev
*orig
)
1933 if (strncmp(orig
->src
, "overlayfs:", 10) != 0)
// search starts after the 10-character "overlayfs:" prefix
1935 upper
= index(orig
->src
+ 10, ':');
1939 return lxc_rmdir_onedev(upper
);
1943 * to say 'lxc-create -t ubuntu -n o1 -B overlayfs' means you want
1944 * $lxcpath/$lxcname/rootfs to have the created container, while all
1945 * changes after starting the container are written to
1946 * $lxcpath/$lxcname/delta0
/*
 * overlayfs_create: prepare an overlayfs backing store rooted at @dest.
 *
 * @dest must be at least 8 characters long and end in "/rootfs". The delta
 * (upper) directory is the same path with the trailing "rootfs" replaced by
 * "delta0". Both the delta and bdev->dest are created with mkdir_p(), and
 * bdev->src is set to "overlayfs:<dest>:<delta>".
 *
 * @n and @specs are not referenced in the visible lines; the return
 * statements are outside this excerpt.
 */
1948 static int overlayfs_create(struct bdev
*bdev
, const char *dest
, const char *n
,
1949 struct bdev_specs
*specs
)
1952 int ret
, len
= strlen(dest
), newlen
;
// 7 == strlen("/rootfs")
1954 if (len
< 8 || strcmp(dest
+len
-7, "/rootfs") != 0)
1957 if (!(bdev
->dest
= strdup(dest
))) {
1958 ERROR("Out of memory");
1962 delta
= alloca(strlen(dest
)+1);
1963 strcpy(delta
, dest
);
// "delta0" has the same length as "rootfs", so the overwrite stays
// within the strlen(dest)+1 bytes of delta
1964 strcpy(delta
+len
-6, "delta0");
1966 if (mkdir_p(delta
, 0755) < 0) {
1967 ERROR("Error creating %s", delta
);
1971 /* overlayfs:lower:upper */
// dest and delta are both len characters; +2 covers ':' and the NUL
1972 newlen
= (2 * len
) + strlen("overlayfs:") + 2;
1973 bdev
->src
= malloc(newlen
);
1975 ERROR("Out of memory");
1978 ret
= snprintf(bdev
->src
, newlen
, "overlayfs:%s:%s", dest
, delta
);
1979 if (ret
< 0 || ret
>= newlen
)
1982 if (mkdir_p(bdev
->dest
, 0755) < 0) {
1983 ERROR("Error creating %s", bdev
->dest
);
/*
 * Operation table for the "overlayfs" backing store. This type advertises
 * snapshot support (can_snapshot is true).
 */
1990 static const struct bdev_ops overlayfs_ops
= {
1991 .detect
= &overlayfs_detect
,
1992 .mount
= &overlayfs_mount
,
1993 .umount
= &overlayfs_umount
,
1994 .clone_paths
= &overlayfs_clonepaths
,
1995 .destroy
= &overlayfs_destroy
,
1996 .create
= &overlayfs_create
,
1997 .can_snapshot
= true,
/*
 * Registry of known backing-store types. bdev_init() walks the entries in
 * this order calling each .ops->detect hook; bdev_get() looks an entry up
 * by .name.
 */
2000 static const struct bdev_type bdevs
[] = {
2001 {.name
= "zfs", .ops
= &zfs_ops
,},
2002 {.name
= "lvm", .ops
= &lvm_ops
,},
2003 {.name
= "btrfs", .ops
= &btrfs_ops
,},
2004 {.name
= "dir", .ops
= &dir_ops
,},
2005 {.name
= "overlayfs", .ops
= &overlayfs_ops
,},
2006 {.name
= "loop", .ops
= &loop_ops
,},
/* Number of entries in bdevs[]. */
2009 static const size_t numbdevs
= sizeof(bdevs
) / sizeof(struct bdev_type
);
/*
 * bdev_put: release resources held by @bdev.
 *
 * Only the free() of bdev->mntopts is visible in this excerpt.
 * NOTE(review): the other members and the struct itself are presumably
 * freed on lines not shown here — confirm against the full file.
 */
2011 void bdev_put(struct bdev
*bdev
)
2014 free(bdev
->mntopts
);
/*
 * bdev_get: allocate a zero-filled struct bdev for the backing store named
 * @type.
 *
 * bdevs[] is searched for an entry whose name equals @type exactly. The new
 * object's ops and type are pointed at that table entry; type aliases the
 * table's name string rather than copying it. No other member is assigned
 * in the visible lines. The handling of "no match" and of malloc() failure
 * is outside this excerpt.
 */
2022 struct bdev
*bdev_get(const char *type
)
2027 for (i
=0; i
<numbdevs
; i
++) {
2028 if (strcmp(bdevs
[i
].name
, type
) == 0)
2033 bdev
= malloc(sizeof(struct bdev
));
2036 memset(bdev
, 0, sizeof(struct bdev
));
2037 bdev
->ops
= bdevs
[i
].ops
;
2038 bdev
->type
= bdevs
[i
].name
;
/*
 * bdev_init: build a struct bdev describing an existing backing store.
 *
 * Each registered type's detect() hook is tried on @src in bdevs[] order;
 * the selected entry supplies ops and type for a freshly allocated,
 * zero-filled struct bdev. @mntopts, @src and @dst are duplicated into it
 * with strdup().
 *
 * NOTE(review): bdev_copy() calls this with NULL for @dst and @mntopts, so
 * the strdup() calls are presumably guarded by NULL checks on lines not
 * visible in this excerpt — confirm.
 */
2042 struct bdev
*bdev_init(const char *src
, const char *dst
, const char *mntopts
)
2047 for (i
=0; i
<numbdevs
; i
++) {
2049 r
= bdevs
[i
].ops
->detect(src
);
2056 bdev
= malloc(sizeof(struct bdev
));
2059 memset(bdev
, 0, sizeof(struct bdev
));
// ops and type alias the static table entry
2060 bdev
->ops
= bdevs
[i
].ops
;
2061 bdev
->type
= bdevs
[i
].name
;
2063 bdev
->mntopts
= strdup(mntopts
);
2065 bdev
->src
= strdup(src
);
2067 bdev
->dest
= strdup(dst
);
/*
 * rsync_rootfs: copy the contents of the original rootfs into the new one.
 *
 * After unshare(CLONE_NEWNS) gives the caller a new mount namespace, both
 * the original and the new bdev are mounted through their ops->mount hooks,
 * the process switches to gid 0, clears supplementary groups and switches
 * to uid 0, and do_rsync() copies orig->dest to new->dest.
 *
 * A setgroups() failure only produces a warning; every other failure is
 * logged with ERROR/SYSERROR (the corresponding return statements are not
 * visible in this excerpt).
 */
2077 static int rsync_rootfs(struct rsync_data
*data
)
2079 struct bdev
*orig
= data
->orig
,
2082 if (unshare(CLONE_NEWNS
) < 0) {
2083 SYSERROR("unshare CLONE_NEWNS");
2087 // If not a snapshot, copy the fs.
2088 if (orig
->ops
->mount(orig
) < 0) {
2089 ERROR("failed mounting %s onto %s", orig
->src
, orig
->dest
);
2092 if (new->ops
->mount(new) < 0) {
2093 ERROR("failed mounting %s onto %s", new->src
, new->dest
);
2096 if (setgid(0) < 0) {
2097 ERROR("Failed to setgid to 0");
// best effort: failing to drop supplementary groups is not fatal
2100 if (setgroups(0, NULL
) < 0)
2101 WARN("Failed to clear groups");
2102 if (setuid(0) < 0) {
2103 ERROR("Failed to setuid to 0");
// the copy runs between the two mount points (dest), while the error
// message below reports the src identifiers
2106 if (do_rsync(orig
->dest
, new->dest
) < 0) {
2107 ERROR("rsyncing %s to %s", orig
->src
, new->src
);
/*
 * rsync_rootfs_wrapper: adapter taking a void pointer so that
 * rsync_rootfs() can be used as a callback; bdev_copy() passes it to
 * userns_exec_1(). @data is treated as a struct rsync_data pointer.
 */
2114 static int rsync_rootfs_wrapper(void *data
)
2116 struct rsync_data
*arg
= data
;
2117 return rsync_rootfs(arg
);
2120 * If we're not snapshotting, then bdev_copy becomes a simple case of mount
2121 * the original, mount the new, and rsync the contents.
/*
 * bdev_copy: create the backing store for a clone of container @c0 named
 * @cname under @lxcpath.
 *
 * Steps visible in this excerpt:
 *   1. Require the original rootfs path to contain the old container name.
 *   2. Detect the original store with bdev_init() and set its dest to
 *      "<oldpath>/<oldname>/rootfs" (bounded by MAXPATHLEN).
 *   3. Apply restrictions on snapshotting and on non-"dir" types. The
 *      error strings refer to unprivileged users; the enclosing condition
 *      is not visible here.
 *   4. Choose the new store type: @bdevtype if given; "overlayfs" when a
 *      snapshot of a "dir" store is requested with no explicit type and
 *      LXC_CLONE_KEEPBDEVTYPE unset; otherwise the original's type.
 *   5. Call the new type's clone_paths hook to fill in the new paths.
 *   6. Run the rsync step, either through userns_exec_1() or by calling
 *      rsync_rootfs() directly, in a code path that ends in exit(); the
 *      other side uses wait_for_pid(). Presumably this is a fork()ed
 *      child and its parent — the fork itself is not visible.
 *
 * @bdevdata is not referenced in the visible lines. Cleanup and return
 * statements are outside this excerpt.
 */
2123 struct bdev
*bdev_copy(struct lxc_container
*c0
, const char *cname
,
2124 const char *lxcpath
, const char *bdevtype
,
2125 int flags
, const char *bdevdata
, uint64_t newsize
,
2128 struct bdev
*orig
, *new;
// decode the clone flags
2131 bool snap
= flags
& LXC_CLONE_SNAPSHOT
;
2132 bool maybe_snap
= flags
& LXC_CLONE_MAYBE_SNAPSHOT
;
2133 bool keepbdevtype
= flags
& LXC_CLONE_KEEPBDEVTYPE
;
2134 const char *src
= c0
->lxc_conf
->rootfs
.path
;
2135 const char *oldname
= c0
->name
;
2136 const char *oldpath
= c0
->config_path
;
2137 struct rsync_data data
;
2139 /* if the container name doesn't show up in the rootfs path, then
2140 * we don't know how to come up with a new name
2142 if (strstr(src
, oldname
) == NULL
) {
2143 ERROR("original rootfs path %s doesn't include container name %s",
2148 orig
= bdev_init(src
, NULL
, NULL
);
2150 ERROR("failed to detect blockdev type for %s", src
);
2156 orig
->dest
= malloc(MAXPATHLEN
);
2158 ERROR("out of memory");
2162 ret
= snprintf(orig
->dest
, MAXPATHLEN
, "%s/%s/rootfs", oldpath
, oldname
);
2163 if (ret
< 0 || ret
>= MAXPATHLEN
) {
2164 ERROR("rootfs path too long");
2170 /* check for privilege */
2172 if (snap
&& !maybe_snap
) {
2173 ERROR("Unprivileged users cannot snapshot");
2177 if (bdevtype
&& strcmp(bdevtype
, "dir") != 0) {
2178 ERROR("Unprivileged users can only make dir copy-clones");
2182 if (strcmp(orig
->type
, "dir") != 0) {
2183 ERROR("Unprivileged users can only make dir copy-clones");
2191 * special case for snapshot - if caller requested maybe_snapshot and
2192 * keepbdevtype and backing store is directory, then proceed with a copy
2193 * clone rather than returning error
2195 if (maybe_snap
&& keepbdevtype
&& !bdevtype
&& !orig
->ops
->can_snapshot
)
2199 * If newtype is NULL and snapshot is set, then use overlayfs
2201 if (!bdevtype
&& !keepbdevtype
&& snap
&& strcmp(orig
->type
, "dir") == 0)
2202 bdevtype
= "overlayfs";
2205 if (bdevtype
&& strcmp(orig
->type
, "dir") == 0 &&
2206 strcmp(bdevtype
, "overlayfs") == 0)
// a requested/selected type wins; otherwise reuse the original's type
2209 new = bdev_get(bdevtype
? bdevtype
: orig
->type
);
2211 ERROR("no such block device type: %s", bdevtype
? bdevtype
: orig
->type
);
2216 if (new->ops
->clone_paths(orig
, new, oldname
, cname
, oldpath
, lxcpath
, snap
, newsize
) < 0) {
2217 ERROR("failed getting pathnames for cloned storage: %s", src
);
2234 int ret
= wait_for_pid(pid
);
2246 ret
= userns_exec_1(c0
->lxc_conf
, rsync_rootfs_wrapper
, &data
);
2248 ret
= rsync_rootfs(&data
);
// any failure is collapsed to exit status 1
2250 exit(ret
== 0 ? 0 : 1);
/*
 * do_bdev_create: instantiate the backing-store type @type with bdev_get()
 * and run its create hook with @dest, @cname and @specs.
 *
 * bdev_create() treats a NULL result from this function as failure. The
 * statements that handle a failed bdev_get() or create() call, and the
 * final return, are outside the lines visible here.
 */
2253 static struct bdev
* do_bdev_create(const char *dest
, const char *type
,
2254 const char *cname
, struct bdev_specs
*specs
)
2256 struct bdev
*bdev
= bdev_get(type
);
2261 if (bdev
->ops
->create(bdev
, dest
, cname
, specs
) < 0) {
2271 * Create a backing store for a container.
2272 * If successful, return a struct bdev *, with the bdev mounted and ready
2273 * for use. Before completing, the caller will need to call the
2274 * umount operation and bdev_put().
2275 * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
2276 * @type: the bdevtype (dir, btrfs, zfs, etc)
2277 * @cname: the container name
2278 * @specs: details about the backing store to create, like fstype
/*
 * bdev_create: create a backing store at @dest, selecting the type from
 * @type:
 *   - "best":            try btrfs, zfs, lvm, dir in that order and use the
 *                        first one do_bdev_create() succeeds with;
 *   - "a,b,c" (commas):  try each listed type in order;
 *   - anything else:     create exactly that type.
 * An early return near the top creates a "dir" store; its guard (presumably
 * a NULL @type check) is not visible in this excerpt.
 */
2280 struct bdev
*bdev_create(const char *dest
, const char *type
,
2281 const char *cname
, struct bdev_specs
*specs
)
2284 char *best_options
[] = {"btrfs", "zfs", "lvm", "dir", NULL
};
2287 return do_bdev_create(dest
, "dir", cname
, specs
);
2289 if (strcmp(type
, "best") == 0) {
2291 // try for the best backing store type, according to our
2292 // opinionated preferences
2293 for (i
=0; best_options
[i
]; i
++) {
2294 if ((bdev
= do_bdev_create(dest
, best_options
[i
], cname
, specs
)))
2297 return NULL
; // 'dir' should never fail, so this shouldn't happen
2301 if (index(type
, ',') != NULL
) {
// strtok_r() modifies its input, hence the stack buffer for a copy of
// type (the copy statement itself is not visible in this excerpt)
2302 char *dup
= alloca(strlen(type
)+1), *saveptr
, *token
;
2304 for (token
= strtok_r(dup
, ",", &saveptr
); token
;
2305 token
= strtok_r(NULL
, ",", &saveptr
)) {
2306 if ((bdev
= do_bdev_create(dest
, token
, cname
, specs
)))
2311 return do_bdev_create(dest
, type
, cname
, specs
);
2314 char *overlayfs_getlower(char *p
)
2316 char *p1
= index(p
, ':');