2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include <sys/ioctl.h>
36 #include <sys/types.h>
45 lxc_log_define(btrfs
, lxc
);
48 * Return the full path of objid under dirid. Let's say dirid is
49 * /lxc/c1/rootfs, and objid is /lxc/c1/rootfs/a/b/c. Then we will
50 * return a/b/c. If instead objid is for /lxc/c1/rootfs/a, we will
53 char *get_btrfs_subvol_path(int fd
, u64 dir_id
, u64 objid
, char *name
,
56 struct btrfs_ioctl_ino_lookup_args args
;
61 memset(&args
, 0, sizeof(args
));
63 args
.objectid
= objid
;
65 ret
= ioctl(fd
, BTRFS_IOC_INO_LOOKUP
, &args
);
68 ERROR("Failed to lookup path for %llu %llu %s - %s\n",
69 (unsigned long long) dir_id
,
70 (unsigned long long) objid
,
74 INFO("Got path for %llu %llu - %s\n",
75 (unsigned long long) objid
, (unsigned long long) dir_id
,
80 * we're in a subdirectory of ref_tree, the kernel ioctl
81 * puts a / in there for us
83 len
= strlen(args
.name
) + name_len
+ 2;
84 retpath
= malloc(len
);
87 strcpy(retpath
, args
.name
);
89 strncat(retpath
, name
, name_len
);
91 /* we're at the root of ref_tree */
93 retpath
= malloc(len
);
97 strncat(retpath
, name
, name_len
);
102 int btrfs_list_get_path_rootid(int fd
, u64
*treeid
)
105 struct btrfs_ioctl_ino_lookup_args args
;
107 memset(&args
, 0, sizeof(args
));
108 args
.objectid
= BTRFS_FIRST_FREE_OBJECTID
;
110 ret
= ioctl(fd
, BTRFS_IOC_INO_LOOKUP
, &args
);
112 WARN("Warning: can't perform the search -%s\n",
116 *treeid
= args
.treeid
;
120 bool is_btrfs_fs(const char *path
)
123 struct btrfs_ioctl_space_args sargs
;
125 /* Make sure this is a btrfs filesystem. */
126 fd
= open(path
, O_RDONLY
);
129 sargs
.space_slots
= 0;
130 sargs
.total_spaces
= 0;
131 ret
= ioctl(fd
, BTRFS_IOC_SPACE_INFO
, &sargs
);
140 * Taken from btrfs toolsuite. Test if path is a subvolume.
141 * return 0; path exists but it is not a subvolume
142 * return 1; path exists and it is a subvolume
145 int is_btrfs_subvol(const char *path
)
151 ret
= stat(path
, &st
);
155 if (st
.st_ino
!= BTRFS_FIRST_FREE_OBJECTID
|| !S_ISDIR(st
.st_mode
))
158 ret
= statfs(path
, &stfs
);
162 return stfs
.f_type
== BTRFS_SUPER_MAGIC
;
165 bool btrfs_detect(const char *path
)
170 if (!strncmp(path
, "btrfs:", 6))
173 if (!is_btrfs_fs(path
))
176 /* make sure it's a subvolume */
177 ret
= stat(path
, &st
);
181 if (st
.st_ino
== 256 && S_ISDIR(st
.st_mode
))
187 int btrfs_mount(struct lxc_storage
*bdev
)
189 unsigned long mntflags
;
194 if (strcmp(bdev
->type
, "btrfs"))
197 if (!bdev
->src
|| !bdev
->dest
)
200 if (parse_mntopts(bdev
->mntopts
, &mntflags
, &mntdata
) < 0) {
205 src
= lxc_storage_get_path(bdev
->src
, "btrfs");
207 ret
= mount(src
, bdev
->dest
, "bind", MS_BIND
| MS_REC
| mntflags
, mntdata
);
212 int btrfs_umount(struct lxc_storage
*bdev
)
214 if (strcmp(bdev
->type
, "btrfs"))
217 if (!bdev
->src
|| !bdev
->dest
)
220 return umount(bdev
->dest
);
223 static int btrfs_subvolume_create(const char *path
)
225 int ret
, saved_errno
;
226 struct btrfs_ioctl_vol_args args
;
230 newfull
= strdup(path
);
236 p
= strrchr(newfull
, '/');
244 fd
= open(newfull
, O_RDONLY
);
250 memset(&args
, 0, sizeof(args
));
251 strncpy(args
.name
, p
+ 1, BTRFS_SUBVOL_NAME_MAX
);
252 args
.name
[BTRFS_SUBVOL_NAME_MAX
- 1] = 0;
254 ret
= ioctl(fd
, BTRFS_IOC_SUBVOL_CREATE
, &args
);
263 int btrfs_same_fs(const char *orig
, const char *new)
265 int fd_orig
= -1, fd_new
= -1, ret
= -1;
266 struct btrfs_ioctl_fs_info_args orig_args
, new_args
;
268 fd_orig
= open(orig
, O_RDONLY
);
270 SYSERROR("Error opening original rootfs %s", orig
);
273 ret
= ioctl(fd_orig
, BTRFS_IOC_FS_INFO
, &orig_args
);
275 SYSERROR("BTRFS_IOC_FS_INFO %s", orig
);
279 fd_new
= open(new, O_RDONLY
);
281 SYSERROR("Error opening new container dir %s", new);
285 ret
= ioctl(fd_new
, BTRFS_IOC_FS_INFO
, &new_args
);
287 SYSERROR("BTRFS_IOC_FS_INFO %s", new);
291 if (strncmp(orig_args
.fsid
, new_args
.fsid
, BTRFS_FSID_SIZE
) != 0) {
304 int btrfs_snapshot(const char *orig
, const char *new)
306 struct btrfs_ioctl_vol_args_v2 args
;
307 char *newdir
, *newname
;
308 char *newfull
= NULL
;
309 int saved_errno
= -1;
310 int fd
= -1, fddst
= -1, ret
= -1;
312 newfull
= strdup(new);
316 ret
= rmdir(newfull
);
317 if (ret
< 0 && errno
!= ENOENT
)
320 newname
= basename(newfull
);
321 fd
= open(orig
, O_RDONLY
);
325 newdir
= dirname(newfull
);
326 fddst
= open(newdir
, O_RDONLY
);
330 memset(&args
, 0, sizeof(args
));
332 strncpy(args
.name
, newname
, BTRFS_SUBVOL_NAME_MAX
);
333 args
.name
[BTRFS_SUBVOL_NAME_MAX
- 1] = 0;
335 ret
= ioctl(fddst
, BTRFS_IOC_SNAP_CREATE_V2
, &args
);
345 if (saved_errno
>= 0)
350 int btrfs_snapshot_wrapper(void *data
)
353 struct rsync_data_char
*arg
= data
;
356 ERROR("Failed to setgid to 0");
359 if (setgroups(0, NULL
) < 0)
360 WARN("Failed to clear groups");
363 ERROR("Failed to setuid to 0");
367 src
= lxc_storage_get_path(arg
->src
, "btrfs");
368 return btrfs_snapshot(src
, arg
->dest
);
371 int btrfs_clonepaths(struct lxc_storage
*orig
, struct lxc_storage
*new,
372 const char *oldname
, const char *cname
,
373 const char *oldpath
, const char *lxcpath
, int snap
,
374 uint64_t newsize
, struct lxc_conf
*conf
)
378 if (!orig
->dest
|| !orig
->src
)
381 if (strcmp(orig
->type
, "btrfs") && snap
) {
382 ERROR("btrfs snapshot from %s backing store is not supported",
387 new->src
= lxc_string_join(
389 (const char *[]){"btrfs:", *lxcpath
!= '/' ? lxcpath
: ++lxcpath
,
390 cname
, "rootfs", NULL
},
393 ERROR("Failed to create new rootfs path");
396 TRACE("Constructed new rootfs path \"%s\"", new->src
);
398 src
= lxc_storage_get_path(new->src
, "btrfs");
399 new->dest
= strdup(src
);
401 ERROR("Failed to duplicate string \"%s\"", src
);
406 new->mntopts
= strdup(orig
->mntopts
);
408 ERROR("Failed to duplicate string \"%s\"",
417 bool btrfs_create_clone(struct lxc_conf
*conf
, struct lxc_storage
*orig
,
418 struct lxc_storage
*new, uint64_t newsize
)
421 struct rsync_data data
= {0, 0};
422 char cmd_output
[MAXPATHLEN
] = {0};
424 ret
= rmdir(new->dest
);
425 if (ret
< 0 && errno
!= ENOENT
)
428 ret
= btrfs_subvolume_create(new->dest
);
430 SYSERROR("Failed to create btrfs subvolume \"%s\"", new->dest
);
434 /* rsync the contents from source to target */
437 if (am_guest_unpriv()) {
438 ret
= userns_exec_full(conf
, lxc_storage_rsync_exec_wrapper
,
439 &data
, "lxc_storage_rsync_exec_wrapper");
441 ERROR("Failed to rsync from \"%s\" into \"%s\"",
442 orig
->dest
, new->dest
);
449 ret
= run_command(cmd_output
, sizeof(cmd_output
),
450 lxc_storage_rsync_exec_wrapper
, (void *)&data
);
452 ERROR("Failed to rsync from \"%s\" into \"%s\": %s", orig
->dest
,
453 new->dest
, cmd_output
);
460 bool btrfs_create_snapshot(struct lxc_conf
*conf
, struct lxc_storage
*orig
,
461 struct lxc_storage
*new, uint64_t newsize
)
465 ret
= rmdir(new->dest
);
466 if (ret
< 0 && errno
!= ENOENT
)
469 if (am_guest_unpriv()) {
470 struct rsync_data_char args
;
472 args
.src
= orig
->src
;
473 args
.dest
= new->dest
;
475 ret
= userns_exec_1(conf
, btrfs_snapshot_wrapper
, &args
,
476 "btrfs_snapshot_wrapper");
478 ERROR("Failed to run \"btrfs_snapshot_wrapper\"");
482 TRACE("Created btrfs snapshot \"%s\" from \"%s\"", new->dest
,
487 ret
= btrfs_snapshot(orig
->src
, new->dest
);
489 SYSERROR("Failed to create btrfs snapshot \"%s\" from \"%s\"",
490 new->dest
, orig
->dest
);
494 TRACE("Created btrfs snapshot \"%s\" from \"%s\"", new->dest
, orig
->dest
);
498 static int btrfs_do_destroy_subvol(const char *path
)
501 struct btrfs_ioctl_vol_args args
;
502 char *p
, *newfull
= strdup(path
);
505 ERROR("Error: out of memory");
509 p
= strrchr(newfull
, '/');
511 ERROR("bad path: %s", path
);
517 fd
= open(newfull
, O_RDONLY
);
519 SYSERROR("Error opening %s", newfull
);
524 memset(&args
, 0, sizeof(args
));
525 strncpy(args
.name
, p
+1, BTRFS_SUBVOL_NAME_MAX
);
526 args
.name
[BTRFS_SUBVOL_NAME_MAX
-1] = 0;
527 ret
= ioctl(fd
, BTRFS_IOC_SNAP_DESTROY
, &args
);
528 INFO("btrfs: snapshot destroy ioctl returned %d for %s", ret
, path
);
529 if (ret
< 0 && errno
== EPERM
)
530 ERROR("Is the rootfs mounted with -o user_subvol_rm_allowed?");
537 static int get_btrfs_tree_idx(struct my_btrfs_tree
*tree
, u64 id
)
542 for (i
= 0; i
< tree
->num
; i
++) {
543 if (tree
->nodes
[i
].objid
== id
)
549 static struct my_btrfs_tree
*create_my_btrfs_tree(u64 id
, const char *path
,
552 struct my_btrfs_tree
*tree
;
554 tree
= malloc(sizeof(struct my_btrfs_tree
));
557 tree
->nodes
= malloc(sizeof(struct mytree_node
));
563 tree
->nodes
[0].dirname
= NULL
;
564 tree
->nodes
[0].name
= strdup(path
);
565 if (!tree
->nodes
[0].name
) {
570 tree
->nodes
[0].parentid
= 0;
571 tree
->nodes
[0].objid
= id
;
575 static bool update_tree_node(struct mytree_node
*n
, u64 id
, u64 parent
,
576 char *name
, int name_len
, char *dirname
)
581 n
->parentid
= parent
;
583 n
->name
= malloc(name_len
+ 1);
586 strncpy(n
->name
, name
, name_len
);
587 n
->name
[name_len
] = '\0';
590 n
->dirname
= malloc(strlen(dirname
) + 1);
595 strcpy(n
->dirname
, dirname
);
600 static bool add_btrfs_tree_node(struct my_btrfs_tree
*tree
, u64 id
, u64 parent
,
601 char *name
, int name_len
, char *dirname
)
603 struct mytree_node
*tmp
;
605 int i
= get_btrfs_tree_idx(tree
, id
);
607 return update_tree_node(&tree
->nodes
[i
], id
, parent
, name
,
610 tmp
= realloc(tree
->nodes
, (tree
->num
+1) * sizeof(struct mytree_node
));
614 memset(&tree
->nodes
[tree
->num
], 0, sizeof(struct mytree_node
));
615 if (!update_tree_node(&tree
->nodes
[tree
->num
], id
, parent
, name
,
622 static void free_btrfs_tree(struct my_btrfs_tree
*tree
)
627 for (i
= 0; i
< tree
->num
; i
++) {
628 free(tree
->nodes
[i
].name
);
629 free(tree
->nodes
[i
].dirname
);
636 * Given a @tree of subvolumes under @path, ask btrfs to remove each
639 static bool do_remove_btrfs_children(struct my_btrfs_tree
*tree
, u64 root_id
,
646 for (i
= 0; i
< tree
->num
; i
++) {
647 if (tree
->nodes
[i
].parentid
== root_id
) {
648 if (!tree
->nodes
[i
].dirname
) {
649 WARN("Odd condition: child objid with no name under %s\n", path
);
652 len
= strlen(path
) + strlen(tree
->nodes
[i
].dirname
) + 2;
653 newpath
= malloc(len
);
655 ERROR("Out of memory");
658 snprintf(newpath
, len
, "%s/%s", path
, tree
->nodes
[i
].dirname
);
659 if (!do_remove_btrfs_children(tree
, tree
->nodes
[i
].objid
, newpath
)) {
660 ERROR("Failed to prune %s\n", tree
->nodes
[i
].name
);
664 if (btrfs_do_destroy_subvol(newpath
) != 0) {
665 ERROR("Failed to remove %s\n", newpath
);
675 static int btrfs_recursive_destroy(const char *path
)
679 struct btrfs_ioctl_search_args args
;
680 struct btrfs_ioctl_search_key
*sk
= &args
.key
;
681 struct btrfs_ioctl_search_header sh
;
682 struct btrfs_root_ref
*ref
;
683 struct my_btrfs_tree
*tree
;
685 unsigned long off
= 0;
691 fd
= open(path
, O_RDONLY
);
693 ERROR("Failed to open %s\n", path
);
697 if (btrfs_list_get_path_rootid(fd
, &root_id
)) {
700 if (e
== EPERM
|| e
== EACCES
) {
701 WARN("Will simply try removing");
708 tree
= create_my_btrfs_tree(root_id
, path
, strlen(path
));
710 ERROR("Out of memory\n");
714 /* Walk all subvols looking for any under this id */
715 memset(&args
, 0, sizeof(args
));
717 /* search in the tree of tree roots */
720 sk
->max_type
= BTRFS_ROOT_REF_KEY
;
721 sk
->min_type
= BTRFS_ROOT_ITEM_KEY
;
722 sk
->min_objectid
= 0;
723 sk
->max_objectid
= (u64
)-1;
724 sk
->max_offset
= (u64
)-1;
726 sk
->max_transid
= (u64
)-1;
730 ret
= ioctl(fd
, BTRFS_IOC_TREE_SEARCH
, &args
);
734 free_btrfs_tree(tree
);
735 if (e
== EPERM
|| e
== EACCES
) {
736 WARN("Warn: can't perform the search under %s. Will simply try removing", path
);
740 ERROR("Error: can't perform the search under %s\n", path
);
743 if (sk
->nr_items
== 0)
747 for (i
= 0; i
< sk
->nr_items
; i
++) {
748 memcpy(&sh
, args
.buf
+ off
, sizeof(sh
));
751 * A backref key with the name and dirid of the parent
752 * comes followed by the reoot ref key which has the
753 * name of the child subvol in question.
755 if (sh
.objectid
!= root_id
&& sh
.type
== BTRFS_ROOT_BACKREF_KEY
) {
756 ref
= (struct btrfs_root_ref
*)(args
.buf
+ off
);
757 name_len
= btrfs_stack_root_ref_name_len(ref
);
758 name
= (char *)(ref
+ 1);
759 dir_id
= btrfs_stack_root_ref_dirid(ref
);
760 tmppath
= get_btrfs_subvol_path(fd
, sh
.offset
,
761 dir_id
, name
, name_len
);
762 if (!add_btrfs_tree_node(tree
, sh
.objectid
,
764 name_len
, tmppath
)) {
765 ERROR("Out of memory");
766 free_btrfs_tree(tree
);
776 * record the mins in sk so we can make sure the
777 * next search doesn't repeat this root
779 sk
->min_objectid
= sh
.objectid
;
780 sk
->min_type
= sh
.type
;
781 sk
->min_offset
= sh
.offset
;
790 if (sk
->min_type
> BTRFS_ROOT_BACKREF_KEY
) {
791 sk
->min_type
= BTRFS_ROOT_ITEM_KEY
;
796 if (sk
->min_objectid
>= sk
->max_objectid
)
801 /* now actually remove them */
803 if (!do_remove_btrfs_children(tree
, root_id
, path
)) {
804 free_btrfs_tree(tree
);
805 ERROR("failed pruning\n");
809 free_btrfs_tree(tree
);
810 /* All child subvols have been removed, now remove this one */
812 return btrfs_do_destroy_subvol(path
);
815 bool btrfs_try_remove_subvol(const char *path
)
817 if (!btrfs_detect(path
))
820 return btrfs_recursive_destroy(path
) == 0;
823 int btrfs_destroy(struct lxc_storage
*orig
)
827 src
= lxc_storage_get_path(orig
->src
, "btrfs");
829 return btrfs_recursive_destroy(src
);
832 int btrfs_create(struct lxc_storage
*bdev
, const char *dest
, const char *n
,
833 struct bdev_specs
*specs
)
838 len
= strlen(dest
) + 1;
839 /* strlen("btrfs:") */
841 bdev
->src
= malloc(len
);
843 ERROR("Failed to allocate memory");
847 ret
= snprintf(bdev
->src
, len
, "btrfs:%s", dest
);
848 if (ret
< 0 || (size_t)ret
>= len
) {
849 ERROR("Failed to create string");
853 bdev
->dest
= strdup(dest
);
855 ERROR("Failed to duplicate string \"%s\"", dest
);
859 ret
= btrfs_subvolume_create(bdev
->dest
);
861 SYSERROR("Failed to create btrfs subvolume \"%s\"", bdev
->dest
);