2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
36 #include <sys/mount.h>
37 #include <sys/prctl.h>
38 #include <sys/types.h>
54 #include "namespace.h"
60 #include "storage_utils.h"
65 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
68 lxc_log_define(storage
, lxc
);
71 static const struct lxc_storage_ops aufs_ops
= {
72 .detect
= &aufs_detect
,
74 .umount
= &aufs_umount
,
75 .clone_paths
= &aufs_clonepaths
,
76 .destroy
= &aufs_destroy
,
77 .create
= &aufs_create
,
85 static const struct lxc_storage_ops btrfs_ops
= {
86 .detect
= &btrfs_detect
,
87 .mount
= &btrfs_mount
,
88 .umount
= &btrfs_umount
,
89 .clone_paths
= &btrfs_clonepaths
,
90 .destroy
= &btrfs_destroy
,
91 .create
= &btrfs_create
,
92 .copy
= &btrfs_create_clone
,
93 .snapshot
= &btrfs_create_snapshot
,
99 static const struct lxc_storage_ops dir_ops
= {
100 .detect
= &dir_detect
,
102 .umount
= &dir_umount
,
103 .clone_paths
= &dir_clonepaths
,
104 .destroy
= &dir_destroy
,
105 .create
= &dir_create
,
108 .can_snapshot
= false,
113 static const struct lxc_storage_ops loop_ops
= {
114 .detect
= &loop_detect
,
115 .mount
= &loop_mount
,
116 .umount
= &loop_umount
,
117 .clone_paths
= &loop_clonepaths
,
118 .destroy
= &loop_destroy
,
119 .create
= &loop_create
,
122 .can_snapshot
= false,
127 static const struct lxc_storage_ops lvm_ops
= {
128 .detect
= &lvm_detect
,
130 .umount
= &lvm_umount
,
131 .clone_paths
= &lvm_clonepaths
,
132 .destroy
= &lvm_destroy
,
133 .create
= &lvm_create
,
134 .copy
= &lvm_create_clone
,
135 .snapshot
= &lvm_create_snapshot
,
136 .can_snapshot
= true,
141 const struct lxc_storage_ops nbd_ops
= {
142 .detect
= &nbd_detect
,
144 .umount
= &nbd_umount
,
145 .clone_paths
= &nbd_clonepaths
,
146 .destroy
= &nbd_destroy
,
147 .create
= &nbd_create
,
150 .can_snapshot
= true,
155 static const struct lxc_storage_ops ovl_ops
= {
156 .detect
= &ovl_detect
,
158 .umount
= &ovl_umount
,
159 .clone_paths
= &ovl_clonepaths
,
160 .destroy
= &ovl_destroy
,
161 .create
= &ovl_create
,
164 .can_snapshot
= true,
169 static const struct lxc_storage_ops rbd_ops
= {
170 .detect
= &rbd_detect
,
172 .umount
= &rbd_umount
,
173 .clone_paths
= &rbd_clonepaths
,
174 .destroy
= &rbd_destroy
,
175 .create
= &rbd_create
,
178 .can_snapshot
= false,
183 static const struct lxc_storage_ops zfs_ops
= {
184 .detect
= &zfs_detect
,
186 .umount
= &zfs_umount
,
187 .clone_paths
= &zfs_clonepaths
,
188 .destroy
= &zfs_destroy
,
189 .create
= &zfs_create
,
191 .snapshot
= &zfs_snapshot
,
192 .can_snapshot
= true,
196 struct lxc_storage_type
{
198 const struct lxc_storage_ops
*ops
;
201 static const struct lxc_storage_type bdevs
[] = {
202 { .name
= "dir", .ops
= &dir_ops
, },
203 { .name
= "zfs", .ops
= &zfs_ops
, },
204 { .name
= "lvm", .ops
= &lvm_ops
, },
205 { .name
= "rbd", .ops
= &rbd_ops
, },
206 { .name
= "btrfs", .ops
= &btrfs_ops
, },
207 { .name
= "aufs", .ops
= &aufs_ops
, },
208 { .name
= "overlay", .ops
= &ovl_ops
, },
209 { .name
= "overlayfs", .ops
= &ovl_ops
, },
210 { .name
= "loop", .ops
= &loop_ops
, },
211 { .name
= "nbd", .ops
= &nbd_ops
, },
214 static const size_t numbdevs
= sizeof(bdevs
) / sizeof(struct lxc_storage_type
);
216 static const struct lxc_storage_type
*get_storage_by_name(const char *path
,
223 cmplen
= strlen(type
);
225 cmplen
= strcspn(path
, ":");
229 for (i
= 0; i
< numbdevs
; i
++) {
231 ret
= strncmp(bdevs
[i
].name
, type
, cmplen
);
233 ret
= strncmp(bdevs
[i
].name
, path
, cmplen
);
241 DEBUG("Detected rootfs type \"%s\"", bdevs
[i
].name
);
245 static const struct lxc_storage_type
*storage_query(struct lxc_conf
*conf
)
248 const struct lxc_storage_type
*bdev
;
249 const char *path
= conf
->rootfs
.path
;
250 const char *type
= conf
->rootfs
.bdev_type
;
252 bdev
= get_storage_by_name(path
, type
);
256 for (i
= 0; i
< numbdevs
; i
++)
257 if (bdevs
[i
].ops
->detect(path
))
263 DEBUG("Detected rootfs type \"%s\"", bdevs
[i
].name
);
267 struct lxc_storage
*storage_get(const char *type
)
270 struct lxc_storage
*bdev
;
272 for (i
= 0; i
< numbdevs
; i
++)
273 if (strcmp(bdevs
[i
].name
, type
) == 0)
279 bdev
= malloc(sizeof(struct lxc_storage
));
283 memset(bdev
, 0, sizeof(struct lxc_storage
));
284 bdev
->ops
= bdevs
[i
].ops
;
285 bdev
->type
= bdevs
[i
].name
;
287 if (strcmp(bdev
->type
, "aufs") == 0)
288 WARN("The \"aufs\" driver will is deprecated and will soon be "
289 "removed. For similar functionality see the \"overlay\" "
295 static struct lxc_storage
*do_storage_create(const char *dest
, const char *type
,
297 struct bdev_specs
*specs
)
300 struct lxc_storage
*bdev
;
305 bdev
= storage_get(type
);
309 ret
= bdev
->ops
->create(bdev
, dest
, cname
, specs
);
318 bool storage_can_backup(struct lxc_conf
*conf
)
321 struct lxc_storage
*bdev
;
323 bdev
= storage_init(conf
);
327 ret
= bdev
->ops
->can_backup
;
332 /* If we're not snapshotting, then storage_copy becomes a simple case of mounting
333 * the original, mounting the new, and rsyncing the contents.
335 struct lxc_storage
*storage_copy(struct lxc_container
*c
, const char *cname
,
336 const char *lxcpath
, const char *bdevtype
,
337 int flags
, const char *bdevdata
,
338 uint64_t newsize
, bool *needs_rdep
)
341 const char *src_no_prefix
;
342 struct lxc_storage
*new, *orig
;
343 bool snap
= (flags
& LXC_CLONE_SNAPSHOT
);
344 bool maybe_snap
= (flags
& LXC_CLONE_MAYBE_SNAPSHOT
);
345 bool keepbdevtype
= (flags
& LXC_CLONE_KEEPBDEVTYPE
);
346 const char *src
= c
->lxc_conf
->rootfs
.path
;
347 const char *oldname
= c
->name
;
348 const char *oldpath
= c
->config_path
;
349 char cmd_output
[MAXPATHLEN
] = {0};
350 struct rsync_data data
= {0};
353 ERROR("No rootfs specified");
357 /* If the container name doesn't show up in the rootfs path, then we
358 * don't know how to come up with a new name.
360 if (!strstr(src
, oldname
)) {
361 ERROR("Original rootfs path \"%s\" does not include container "
362 "name \"%s\"", src
, oldname
);
366 orig
= storage_init(c
->lxc_conf
);
368 ERROR("Failed to detect storage driver for \"%s\"", oldname
);
377 len
= strlen(oldpath
) + strlen(oldname
) + strlen("/rootfs") + 2;
378 orig
->dest
= malloc(len
);
380 ERROR("Failed to allocate memory");
381 goto on_error_put_orig
;
384 ret
= snprintf(orig
->dest
, len
, "%s/%s/rootfs", oldpath
, oldname
);
385 if (ret
< 0 || (size_t)ret
>= len
) {
386 ERROR("Failed to create string");
387 goto on_error_put_orig
;
390 ret
= stat(orig
->dest
, &sb
);
391 if (ret
< 0 && errno
== ENOENT
) {
392 ret
= mkdir_p(orig
->dest
, 0755);
394 WARN("Failed to create directory \"%s\"", orig
->dest
);
398 /* Special case for snapshot. If the caller requested maybe_snapshot and
399 * keepbdevtype and the backing store cannot snapshot (e.g. a directory),
400 * then proceed with a copy clone rather than returning an error.
402 if (maybe_snap
&& keepbdevtype
&& !bdevtype
&& !orig
->ops
->can_snapshot
)
405 /* If bdevtype is NULL, keepbdevtype is unset, snapshot is set and the original is "dir", then use overlay. */
406 if (!bdevtype
&& !keepbdevtype
&& snap
&& !strcmp(orig
->type
, "dir"))
407 bdevtype
= "overlay";
409 if (am_guest_unpriv() && !unpriv_snap_allowed(orig
, bdevtype
, snap
, maybe_snap
)) {
410 ERROR("Unsupported snapshot type \"%s\" for unprivileged users",
411 bdevtype
? bdevtype
: "(null)");
412 goto on_error_put_orig
;
417 if (snap
&& !strcmp(orig
->type
, "lvm") &&
418 !lvm_is_thin_volume(orig
->src
))
420 else if (!strcmp(bdevtype
, "overlay") ||
421 !strcmp(bdevtype
, "overlayfs"))
424 if (!snap
&& strcmp(oldpath
, lxcpath
))
427 bdevtype
= orig
->type
;
429 if (!strcmp(bdevtype
, "overlay") ||
430 !strcmp(bdevtype
, "overlayfs"))
434 /* get new bdev type */
435 new = storage_get(bdevtype
);
437 ERROR("Failed to initialize %s storage driver",
438 bdevtype
? bdevtype
: orig
->type
);
439 goto on_error_put_orig
;
441 TRACE("Initialized %s storage driver", new->type
);
443 /* create new paths */
444 ret
= new->ops
->clone_paths(orig
, new, oldname
, cname
, oldpath
, lxcpath
,
445 snap
, newsize
, c
->lxc_conf
);
447 ERROR("Failed creating new paths for clone of \"%s\"", src
);
448 goto on_error_put_new
;
451 /* When we create an overlay snapshot of an overlay container in the
452 * snapshot directory under "<lxcpath>/<name>/snaps/" we don't need to
453 * record a dependency. If we did, a restore would also fail.
455 if ((strcmp(new->type
, "overlay") == 0 ||
456 strcmp(new->type
, "overlayfs") == 0) &&
457 ret
== LXC_CLONE_SNAPSHOT
)
461 if (!strcmp(orig
->type
, "btrfs") && !strcmp(new->type
, "btrfs")) {
464 if (snap
|| btrfs_same_fs(orig
->dest
, new->dest
) == 0)
465 bret
= new->ops
->snapshot(c
->lxc_conf
, orig
, new, 0);
467 bret
= new->ops
->copy(c
->lxc_conf
, orig
, new, 0);
469 goto on_error_put_new
;
475 if (!strcmp(orig
->type
, "lvm") && !strcmp(new->type
, "lvm")) {
479 bret
= new->ops
->snapshot(c
->lxc_conf
, orig
, new, newsize
);
481 bret
= new->ops
->copy(c
->lxc_conf
, orig
, new, newsize
);
483 goto on_error_put_new
;
489 if (!strcmp(orig
->type
, "zfs") && !strcmp(new->type
, "zfs")) {
493 bret
= new->ops
->snapshot(c
->lxc_conf
, orig
, new, newsize
);
495 bret
= new->ops
->copy(c
->lxc_conf
, orig
, new, newsize
);
497 goto on_error_put_new
;
502 if (strcmp(bdevtype
, "btrfs")) {
503 if (!strcmp(new->type
, "overlay") || !strcmp(new->type
, "overlayfs"))
504 src_no_prefix
= ovl_get_lower(new->src
);
506 src_no_prefix
= lxc_storage_get_path(new->src
, new->type
);
508 if (am_guest_unpriv()) {
509 ret
= chown_mapped_root(src_no_prefix
, c
->lxc_conf
);
511 WARN("Failed to chown \"%s\"", new->src
);
518 /* rsync the contents from source to target */
521 if (am_guest_unpriv())
522 ret
= userns_exec_full(c
->lxc_conf
,
523 lxc_storage_rsync_exec_wrapper
, &data
,
524 "lxc_storage_rsync_exec_wrapper");
526 ret
= run_command(cmd_output
, sizeof(cmd_output
),
527 lxc_storage_rsync_exec_wrapper
, (void *)&data
);
529 ERROR("Failed to rsync from \"%s\" into \"%s\"%s%s", orig
->dest
,
531 cmd_output
[0] != '\0' ? ": " : "",
532 cmd_output
[0] != '\0' ? cmd_output
: "");
533 goto on_error_put_new
;
550 /* Create a backing store for a container.
551 * If successful, return a struct lxc_storage *, with the storage mounted and
552 * ready for use. Before completing, the caller will need to call the
553 * umount operation and storage_put().
554 * @dest: the mountpoint (e.g. /var/lib/lxc/$name/rootfs)
555 * @type: the bdevtype (dir, btrfs, zfs, rbd, etc.), "best", or a comma-separated list of types
556 * @cname: the container name
557 * @specs: details about the backing store to create, like fstype
559 struct lxc_storage
*storage_create(const char *dest
, const char *type
,
560 const char *cname
, struct bdev_specs
*specs
)
563 struct lxc_storage
*bdev
;
564 char *best_options
[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL
};
567 return do_storage_create(dest
, "dir", cname
, specs
);
569 ret
= strcmp(type
, "best");
572 /* Try for the best backing store type, according to our
573 * opinionated preferences.
575 for (i
= 0; best_options
[i
]; i
++) {
576 bdev
= do_storage_create(dest
, best_options
[i
], cname
, specs
);
585 if (strchr(type
, ',')) {
587 char *saveptr
= NULL
;
589 dup
= alloca(strlen(type
) + 1);
591 for (token
= strtok_r(dup
, ",", &saveptr
); token
;
592 token
= strtok_r(NULL
, ",", &saveptr
)) {
593 bdev
= do_storage_create(dest
, token
, cname
, specs
);
599 return do_storage_create(dest
, type
, cname
, specs
);
602 bool storage_destroy(struct lxc_conf
*conf
)
604 struct lxc_storage
*r
;
608 r
= storage_init(conf
);
612 destroy_rv
= r
->ops
->destroy(r
);
620 struct lxc_storage
*storage_init(struct lxc_conf
*conf
)
622 struct lxc_storage
*bdev
;
623 const struct lxc_storage_type
*q
;
624 const char *src
= conf
->rootfs
.path
;
625 const char *dst
= conf
->rootfs
.mount
;
626 const char *mntopts
= conf
->rootfs
.options
;
628 BUILD_BUG_ON(LXC_STORAGE_INTERNAL_OVERLAY_RESTORE
<= LXC_CLONE_MAXFLAGS
);
633 q
= storage_query(conf
);
637 bdev
= malloc(sizeof(struct lxc_storage
));
641 memset(bdev
, 0, sizeof(struct lxc_storage
));
644 bdev
->type
= q
->name
;
647 bdev
->mntopts
= strdup(mntopts
);
650 bdev
->src
= strdup(src
);
653 bdev
->dest
= strdup(dst
);
655 if (strcmp(bdev
->type
, "nbd") == 0)
656 bdev
->nbd_idx
= conf
->nbd_idx
;
658 if (strcmp(bdev
->type
, "aufs") == 0)
659 WARN("The \"aufs\" driver will is deprecated and will soon be "
660 "removed. For similar functionality see the \"overlay\" "
666 bool storage_is_dir(struct lxc_conf
*conf
)
668 struct lxc_storage
*orig
;
669 char *type
= conf
->rootfs
.bdev_type
;
673 return (strcmp(type
, "dir") == 0);
675 orig
= storage_init(conf
);
679 if (strcmp(orig
->type
, "dir") == 0)
686 void storage_put(struct lxc_storage
*bdev
)
694 bool rootfs_is_blockdev(struct lxc_conf
*conf
)
696 const struct lxc_storage_type
*q
;
700 if (!conf
->rootfs
.path
|| strcmp(conf
->rootfs
.path
, "/") == 0 ||
701 strlen(conf
->rootfs
.path
) == 0)
704 ret
= stat(conf
->rootfs
.path
, &st
);
705 if (ret
== 0 && S_ISBLK(st
.st_mode
))
708 q
= storage_query(conf
);
712 if (strcmp(q
->name
, "lvm") == 0 ||
713 strcmp(q
->name
, "loop") == 0 ||
714 strcmp(q
->name
, "nbd") == 0 ||
715 strcmp(q
->name
, "rbd") == 0 ||
716 strcmp(q
->name
, "zfs") == 0)
722 const char *lxc_storage_get_path(char *src
, const char *prefix
)
726 prefix_len
= strlen(prefix
);
727 if (!strncmp(src
, prefix
, prefix_len
) && (*(src
+ prefix_len
) == ':'))
728 return (src
+ prefix_len
+ 1);