2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
36 #include <sys/mount.h>
37 #include <sys/prctl.h>
38 #include <sys/types.h>
54 #include "namespace.h"
60 #include "storage_utils.h"
65 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
68 lxc_log_define(storage
, lxc
);
71 static const struct lxc_storage_ops aufs_ops
= {
72 .detect
= &aufs_detect
,
74 .umount
= &aufs_umount
,
75 .clone_paths
= &aufs_clonepaths
,
76 .destroy
= &aufs_destroy
,
77 .create
= &aufs_create
,
85 static const struct lxc_storage_ops btrfs_ops
= {
86 .detect
= &btrfs_detect
,
87 .mount
= &btrfs_mount
,
88 .umount
= &btrfs_umount
,
89 .clone_paths
= &btrfs_clonepaths
,
90 .destroy
= &btrfs_destroy
,
91 .create
= &btrfs_create
,
92 .copy
= &btrfs_create_clone
,
93 .snapshot
= &btrfs_create_snapshot
,
99 static const struct lxc_storage_ops dir_ops
= {
100 .detect
= &dir_detect
,
102 .umount
= &dir_umount
,
103 .clone_paths
= &dir_clonepaths
,
104 .destroy
= &dir_destroy
,
105 .create
= &dir_create
,
108 .can_snapshot
= false,
113 static const struct lxc_storage_ops loop_ops
= {
114 .detect
= &loop_detect
,
115 .mount
= &loop_mount
,
116 .umount
= &loop_umount
,
117 .clone_paths
= &loop_clonepaths
,
118 .destroy
= &loop_destroy
,
119 .create
= &loop_create
,
122 .can_snapshot
= false,
127 static const struct lxc_storage_ops lvm_ops
= {
128 .detect
= &lvm_detect
,
130 .umount
= &lvm_umount
,
131 .clone_paths
= &lvm_clonepaths
,
132 .destroy
= &lvm_destroy
,
133 .create
= &lvm_create
,
134 .copy
= &lvm_create_clone
,
135 .snapshot
= &lvm_create_snapshot
,
136 .can_snapshot
= true,
141 const struct lxc_storage_ops nbd_ops
= {
142 .detect
= &nbd_detect
,
144 .umount
= &nbd_umount
,
145 .clone_paths
= &nbd_clonepaths
,
146 .destroy
= &nbd_destroy
,
147 .create
= &nbd_create
,
150 .can_snapshot
= true,
155 static const struct lxc_storage_ops ovl_ops
= {
156 .detect
= &ovl_detect
,
158 .umount
= &ovl_umount
,
159 .clone_paths
= &ovl_clonepaths
,
160 .destroy
= &ovl_destroy
,
161 .create
= &ovl_create
,
164 .can_snapshot
= true,
169 static const struct lxc_storage_ops rbd_ops
= {
170 .detect
= &rbd_detect
,
172 .umount
= &rbd_umount
,
173 .clone_paths
= &rbd_clonepaths
,
174 .destroy
= &rbd_destroy
,
175 .create
= &rbd_create
,
178 .can_snapshot
= false,
183 static const struct lxc_storage_ops zfs_ops
= {
184 .detect
= &zfs_detect
,
186 .umount
= &zfs_umount
,
187 .clone_paths
= &zfs_clonepaths
,
188 .destroy
= &zfs_destroy
,
189 .create
= &zfs_create
,
191 .snapshot
= &zfs_snapshot
,
192 .can_snapshot
= true,
196 struct lxc_storage_type
{
198 const struct lxc_storage_ops
*ops
;
201 static const struct lxc_storage_type bdevs
[] = {
202 { .name
= "dir", .ops
= &dir_ops
, },
203 { .name
= "zfs", .ops
= &zfs_ops
, },
204 { .name
= "lvm", .ops
= &lvm_ops
, },
205 { .name
= "rbd", .ops
= &rbd_ops
, },
206 { .name
= "btrfs", .ops
= &btrfs_ops
, },
207 { .name
= "aufs", .ops
= &aufs_ops
, },
208 { .name
= "overlay", .ops
= &ovl_ops
, },
209 { .name
= "overlayfs", .ops
= &ovl_ops
, },
210 { .name
= "loop", .ops
= &loop_ops
, },
211 { .name
= "nbd", .ops
= &nbd_ops
, },
214 static const size_t numbdevs
= sizeof(bdevs
) / sizeof(struct lxc_storage_type
);
216 static const struct lxc_storage_type
*get_storage_by_name(const char *name
)
220 cmplen
= strcspn(name
, ":");
224 for (i
= 0; i
< numbdevs
; i
++)
225 if (strncmp(bdevs
[i
].name
, name
, cmplen
) == 0)
231 DEBUG("Detected rootfs type \"%s\"", bdevs
[i
].name
);
235 const struct lxc_storage_type
*storage_query(struct lxc_conf
*conf
,
239 const struct lxc_storage_type
*bdev
;
241 bdev
= get_storage_by_name(src
);
245 for (i
= 0; i
< numbdevs
; i
++)
246 if (bdevs
[i
].ops
->detect(src
))
252 DEBUG("Detected rootfs type \"%s\"", bdevs
[i
].name
);
256 struct lxc_storage
*storage_get(const char *type
)
259 struct lxc_storage
*bdev
;
261 for (i
= 0; i
< numbdevs
; i
++) {
262 if (strcmp(bdevs
[i
].name
, type
) == 0)
269 bdev
= malloc(sizeof(struct lxc_storage
));
273 memset(bdev
, 0, sizeof(struct lxc_storage
));
274 bdev
->ops
= bdevs
[i
].ops
;
275 bdev
->type
= bdevs
[i
].name
;
277 if (!strcmp(bdev
->type
, "aufs"))
278 WARN("The \"aufs\" driver will is deprecated and will soon be "
279 "removed. For similar functionality see the \"overlay\" "
285 static struct lxc_storage
*do_storage_create(const char *dest
, const char *type
,
287 struct bdev_specs
*specs
)
290 struct lxc_storage
*bdev
;
295 bdev
= storage_get(type
);
299 if (bdev
->ops
->create(bdev
, dest
, cname
, specs
) < 0) {
307 bool storage_can_backup(struct lxc_conf
*conf
)
309 struct lxc_storage
*bdev
= storage_init(conf
, NULL
, NULL
, NULL
);
315 ret
= bdev
->ops
->can_backup
;
320 /* If we're not snapshotting, then storage_copy becomes a simple case of mounting
321 * the original, mounting the new, and rsyncing the contents.
323 struct lxc_storage
*storage_copy(struct lxc_container
*c
, const char *cname
,
324 const char *lxcpath
, const char *bdevtype
,
325 int flags
, const char *bdevdata
,
326 uint64_t newsize
, bool *needs_rdep
)
329 struct lxc_storage
*orig
, *new;
331 bool snap
= flags
& LXC_CLONE_SNAPSHOT
;
332 bool maybe_snap
= flags
& LXC_CLONE_MAYBE_SNAPSHOT
;
333 bool keepbdevtype
= flags
& LXC_CLONE_KEEPBDEVTYPE
;
334 const char *src
= c
->lxc_conf
->rootfs
.path
;
335 const char *oldname
= c
->name
;
336 const char *oldpath
= c
->config_path
;
337 struct rsync_data data
= {0};
338 char cmd_output
[MAXPATHLEN
] = {0};
340 /* If the container name doesn't show up in the rootfs path, then we
341 * don't know how to come up with a new name.
343 if (!strstr(src
, oldname
)) {
344 ERROR("Original rootfs path \"%s\" does not include container "
345 "name \"%s\"", src
, oldname
);
349 orig
= storage_init(c
->lxc_conf
, src
, NULL
, NULL
);
351 ERROR("Failed to detect storage driver for \"%s\"", src
);
360 len
= strlen(oldpath
) + strlen(oldname
) + strlen("/rootfs") + 2;
361 orig
->dest
= malloc(len
);
363 ERROR("Failed to allocate memory");
364 goto on_error_put_orig
;
367 ret
= snprintf(orig
->dest
, len
, "%s/%s/rootfs", oldpath
, oldname
);
368 if (ret
< 0 || (size_t)ret
>= len
) {
369 ERROR("Failed to create string");
370 goto on_error_put_orig
;
373 ret
= stat(orig
->dest
, &sb
);
374 if (ret
< 0 && errno
== ENOENT
) {
375 ret
= mkdir_p(orig
->dest
, 0755);
377 WARN("Failed to create directoy \"%s\"", orig
->dest
);
381 /* Special case for snapshot. If the caller requested maybe_snapshot and
382 * keepbdevtype and the backing store is a directory, then proceed with
383 * a copy clone rather than returning an error.
385 if (maybe_snap
&& keepbdevtype
&& !bdevtype
&& !orig
->ops
->can_snapshot
)
388 /* If bdevtype is NULL, keepbdevtype is not set, snapshot is set and the original is a "dir" store, then use overlay. */
389 if (!bdevtype
&& !keepbdevtype
&& snap
&& !strcmp(orig
->type
, "dir"))
390 bdevtype
= "overlay";
392 if (am_unpriv() && !unpriv_snap_allowed(orig
, bdevtype
, snap
, maybe_snap
)) {
393 ERROR("Unsupported snapshot type \"%s\" for unprivileged users",
394 bdevtype
? bdevtype
: "(null)");
395 goto on_error_put_orig
;
400 if (snap
&& !strcmp(orig
->type
, "lvm") &&
401 !lvm_is_thin_volume(orig
->src
))
403 else if (!strcmp(bdevtype
, "overlay") ||
404 !strcmp(bdevtype
, "overlayfs"))
407 if (!snap
&& strcmp(oldpath
, lxcpath
))
410 bdevtype
= orig
->type
;
412 if (!strcmp(bdevtype
, "overlay") ||
413 !strcmp(bdevtype
, "overlayfs"))
417 /* get new bdev type */
418 new = storage_get(bdevtype
);
420 ERROR("Failed to initialize \"%s\" storage driver",
421 bdevtype
? bdevtype
: orig
->type
);
422 goto on_error_put_orig
;
424 TRACE("Initialized \"%s\" storage driver", new->type
);
426 /* create new paths */
427 ret
= new->ops
->clone_paths(orig
, new, oldname
, cname
, oldpath
, lxcpath
,
428 snap
, newsize
, c
->lxc_conf
);
430 ERROR("Failed creating new paths for clone of \"%s\"", src
);
431 goto on_error_put_new
;
434 /* When we create an overlay snapshot of an overlay container in the
435 * snapshot directory under "<lxcpath>/<name>/snaps/" we don't need to
436 * record a dependency. If we did, a restore would also fail.
438 if ((!strcmp(new->type
, "overlay") ||
439 !strcmp(new->type
, "overlayfs")) &&
440 ret
== LXC_CLONE_SNAPSHOT
)
444 if (!strcmp(orig
->type
, "btrfs") && !strcmp(new->type
, "btrfs")) {
446 if (snap
|| btrfs_same_fs(orig
->dest
, new->dest
) == 0)
447 bret
= new->ops
->snapshot(c
->lxc_conf
, orig
, new, 0);
449 bret
= new->ops
->copy(c
->lxc_conf
, orig
, new, 0);
451 goto on_error_put_new
;
457 if (!strcmp(orig
->type
, "lvm") && !strcmp(new->type
, "lvm")) {
460 bret
= new->ops
->snapshot(c
->lxc_conf
, orig
,
463 bret
= new->ops
->copy(c
->lxc_conf
, orig
, new, newsize
);
465 goto on_error_put_new
;
471 if (!strcmp(orig
->type
, "zfs") && !strcmp(new->type
, "zfs")) {
475 bret
= new->ops
->snapshot(c
->lxc_conf
, orig
, new,
478 bret
= new->ops
->copy(c
->lxc_conf
, orig
, new, newsize
);
480 goto on_error_put_new
;
485 if (strcmp(bdevtype
, "btrfs")) {
486 if (!strcmp(new->type
, "overlay") || !strcmp(new->type
, "overlayfs"))
487 src_no_prefix
= ovl_get_lower(new->src
);
489 src_no_prefix
= lxc_storage_get_path(new->src
, new->type
);
492 ret
= chown_mapped_root(src_no_prefix
, c
->lxc_conf
);
494 WARN("Failed to chown \"%s\"", new->src
);
501 /* rsync the contents from source to target */
505 ret
= userns_exec_1(c
->lxc_conf
, lxc_storage_rsync_exec_wrapper
,
506 &data
, "lxc_storage_rsync_exec_wrapper");
508 ret
= run_command(cmd_output
, sizeof(cmd_output
),
509 lxc_storage_rsync_exec_wrapper
, (void *)&data
);
511 ERROR("Failed to rsync from \"%s\" into \"%s\"%s%s", orig
->dest
,
513 cmd_output
[0] != '\0' ? ": " : "",
514 cmd_output
[0] != '\0' ? cmd_output
: "");
515 goto on_error_put_new
;
532 /* Create a backing store for a container.
533 * If successful, return a struct lxc_storage *, with the backing store mounted
534 * and ready for use. Before completing, the caller will need to call the
535 * umount operation and storage_put().
536 * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
537 * @type: the bdevtype (dir, btrfs, zfs, rbd, etc)
538 * @cname: the container name
539 * @specs: details about the backing store to create, like fstype
541 struct lxc_storage
*storage_create(const char *dest
, const char *type
,
542 const char *cname
, struct bdev_specs
*specs
)
544 struct lxc_storage
*bdev
;
545 char *best_options
[] = {"btrfs", "zfs", "lvm", "dir", "rbd", NULL
};
548 return do_storage_create(dest
, "dir", cname
, specs
);
550 if (strcmp(type
, "best") == 0) {
552 /* Try for the best backing store type, according to our
553 * opinionated preferences.
555 for (i
= 0; best_options
[i
]; i
++) {
556 bdev
= do_storage_create(dest
, best_options
[i
], cname
,
566 if (strchr(type
, ',') != NULL
) {
567 char *dup
= alloca(strlen(type
) + 1), *saveptr
= NULL
, *token
;
569 for (token
= strtok_r(dup
, ",", &saveptr
); token
;
570 token
= strtok_r(NULL
, ",", &saveptr
)) {
571 if ((bdev
= do_storage_create(dest
, token
, cname
, specs
)))
576 return do_storage_create(dest
, type
, cname
, specs
);
579 bool storage_destroy(struct lxc_conf
*conf
)
581 struct lxc_storage
*r
;
584 r
= storage_init(conf
, conf
->rootfs
.path
, conf
->rootfs
.mount
, NULL
);
588 if (r
->ops
->destroy(r
) == 0)
595 struct lxc_storage
*storage_init(struct lxc_conf
*conf
, const char *src
,
596 const char *dst
, const char *mntopts
)
598 struct lxc_storage
*bdev
;
599 const struct lxc_storage_type
*q
;
601 BUILD_BUG_ON(LXC_STORAGE_INTERNAL_OVERLAY_RESTORE
<= LXC_CLONE_MAXFLAGS
);
604 src
= conf
->rootfs
.path
;
609 q
= storage_query(conf
, src
);
613 bdev
= malloc(sizeof(struct lxc_storage
));
617 memset(bdev
, 0, sizeof(struct lxc_storage
));
619 bdev
->type
= q
->name
;
621 bdev
->mntopts
= strdup(mntopts
);
623 bdev
->src
= strdup(src
);
625 bdev
->dest
= strdup(dst
);
626 if (strcmp(bdev
->type
, "nbd") == 0)
627 bdev
->nbd_idx
= conf
->nbd_idx
;
629 if (!strcmp(bdev
->type
, "aufs"))
630 WARN("The \"aufs\" driver will is deprecated and will soon be "
631 "removed. For similar functionality see the \"overlay\" "
637 bool storage_is_dir(struct lxc_conf
*conf
, const char *path
)
639 struct lxc_storage
*orig
;
642 orig
= storage_init(conf
, path
, NULL
, NULL
);
646 if (strcmp(orig
->type
, "dir") == 0)
653 void storage_put(struct lxc_storage
*bdev
)
661 bool rootfs_is_blockdev(struct lxc_conf
*conf
)
663 const struct lxc_storage_type
*q
;
667 if (!conf
->rootfs
.path
|| strcmp(conf
->rootfs
.path
, "/") == 0 ||
668 strlen(conf
->rootfs
.path
) == 0)
671 ret
= stat(conf
->rootfs
.path
, &st
);
672 if (ret
== 0 && S_ISBLK(st
.st_mode
))
675 q
= storage_query(conf
, conf
->rootfs
.path
);
679 if (strcmp(q
->name
, "lvm") == 0 ||
680 strcmp(q
->name
, "loop") == 0 ||
681 strcmp(q
->name
, "nbd") == 0 ||
682 strcmp(q
->name
, "rbd") == 0 ||
683 strcmp(q
->name
, "zfs") == 0)
689 char *lxc_storage_get_path(char *src
, const char *prefix
)
693 prefix_len
= strlen(prefix
);
694 if (!strncmp(src
, prefix
, prefix_len
) && (*(src
+ prefix_len
) == ':'))
695 return (src
+ prefix_len
+ 1);