]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/storage/storage.c
fee3d8df157375053708d49d388dd373c3b88352
[mirror_lxc.git] / src / lxc / storage / storage.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <dirent.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <grp.h>
29 #include <inttypes.h>
30 #include <libgen.h>
31 #include <sched.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <sys/mount.h>
37 #include <sys/prctl.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41
42 #include "aufs.h"
43 #include "btrfs.h"
44 #include "conf.h"
45 #include "config.h"
46 #include "dir.h"
47 #include "error.h"
48 #include "log.h"
49 #include "loop.h"
50 #include "lvm.h"
51 #include "lxc.h"
52 #include "lxclock.h"
53 #include "nbd.h"
54 #include "namespace.h"
55 #include "overlay.h"
56 #include "parse.h"
57 #include "rbd.h"
58 #include "rsync.h"
59 #include "storage.h"
60 #include "storage_utils.h"
61 #include "utils.h"
62 #include "zfs.h"
63
/* Fallback definition, used only when no included header already provides
 * BLKGETSIZE64.
 */
#ifndef BLKGETSIZE64
#define BLKGETSIZE64 _IOR(0x12, 114, size_t)
#endif

/* NOTE(review): presumably sets up the log category used by the
 * DEBUG/TRACE/WARN/ERROR calls in this file - confirm against log.h.
 */
lxc_log_define(storage, lxc);
69
70 /* aufs */
71 static const struct lxc_storage_ops aufs_ops = {
72 .detect = &aufs_detect,
73 .mount = &aufs_mount,
74 .umount = &aufs_umount,
75 .clone_paths = &aufs_clonepaths,
76 .destroy = &aufs_destroy,
77 .create = &aufs_create,
78 .copy = NULL,
79 .snapshot = NULL,
80 .can_snapshot = true,
81 .can_backup = true,
82 };
83
84 /* btrfs */
85 static const struct lxc_storage_ops btrfs_ops = {
86 .detect = &btrfs_detect,
87 .mount = &btrfs_mount,
88 .umount = &btrfs_umount,
89 .clone_paths = &btrfs_clonepaths,
90 .destroy = &btrfs_destroy,
91 .create = &btrfs_create,
92 .copy = &btrfs_create_clone,
93 .snapshot = &btrfs_create_snapshot,
94 .can_snapshot = true,
95 .can_backup = true,
96 };
97
98 /* dir */
99 static const struct lxc_storage_ops dir_ops = {
100 .detect = &dir_detect,
101 .mount = &dir_mount,
102 .umount = &dir_umount,
103 .clone_paths = &dir_clonepaths,
104 .destroy = &dir_destroy,
105 .create = &dir_create,
106 .copy = NULL,
107 .snapshot = NULL,
108 .can_snapshot = false,
109 .can_backup = true,
110 };
111
112 /* loop */
113 static const struct lxc_storage_ops loop_ops = {
114 .detect = &loop_detect,
115 .mount = &loop_mount,
116 .umount = &loop_umount,
117 .clone_paths = &loop_clonepaths,
118 .destroy = &loop_destroy,
119 .create = &loop_create,
120 .copy = NULL,
121 .snapshot = NULL,
122 .can_snapshot = false,
123 .can_backup = true,
124 };
125
126 /* lvm */
127 static const struct lxc_storage_ops lvm_ops = {
128 .detect = &lvm_detect,
129 .mount = &lvm_mount,
130 .umount = &lvm_umount,
131 .clone_paths = &lvm_clonepaths,
132 .destroy = &lvm_destroy,
133 .create = &lvm_create,
134 .copy = &lvm_create_clone,
135 .snapshot = &lvm_create_snapshot,
136 .can_snapshot = true,
137 .can_backup = false,
138 };
139
140 /* nbd */
141 const struct lxc_storage_ops nbd_ops = {
142 .detect = &nbd_detect,
143 .mount = &nbd_mount,
144 .umount = &nbd_umount,
145 .clone_paths = &nbd_clonepaths,
146 .destroy = &nbd_destroy,
147 .create = &nbd_create,
148 .copy = NULL,
149 .snapshot = NULL,
150 .can_snapshot = true,
151 .can_backup = false,
152 };
153
154 /* overlay */
155 static const struct lxc_storage_ops ovl_ops = {
156 .detect = &ovl_detect,
157 .mount = &ovl_mount,
158 .umount = &ovl_umount,
159 .clone_paths = &ovl_clonepaths,
160 .destroy = &ovl_destroy,
161 .create = &ovl_create,
162 .copy = NULL,
163 .snapshot = NULL,
164 .can_snapshot = true,
165 .can_backup = true,
166 };
167
168 /* rbd */
169 static const struct lxc_storage_ops rbd_ops = {
170 .detect = &rbd_detect,
171 .mount = &rbd_mount,
172 .umount = &rbd_umount,
173 .clone_paths = &rbd_clonepaths,
174 .destroy = &rbd_destroy,
175 .create = &rbd_create,
176 .copy = NULL,
177 .snapshot = NULL,
178 .can_snapshot = false,
179 .can_backup = false,
180 };
181
182 /* zfs */
183 static const struct lxc_storage_ops zfs_ops = {
184 .detect = &zfs_detect,
185 .mount = &zfs_mount,
186 .umount = &zfs_umount,
187 .clone_paths = &zfs_clonepaths,
188 .destroy = &zfs_destroy,
189 .create = &zfs_create,
190 .copy = &zfs_copy,
191 .snapshot = &zfs_snapshot,
192 .can_snapshot = true,
193 .can_backup = true,
194 };
195
/* Associates a storage driver name with its operation table. */
struct lxc_storage_type {
	/* Driver name; compared against the "<type>:" prefix of a rootfs
	 * source and against the type string handed to storage_get().
	 */
	const char *name;
	/* Callbacks and capability flags implementing the driver. */
	const struct lxc_storage_ops *ops;
};
200
201 static const struct lxc_storage_type bdevs[] = {
202 { .name = "dir", .ops = &dir_ops, },
203 { .name = "zfs", .ops = &zfs_ops, },
204 { .name = "lvm", .ops = &lvm_ops, },
205 { .name = "rbd", .ops = &rbd_ops, },
206 { .name = "btrfs", .ops = &btrfs_ops, },
207 { .name = "aufs", .ops = &aufs_ops, },
208 { .name = "overlay", .ops = &ovl_ops, },
209 { .name = "overlayfs", .ops = &ovl_ops, },
210 { .name = "loop", .ops = &loop_ops, },
211 { .name = "nbd", .ops = &nbd_ops, },
212 };
213
214 static const size_t numbdevs = sizeof(bdevs) / sizeof(struct lxc_storage_type);
215
216 static const struct lxc_storage_type *get_storage_by_name(const char *name)
217 {
218 size_t i, cmplen;
219
220 cmplen = strcspn(name, ":");
221 if (cmplen == 0)
222 return NULL;
223
224 for (i = 0; i < numbdevs; i++)
225 if (strncmp(bdevs[i].name, name, cmplen) == 0)
226 break;
227
228 if (i == numbdevs)
229 return NULL;
230
231 DEBUG("Detected rootfs type \"%s\"", bdevs[i].name);
232 return &bdevs[i];
233 }
234
235 const struct lxc_storage_type *storage_query(struct lxc_conf *conf,
236 const char *src)
237 {
238 size_t i;
239 const struct lxc_storage_type *bdev;
240
241 bdev = get_storage_by_name(src);
242 if (bdev)
243 return bdev;
244
245 for (i = 0; i < numbdevs; i++)
246 if (bdevs[i].ops->detect(src))
247 break;
248
249 if (i == numbdevs)
250 return NULL;
251
252 DEBUG("Detected rootfs type \"%s\"", bdevs[i].name);
253 return &bdevs[i];
254 }
255
256 struct lxc_storage *storage_get(const char *type)
257 {
258 size_t i;
259 struct lxc_storage *bdev;
260
261 for (i = 0; i < numbdevs; i++) {
262 if (strcmp(bdevs[i].name, type) == 0)
263 break;
264 }
265
266 if (i == numbdevs)
267 return NULL;
268
269 bdev = malloc(sizeof(struct lxc_storage));
270 if (!bdev)
271 return NULL;
272
273 memset(bdev, 0, sizeof(struct lxc_storage));
274 bdev->ops = bdevs[i].ops;
275 bdev->type = bdevs[i].name;
276
277 if (!strcmp(bdev->type, "aufs"))
278 WARN("The \"aufs\" driver will is deprecated and will soon be "
279 "removed. For similar functionality see the \"overlay\" "
280 "storage driver");
281
282 return bdev;
283 }
284
285 static struct lxc_storage *do_storage_create(const char *dest, const char *type,
286 const char *cname,
287 struct bdev_specs *specs)
288 {
289
290 struct lxc_storage *bdev;
291
292 if (!type)
293 type = "dir";
294
295 bdev = storage_get(type);
296 if (!bdev)
297 return NULL;
298
299 if (bdev->ops->create(bdev, dest, cname, specs) < 0) {
300 storage_put(bdev);
301 return NULL;
302 }
303
304 return bdev;
305 }
306
307 bool storage_can_backup(struct lxc_conf *conf)
308 {
309 struct lxc_storage *bdev = storage_init(conf, NULL, NULL, NULL);
310 bool ret;
311
312 if (!bdev)
313 return false;
314
315 ret = bdev->ops->can_backup;
316 storage_put(bdev);
317 return ret;
318 }
319
320 /* If we're not snaphotting, then storage_copy becomes a simple case of mount
321 * the original, mount the new, and rsync the contents.
322 */
323 struct lxc_storage *storage_copy(struct lxc_container *c, const char *cname,
324 const char *lxcpath, const char *bdevtype,
325 int flags, const char *bdevdata,
326 uint64_t newsize, bool *needs_rdep)
327 {
328 int ret;
329 struct lxc_storage *orig, *new;
330 char *src_no_prefix;
331 bool snap = flags & LXC_CLONE_SNAPSHOT;
332 bool maybe_snap = flags & LXC_CLONE_MAYBE_SNAPSHOT;
333 bool keepbdevtype = flags & LXC_CLONE_KEEPBDEVTYPE;
334 const char *src = c->lxc_conf->rootfs.path;
335 const char *oldname = c->name;
336 const char *oldpath = c->config_path;
337 struct rsync_data data = {0};
338 char cmd_output[MAXPATHLEN] = {0};
339
340 /* If the container name doesn't show up in the rootfs path, then we
341 * don't know how to come up with a new name.
342 */
343 if (!strstr(src, oldname)) {
344 ERROR("Original rootfs path \"%s\" does not include container "
345 "name \"%s\"", src, oldname);
346 return NULL;
347 }
348
349 orig = storage_init(c->lxc_conf, src, NULL, NULL);
350 if (!orig) {
351 ERROR("Failed to detect storage driver for \"%s\"", src);
352 return NULL;
353 }
354
355 if (!orig->dest) {
356 int ret;
357 size_t len;
358 struct stat sb;
359
360 len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
361 orig->dest = malloc(len);
362 if (!orig->dest) {
363 ERROR("Failed to allocate memory");
364 goto on_error_put_orig;
365 }
366
367 ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
368 if (ret < 0 || (size_t)ret >= len) {
369 ERROR("Failed to create string");
370 goto on_error_put_orig;
371 }
372
373 ret = stat(orig->dest, &sb);
374 if (ret < 0 && errno == ENOENT) {
375 ret = mkdir_p(orig->dest, 0755);
376 if (ret < 0)
377 WARN("Failed to create directoy \"%s\"", orig->dest);
378 }
379 }
380
381 /* Special case for snapshot. If the caller requested maybe_snapshot and
382 * keepbdevtype and the backing store is directory, then proceed with a
383 * a copy clone rather than returning error.
384 */
385 if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
386 snap = false;
387
388 /* If newtype is NULL and snapshot is set, then use overlay. */
389 if (!bdevtype && !keepbdevtype && snap && !strcmp(orig->type, "dir"))
390 bdevtype = "overlay";
391
392 if (am_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
393 ERROR("Unsupported snapshot type \"%s\" for unprivileged users",
394 bdevtype ? bdevtype : "(null)");
395 goto on_error_put_orig;
396 }
397
398 *needs_rdep = false;
399 if (bdevtype) {
400 if (snap && !strcmp(orig->type, "lvm") &&
401 !lvm_is_thin_volume(orig->src))
402 *needs_rdep = true;
403 else if (!strcmp(bdevtype, "overlay") ||
404 !strcmp(bdevtype, "overlayfs"))
405 *needs_rdep = true;
406 } else {
407 if (!snap && strcmp(oldpath, lxcpath))
408 bdevtype = "dir";
409 else
410 bdevtype = orig->type;
411
412 if (!strcmp(bdevtype, "overlay") ||
413 !strcmp(bdevtype, "overlayfs"))
414 *needs_rdep = true;
415 }
416
417 /* get new bdev type */
418 new = storage_get(bdevtype);
419 if (!new) {
420 ERROR("Failed to initialize \"%s\" storage driver",
421 bdevtype ? bdevtype : orig->type);
422 goto on_error_put_orig;
423 }
424 TRACE("Initialized \"%s\" storage driver", new->type);
425
426 /* create new paths */
427 ret = new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
428 snap, newsize, c->lxc_conf);
429 if (ret < 0) {
430 ERROR("Failed creating new paths for clone of \"%s\"", src);
431 goto on_error_put_new;
432 }
433
434 /* When we create an overlay snapshot of an overlay container in the
435 * snapshot directory under "<lxcpath>/<name>/snaps/" we don't need to
436 * record a dependency. If we would restore would also fail.
437 */
438 if ((!strcmp(new->type, "overlay") ||
439 !strcmp(new->type, "overlayfs")) &&
440 ret == LXC_CLONE_SNAPSHOT)
441 *needs_rdep = false;
442
443 /* btrfs */
444 if (!strcmp(orig->type, "btrfs") && !strcmp(new->type, "btrfs")) {
445 bool bret = false;
446 if (snap || btrfs_same_fs(orig->dest, new->dest) == 0)
447 bret = new->ops->snapshot(c->lxc_conf, orig, new, 0);
448 else
449 bret = new->ops->copy(c->lxc_conf, orig, new, 0);
450 if (!bret)
451 goto on_error_put_new;
452
453 goto on_success;
454 }
455
456 /* lvm */
457 if (!strcmp(orig->type, "lvm") && !strcmp(new->type, "lvm")) {
458 bool bret = false;
459 if (snap)
460 bret = new->ops->snapshot(c->lxc_conf, orig,
461 new, newsize);
462 else
463 bret = new->ops->copy(c->lxc_conf, orig, new, newsize);
464 if (!bret)
465 goto on_error_put_new;
466
467 goto on_success;
468 }
469
470 /* zfs */
471 if (!strcmp(orig->type, "zfs") && !strcmp(new->type, "zfs")) {
472 bool bret = false;
473
474 if (snap)
475 bret = new->ops->snapshot(c->lxc_conf, orig, new,
476 newsize);
477 else
478 bret = new->ops->copy(c->lxc_conf, orig, new, newsize);
479 if (!bret)
480 goto on_error_put_new;
481
482 goto on_success;
483 }
484
485 if (strcmp(bdevtype, "btrfs")) {
486 if (!strcmp(new->type, "overlay") || !strcmp(new->type, "overlayfs"))
487 src_no_prefix = ovl_get_lower(new->src);
488 else
489 src_no_prefix = lxc_storage_get_path(new->src, new->type);
490
491 if (am_unpriv()) {
492 ret = chown_mapped_root(src_no_prefix, c->lxc_conf);
493 if (ret < 0)
494 WARN("Failed to chown \"%s\"", new->src);
495 }
496 }
497
498 if (snap)
499 goto on_success;
500
501 /* rsync the contents from source to target */
502 data.orig = orig;
503 data.new = new;
504 if (am_unpriv())
505 ret = userns_exec_1(c->lxc_conf, lxc_storage_rsync_exec_wrapper,
506 &data, "lxc_storage_rsync_exec_wrapper");
507 else
508 ret = run_command(cmd_output, sizeof(cmd_output),
509 lxc_storage_rsync_exec_wrapper, (void *)&data);
510 if (ret < 0) {
511 ERROR("Failed to rsync from \"%s\" into \"%s\"%s%s", orig->dest,
512 new->dest,
513 cmd_output[0] != '\0' ? ": " : "",
514 cmd_output[0] != '\0' ? cmd_output : "");
515 goto on_error_put_new;
516 }
517
518 on_success:
519 storage_put(orig);
520
521 return new;
522
523 on_error_put_new:
524 storage_put(new);
525
526 on_error_put_orig:
527 storage_put(orig);
528
529 return NULL;
530 }
531
/* Create a backing store for a container.
 * If successful, return a struct lxc_storage *, with the storage mounted and
 * ready for use. Before completing, the caller will need to call the
 * umount operation and storage_put().
 * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
 * @type: the bdevtype (dir, btrfs, zfs, rbd, etc). NULL means "dir", "best"
 *        tries a fixed preference list, and a comma-separated list
 *        ("lvm,dir") is tried left to right.
 * @cname: the container name
 * @specs: details about the backing store to create, like fstype
 *
 * Returns NULL if no requested type could be created or on allocation
 * failure.
 */
struct lxc_storage *storage_create(const char *dest, const char *type,
				   const char *cname, struct bdev_specs *specs)
{
	struct lxc_storage *bdev;
	static const char *const best_options[] = {"btrfs", "zfs", "lvm",
						   "dir",   "rbd", NULL};

	if (!type)
		return do_storage_create(dest, "dir", cname, specs);

	if (strcmp(type, "best") == 0) {
		int i;
		/* Try for the best backing store type, according to our
		 * opinionated preferences.
		 */
		for (i = 0; best_options[i]; i++) {
			bdev = do_storage_create(dest, best_options[i], cname,
						 specs);
			if (bdev)
				return bdev;
		}

		return NULL;
	}

	/* -B lvm,dir */
	if (strchr(type, ',') != NULL) {
		char *dup, *saveptr = NULL, *token;

		/* strtok_r() modifies its input, so work on a private copy.
		 * The copy lives on the heap because @type comes from the
		 * caller and has no length bound.
		 */
		dup = strdup(type);
		if (!dup)
			return NULL;

		bdev = NULL;
		for (token = strtok_r(dup, ",", &saveptr); token;
		     token = strtok_r(NULL, ",", &saveptr)) {
			bdev = do_storage_create(dest, token, cname, specs);
			if (bdev)
				break;
		}
		free(dup);

		/* No driver name contains a ',', so retrying with the unsplit
		 * list could never succeed.
		 */
		return bdev;
	}

	return do_storage_create(dest, type, cname, specs);
}
578
579 bool storage_destroy(struct lxc_conf *conf)
580 {
581 struct lxc_storage *r;
582 bool ret = false;
583
584 r = storage_init(conf, conf->rootfs.path, conf->rootfs.mount, NULL);
585 if (!r)
586 return ret;
587
588 if (r->ops->destroy(r) == 0)
589 ret = true;
590
591 storage_put(r);
592 return ret;
593 }
594
595 struct lxc_storage *storage_init(struct lxc_conf *conf, const char *src,
596 const char *dst, const char *mntopts)
597 {
598 struct lxc_storage *bdev;
599 const struct lxc_storage_type *q;
600
601 BUILD_BUG_ON(LXC_STORAGE_INTERNAL_OVERLAY_RESTORE <= LXC_CLONE_MAXFLAGS);
602
603 if (!src)
604 src = conf->rootfs.path;
605
606 if (!src)
607 return NULL;
608
609 q = storage_query(conf, src);
610 if (!q)
611 return NULL;
612
613 bdev = malloc(sizeof(struct lxc_storage));
614 if (!bdev)
615 return NULL;
616
617 memset(bdev, 0, sizeof(struct lxc_storage));
618 bdev->ops = q->ops;
619 bdev->type = q->name;
620 if (mntopts)
621 bdev->mntopts = strdup(mntopts);
622 if (src)
623 bdev->src = strdup(src);
624 if (dst)
625 bdev->dest = strdup(dst);
626 if (strcmp(bdev->type, "nbd") == 0)
627 bdev->nbd_idx = conf->nbd_idx;
628
629 if (!strcmp(bdev->type, "aufs"))
630 WARN("The \"aufs\" driver will is deprecated and will soon be "
631 "removed. For similar functionality see the \"overlay\" "
632 "storage driver");
633
634 return bdev;
635 }
636
637 bool storage_is_dir(struct lxc_conf *conf, const char *path)
638 {
639 struct lxc_storage *orig;
640 bool bret = false;
641
642 orig = storage_init(conf, path, NULL, NULL);
643 if (!orig)
644 return bret;
645
646 if (strcmp(orig->type, "dir") == 0)
647 bret = true;
648
649 storage_put(orig);
650 return bret;
651 }
652
653 void storage_put(struct lxc_storage *bdev)
654 {
655 free(bdev->mntopts);
656 free(bdev->src);
657 free(bdev->dest);
658 free(bdev);
659 }
660
661 bool rootfs_is_blockdev(struct lxc_conf *conf)
662 {
663 const struct lxc_storage_type *q;
664 struct stat st;
665 int ret;
666
667 if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
668 strlen(conf->rootfs.path) == 0)
669 return false;
670
671 ret = stat(conf->rootfs.path, &st);
672 if (ret == 0 && S_ISBLK(st.st_mode))
673 return true;
674
675 q = storage_query(conf, conf->rootfs.path);
676 if (!q)
677 return false;
678
679 if (strcmp(q->name, "lvm") == 0 ||
680 strcmp(q->name, "loop") == 0 ||
681 strcmp(q->name, "nbd") == 0 ||
682 strcmp(q->name, "rbd") == 0 ||
683 strcmp(q->name, "zfs") == 0)
684 return true;
685
686 return false;
687 }
688
/* Strip a leading "<prefix>:" from @src.
 *
 * @src:    storage source string, e.g. "dir:/var/lib/lxc/c1/rootfs".
 * @prefix: driver name expected in front of the ':'.
 *
 * Returns a pointer into @src just past the ':' when @src starts with
 * "<prefix>:", otherwise @src itself. No memory is allocated.
 */
char *lxc_storage_get_path(char *src, const char *prefix)
{
	const size_t skip = strlen(prefix);

	if (strncmp(src, prefix, skip) != 0)
		return src;

	if (src[skip] != ':')
		return src;

	return &src[skip + 1];
}