]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/storage/storage.c
Restore most cases of am_guest_unpriv
[mirror_lxc.git] / src / lxc / storage / storage.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <dirent.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <grp.h>
29 #include <inttypes.h>
30 #include <libgen.h>
31 #include <sched.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <sys/mount.h>
37 #include <sys/prctl.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41
42 #include "aufs.h"
43 #include "btrfs.h"
44 #include "conf.h"
45 #include "config.h"
46 #include "dir.h"
47 #include "error.h"
48 #include "log.h"
49 #include "loop.h"
50 #include "lvm.h"
51 #include "lxc.h"
52 #include "lxclock.h"
53 #include "nbd.h"
54 #include "namespace.h"
55 #include "overlay.h"
56 #include "parse.h"
57 #include "rbd.h"
58 #include "rsync.h"
59 #include "storage.h"
60 #include "storage_utils.h"
61 #include "utils.h"
62 #include "zfs.h"
63
64 #ifndef BLKGETSIZE64
65 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
66 #endif
67
68 lxc_log_define(storage, lxc);
69
70 /* aufs */
71 static const struct lxc_storage_ops aufs_ops = {
72 .detect = &aufs_detect,
73 .mount = &aufs_mount,
74 .umount = &aufs_umount,
75 .clone_paths = &aufs_clonepaths,
76 .destroy = &aufs_destroy,
77 .create = &aufs_create,
78 .copy = NULL,
79 .snapshot = NULL,
80 .can_snapshot = true,
81 .can_backup = true,
82 };
83
84 /* btrfs */
85 static const struct lxc_storage_ops btrfs_ops = {
86 .detect = &btrfs_detect,
87 .mount = &btrfs_mount,
88 .umount = &btrfs_umount,
89 .clone_paths = &btrfs_clonepaths,
90 .destroy = &btrfs_destroy,
91 .create = &btrfs_create,
92 .copy = &btrfs_create_clone,
93 .snapshot = &btrfs_create_snapshot,
94 .can_snapshot = true,
95 .can_backup = true,
96 };
97
98 /* dir */
99 static const struct lxc_storage_ops dir_ops = {
100 .detect = &dir_detect,
101 .mount = &dir_mount,
102 .umount = &dir_umount,
103 .clone_paths = &dir_clonepaths,
104 .destroy = &dir_destroy,
105 .create = &dir_create,
106 .copy = NULL,
107 .snapshot = NULL,
108 .can_snapshot = false,
109 .can_backup = true,
110 };
111
112 /* loop */
113 static const struct lxc_storage_ops loop_ops = {
114 .detect = &loop_detect,
115 .mount = &loop_mount,
116 .umount = &loop_umount,
117 .clone_paths = &loop_clonepaths,
118 .destroy = &loop_destroy,
119 .create = &loop_create,
120 .copy = NULL,
121 .snapshot = NULL,
122 .can_snapshot = false,
123 .can_backup = true,
124 };
125
126 /* lvm */
127 static const struct lxc_storage_ops lvm_ops = {
128 .detect = &lvm_detect,
129 .mount = &lvm_mount,
130 .umount = &lvm_umount,
131 .clone_paths = &lvm_clonepaths,
132 .destroy = &lvm_destroy,
133 .create = &lvm_create,
134 .copy = &lvm_create_clone,
135 .snapshot = &lvm_create_snapshot,
136 .can_snapshot = true,
137 .can_backup = false,
138 };
139
140 /* nbd */
141 const struct lxc_storage_ops nbd_ops = {
142 .detect = &nbd_detect,
143 .mount = &nbd_mount,
144 .umount = &nbd_umount,
145 .clone_paths = &nbd_clonepaths,
146 .destroy = &nbd_destroy,
147 .create = &nbd_create,
148 .copy = NULL,
149 .snapshot = NULL,
150 .can_snapshot = true,
151 .can_backup = false,
152 };
153
154 /* overlay */
155 static const struct lxc_storage_ops ovl_ops = {
156 .detect = &ovl_detect,
157 .mount = &ovl_mount,
158 .umount = &ovl_umount,
159 .clone_paths = &ovl_clonepaths,
160 .destroy = &ovl_destroy,
161 .create = &ovl_create,
162 .copy = NULL,
163 .snapshot = NULL,
164 .can_snapshot = true,
165 .can_backup = true,
166 };
167
168 /* rbd */
169 static const struct lxc_storage_ops rbd_ops = {
170 .detect = &rbd_detect,
171 .mount = &rbd_mount,
172 .umount = &rbd_umount,
173 .clone_paths = &rbd_clonepaths,
174 .destroy = &rbd_destroy,
175 .create = &rbd_create,
176 .copy = NULL,
177 .snapshot = NULL,
178 .can_snapshot = false,
179 .can_backup = false,
180 };
181
182 /* zfs */
183 static const struct lxc_storage_ops zfs_ops = {
184 .detect = &zfs_detect,
185 .mount = &zfs_mount,
186 .umount = &zfs_umount,
187 .clone_paths = &zfs_clonepaths,
188 .destroy = &zfs_destroy,
189 .create = &zfs_create,
190 .copy = &zfs_copy,
191 .snapshot = &zfs_snapshot,
192 .can_snapshot = true,
193 .can_backup = true,
194 };
195
/* Associates a storage driver name with the operations table implementing it. */
struct lxc_storage_type {
	/* Driver name as matched against a configured type or path prefix. */
	const char *name;
	/* Callbacks and capability flags of the driver. */
	const struct lxc_storage_ops *ops;
};
200
201 static const struct lxc_storage_type bdevs[] = {
202 { .name = "dir", .ops = &dir_ops, },
203 { .name = "zfs", .ops = &zfs_ops, },
204 { .name = "lvm", .ops = &lvm_ops, },
205 { .name = "rbd", .ops = &rbd_ops, },
206 { .name = "btrfs", .ops = &btrfs_ops, },
207 { .name = "aufs", .ops = &aufs_ops, },
208 { .name = "overlay", .ops = &ovl_ops, },
209 { .name = "overlayfs", .ops = &ovl_ops, },
210 { .name = "loop", .ops = &loop_ops, },
211 { .name = "nbd", .ops = &nbd_ops, },
212 };
213
214 static const size_t numbdevs = sizeof(bdevs) / sizeof(struct lxc_storage_type);
215
216 static const struct lxc_storage_type *get_storage_by_name(const char *path,
217 const char *type)
218 {
219 int ret;
220 size_t i, cmplen;
221
222 if (type)
223 cmplen = strlen(type);
224 else
225 cmplen = strcspn(path, ":");
226 if (cmplen == 0)
227 return NULL;
228
229 for (i = 0; i < numbdevs; i++) {
230 if (type)
231 ret = strncmp(bdevs[i].name, type, cmplen);
232 else
233 ret = strncmp(bdevs[i].name, path, cmplen);
234 if (ret == 0)
235 break;
236 }
237
238 if (i == numbdevs)
239 return NULL;
240
241 DEBUG("Detected rootfs type \"%s\"", bdevs[i].name);
242 return &bdevs[i];
243 }
244
245 static const struct lxc_storage_type *storage_query(struct lxc_conf *conf)
246 {
247 size_t i;
248 const struct lxc_storage_type *bdev;
249 const char *path = conf->rootfs.path;
250 const char *type = conf->rootfs.bdev_type;
251
252 bdev = get_storage_by_name(path, type);
253 if (bdev)
254 return bdev;
255
256 for (i = 0; i < numbdevs; i++)
257 if (bdevs[i].ops->detect(path))
258 break;
259
260 if (i == numbdevs)
261 return NULL;
262
263 DEBUG("Detected rootfs type \"%s\"", bdevs[i].name);
264 return &bdevs[i];
265 }
266
267 struct lxc_storage *storage_get(const char *type)
268 {
269 size_t i;
270 struct lxc_storage *bdev;
271
272 for (i = 0; i < numbdevs; i++)
273 if (strcmp(bdevs[i].name, type) == 0)
274 break;
275
276 if (i == numbdevs)
277 return NULL;
278
279 bdev = malloc(sizeof(struct lxc_storage));
280 if (!bdev)
281 return NULL;
282
283 memset(bdev, 0, sizeof(struct lxc_storage));
284 bdev->ops = bdevs[i].ops;
285 bdev->type = bdevs[i].name;
286
287 if (strcmp(bdev->type, "aufs") == 0)
288 WARN("The \"aufs\" driver will is deprecated and will soon be "
289 "removed. For similar functionality see the \"overlay\" "
290 "storage driver");
291
292 return bdev;
293 }
294
295 static struct lxc_storage *do_storage_create(const char *dest, const char *type,
296 const char *cname,
297 struct bdev_specs *specs)
298 {
299 int ret;
300 struct lxc_storage *bdev;
301
302 if (!type)
303 type = "dir";
304
305 bdev = storage_get(type);
306 if (!bdev)
307 return NULL;
308
309 ret = bdev->ops->create(bdev, dest, cname, specs);
310 if (ret < 0) {
311 storage_put(bdev);
312 return NULL;
313 }
314
315 return bdev;
316 }
317
318 bool storage_can_backup(struct lxc_conf *conf)
319 {
320 bool ret;
321 struct lxc_storage *bdev;
322
323 bdev = storage_init(conf);
324 if (!bdev)
325 return false;
326
327 ret = bdev->ops->can_backup;
328 storage_put(bdev);
329 return ret;
330 }
331
332 /* If we're not snaphotting, then storage_copy becomes a simple case of mount
333 * the original, mount the new, and rsync the contents.
334 */
335 struct lxc_storage *storage_copy(struct lxc_container *c, const char *cname,
336 const char *lxcpath, const char *bdevtype,
337 int flags, const char *bdevdata,
338 uint64_t newsize, bool *needs_rdep)
339 {
340 int ret;
341 const char *src_no_prefix;
342 struct lxc_storage *new, *orig;
343 bool snap = (flags & LXC_CLONE_SNAPSHOT);
344 bool maybe_snap = (flags & LXC_CLONE_MAYBE_SNAPSHOT);
345 bool keepbdevtype = (flags & LXC_CLONE_KEEPBDEVTYPE);
346 const char *src = c->lxc_conf->rootfs.path;
347 const char *oldname = c->name;
348 const char *oldpath = c->config_path;
349 char cmd_output[MAXPATHLEN] = {0};
350 struct rsync_data data = {0};
351
352 if (!src) {
353 ERROR("No rootfs specified");
354 return NULL;
355 }
356
357 /* If the container name doesn't show up in the rootfs path, then we
358 * don't know how to come up with a new name.
359 */
360 if (!strstr(src, oldname)) {
361 ERROR("Original rootfs path \"%s\" does not include container "
362 "name \"%s\"", src, oldname);
363 return NULL;
364 }
365
366 orig = storage_init(c->lxc_conf);
367 if (!orig) {
368 ERROR("Failed to detect storage driver for \"%s\"", oldname);
369 return NULL;
370 }
371
372 if (!orig->dest) {
373 int ret;
374 size_t len;
375 struct stat sb;
376
377 len = strlen(oldpath) + strlen(oldname) + strlen("/rootfs") + 2;
378 orig->dest = malloc(len);
379 if (!orig->dest) {
380 ERROR("Failed to allocate memory");
381 goto on_error_put_orig;
382 }
383
384 ret = snprintf(orig->dest, len, "%s/%s/rootfs", oldpath, oldname);
385 if (ret < 0 || (size_t)ret >= len) {
386 ERROR("Failed to create string");
387 goto on_error_put_orig;
388 }
389
390 ret = stat(orig->dest, &sb);
391 if (ret < 0 && errno == ENOENT) {
392 ret = mkdir_p(orig->dest, 0755);
393 if (ret < 0)
394 WARN("Failed to create directory \"%s\"", orig->dest);
395 }
396 }
397
398 /* Special case for snapshot. If the caller requested maybe_snapshot and
399 * keepbdevtype and the backing store is directory, then proceed with a
400 * a copy clone rather than returning error.
401 */
402 if (maybe_snap && keepbdevtype && !bdevtype && !orig->ops->can_snapshot)
403 snap = false;
404
405 /* If newtype is NULL and snapshot is set, then use overlay. */
406 if (!bdevtype && !keepbdevtype && snap && !strcmp(orig->type, "dir"))
407 bdevtype = "overlay";
408
409 if (am_guest_unpriv() && !unpriv_snap_allowed(orig, bdevtype, snap, maybe_snap)) {
410 ERROR("Unsupported snapshot type \"%s\" for unprivileged users",
411 bdevtype ? bdevtype : "(null)");
412 goto on_error_put_orig;
413 }
414
415 *needs_rdep = false;
416 if (bdevtype) {
417 if (snap && !strcmp(orig->type, "lvm") &&
418 !lvm_is_thin_volume(orig->src))
419 *needs_rdep = true;
420 else if (!strcmp(bdevtype, "overlay") ||
421 !strcmp(bdevtype, "overlayfs"))
422 *needs_rdep = true;
423 } else {
424 if (!snap && strcmp(oldpath, lxcpath))
425 bdevtype = "dir";
426 else
427 bdevtype = orig->type;
428
429 if (!strcmp(bdevtype, "overlay") ||
430 !strcmp(bdevtype, "overlayfs"))
431 *needs_rdep = true;
432 }
433
434 /* get new bdev type */
435 new = storage_get(bdevtype);
436 if (!new) {
437 ERROR("Failed to initialize %s storage driver",
438 bdevtype ? bdevtype : orig->type);
439 goto on_error_put_orig;
440 }
441 TRACE("Initialized %s storage driver", new->type);
442
443 /* create new paths */
444 ret = new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath,
445 snap, newsize, c->lxc_conf);
446 if (ret < 0) {
447 ERROR("Failed creating new paths for clone of \"%s\"", src);
448 goto on_error_put_new;
449 }
450
451 /* When we create an overlay snapshot of an overlay container in the
452 * snapshot directory under "<lxcpath>/<name>/snaps/" we don't need to
453 * record a dependency. If we would restore would also fail.
454 */
455 if ((strcmp(new->type, "overlay") == 0 ||
456 strcmp(new->type, "overlayfs") == 0) &&
457 ret == LXC_CLONE_SNAPSHOT)
458 *needs_rdep = false;
459
460 /* btrfs */
461 if (!strcmp(orig->type, "btrfs") && !strcmp(new->type, "btrfs")) {
462 bool bret;
463
464 if (snap || btrfs_same_fs(orig->dest, new->dest) == 0)
465 bret = new->ops->snapshot(c->lxc_conf, orig, new, 0);
466 else
467 bret = new->ops->copy(c->lxc_conf, orig, new, 0);
468 if (!bret)
469 goto on_error_put_new;
470
471 goto on_success;
472 }
473
474 /* lvm */
475 if (!strcmp(orig->type, "lvm") && !strcmp(new->type, "lvm")) {
476 bool bret;
477
478 if (snap)
479 bret = new->ops->snapshot(c->lxc_conf, orig, new, newsize);
480 else
481 bret = new->ops->copy(c->lxc_conf, orig, new, newsize);
482 if (!bret)
483 goto on_error_put_new;
484
485 goto on_success;
486 }
487
488 /* zfs */
489 if (!strcmp(orig->type, "zfs") && !strcmp(new->type, "zfs")) {
490 bool bret;
491
492 if (snap)
493 bret = new->ops->snapshot(c->lxc_conf, orig, new, newsize);
494 else
495 bret = new->ops->copy(c->lxc_conf, orig, new, newsize);
496 if (!bret)
497 goto on_error_put_new;
498
499 goto on_success;
500 }
501
502 if (strcmp(bdevtype, "btrfs")) {
503 if (!strcmp(new->type, "overlay") || !strcmp(new->type, "overlayfs"))
504 src_no_prefix = ovl_get_lower(new->src);
505 else
506 src_no_prefix = lxc_storage_get_path(new->src, new->type);
507
508 if (am_guest_unpriv()) {
509 ret = chown_mapped_root(src_no_prefix, c->lxc_conf);
510 if (ret < 0)
511 WARN("Failed to chown \"%s\"", new->src);
512 }
513 }
514
515 if (snap)
516 goto on_success;
517
518 /* rsync the contents from source to target */
519 data.orig = orig;
520 data.new = new;
521 if (am_guest_unpriv())
522 ret = userns_exec_full(c->lxc_conf,
523 lxc_storage_rsync_exec_wrapper, &data,
524 "lxc_storage_rsync_exec_wrapper");
525 else
526 ret = run_command(cmd_output, sizeof(cmd_output),
527 lxc_storage_rsync_exec_wrapper, (void *)&data);
528 if (ret < 0) {
529 ERROR("Failed to rsync from \"%s\" into \"%s\"%s%s", orig->dest,
530 new->dest,
531 cmd_output[0] != '\0' ? ": " : "",
532 cmd_output[0] != '\0' ? cmd_output : "");
533 goto on_error_put_new;
534 }
535
536 on_success:
537 storage_put(orig);
538
539 return new;
540
541 on_error_put_new:
542 storage_put(new);
543
544 on_error_put_orig:
545 storage_put(orig);
546
547 return NULL;
548 }
549
/* Create a backing store for a container.
 *
 * @dest:  the mountpoint (i.e. /var/lib/lxc/$name/rootfs)
 * @type:  the bdevtype (dir, btrfs, zfs, rbd, etc). NULL selects "dir",
 *         "best" tries a built-in preference list, and a comma-separated
 *         list (e.g. "lvm,dir") tries each entry in order.
 * @cname: the container name
 * @specs: details about the backing store to create, like fstype
 *
 * If successful, returns a struct lxc_storage * owned by the caller, who
 * will need to call storage_put() on it when done (the original note also
 * asks callers to invoke the umount operation first). Returns NULL if no
 * candidate type could be created.
 */
struct lxc_storage *storage_create(const char *dest, const char *type,
				   const char *cname, struct bdev_specs *specs)
{
	static const char *const best_options[] = {
		"btrfs", "zfs", "lvm", "dir", "rbd",
	};
	struct lxc_storage *bdev;
	size_t i;

	if (!type)
		return do_storage_create(dest, "dir", cname, specs);

	if (strcmp(type, "best") == 0) {
		/* Try for the best backing store type, according to our
		 * opinionated preferences.
		 */
		for (i = 0; i < sizeof(best_options) / sizeof(best_options[0]); i++) {
			bdev = do_storage_create(dest, best_options[i], cname, specs);
			if (bdev)
				return bdev;
		}

		return NULL;
	}

	/* -B lvm,dir */
	if (strchr(type, ',')) {
		char *dup, *token;
		char *saveptr = NULL;

		/* strtok_r() modifies its input, so work on a private heap
		 * copy; the length of @type is caller-controlled and must not
		 * be placed on the stack unchecked.
		 */
		dup = strdup(type);
		if (!dup) {
			ERROR("Failed to allocate memory");
			return NULL;
		}

		bdev = NULL;
		for (token = strtok_r(dup, ",", &saveptr); token;
		     token = strtok_r(NULL, ",", &saveptr)) {
			bdev = do_storage_create(dest, token, cname, specs);
			if (bdev)
				break;
		}
		free(dup);

		/* No registered driver name contains a comma, so retrying
		 * with the whole list as a single name could never succeed.
		 */
		return bdev;
	}

	return do_storage_create(dest, type, cname, specs);
}
601
602 bool storage_destroy(struct lxc_conf *conf)
603 {
604 struct lxc_storage *r;
605 bool ret = false;
606 int destroy_rv = 0;
607
608 r = storage_init(conf);
609 if (!r)
610 return ret;
611
612 destroy_rv = r->ops->destroy(r);
613 if (destroy_rv == 0)
614 ret = true;
615
616 storage_put(r);
617 return ret;
618 }
619
620 struct lxc_storage *storage_init(struct lxc_conf *conf)
621 {
622 struct lxc_storage *bdev;
623 const struct lxc_storage_type *q;
624 const char *src = conf->rootfs.path;
625 const char *dst = conf->rootfs.mount;
626 const char *mntopts = conf->rootfs.options;
627
628 BUILD_BUG_ON(LXC_STORAGE_INTERNAL_OVERLAY_RESTORE <= LXC_CLONE_MAXFLAGS);
629
630 if (!src)
631 return NULL;
632
633 q = storage_query(conf);
634 if (!q)
635 return NULL;
636
637 bdev = malloc(sizeof(struct lxc_storage));
638 if (!bdev)
639 return NULL;
640
641 memset(bdev, 0, sizeof(struct lxc_storage));
642
643 bdev->ops = q->ops;
644 bdev->type = q->name;
645
646 if (mntopts)
647 bdev->mntopts = strdup(mntopts);
648
649 if (src)
650 bdev->src = strdup(src);
651
652 if (dst)
653 bdev->dest = strdup(dst);
654
655 if (strcmp(bdev->type, "nbd") == 0)
656 bdev->nbd_idx = conf->nbd_idx;
657
658 if (strcmp(bdev->type, "aufs") == 0)
659 WARN("The \"aufs\" driver will is deprecated and will soon be "
660 "removed. For similar functionality see the \"overlay\" "
661 "storage driver");
662
663 return bdev;
664 }
665
666 bool storage_is_dir(struct lxc_conf *conf)
667 {
668 struct lxc_storage *orig;
669 char *type = conf->rootfs.bdev_type;
670 bool bret = false;
671
672 if (type)
673 return (strcmp(type, "dir") == 0);
674
675 orig = storage_init(conf);
676 if (!orig)
677 return bret;
678
679 if (strcmp(orig->type, "dir") == 0)
680 bret = true;
681
682 storage_put(orig);
683 return bret;
684 }
685
686 void storage_put(struct lxc_storage *bdev)
687 {
688 free(bdev->mntopts);
689 free(bdev->src);
690 free(bdev->dest);
691 free(bdev);
692 }
693
694 bool rootfs_is_blockdev(struct lxc_conf *conf)
695 {
696 const struct lxc_storage_type *q;
697 struct stat st;
698 int ret;
699
700 if (!conf->rootfs.path || strcmp(conf->rootfs.path, "/") == 0 ||
701 strlen(conf->rootfs.path) == 0)
702 return false;
703
704 ret = stat(conf->rootfs.path, &st);
705 if (ret == 0 && S_ISBLK(st.st_mode))
706 return true;
707
708 q = storage_query(conf);
709 if (!q)
710 return false;
711
712 if (strcmp(q->name, "lvm") == 0 ||
713 strcmp(q->name, "loop") == 0 ||
714 strcmp(q->name, "nbd") == 0 ||
715 strcmp(q->name, "rbd") == 0 ||
716 strcmp(q->name, "zfs") == 0)
717 return true;
718
719 return false;
720 }
721
/* Strip a leading "<prefix>:" from a storage source string.
 *
 * @src:    source string, possibly of the form "<prefix>:<path>".
 * @prefix: driver prefix to strip, without the trailing ':'.
 *
 * Returns a pointer into @src just past the ':' when @src starts with
 * @prefix immediately followed by ':'; otherwise returns @src unchanged.
 * No memory is allocated.
 */
const char *lxc_storage_get_path(char *src, const char *prefix)
{
	const size_t skip = strlen(prefix);

	if (strncmp(src, prefix, skip) != 0)
		return src;

	if (src[skip] != ':')
		return src;

	return &src[skip + 1];
}