]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/bdev/overlay.c
Split bdev into modules: overlay
[mirror_lxc.git] / src / lxc / bdev / overlay.c
CommitLineData
38683db4
CB
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24#define _GNU_SOURCE
25#include <errno.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "bdev.h"
31#include "conf.h"
32#include "confile.h"
33#include "log.h"
34#include "lxccontainer.h"
35#include "overlay.h"
36#include "utils.h"
37
38lxc_log_define(overlay, lxc);
39
/* Pair of block devices handed to ovl_rsync() as a single argument. */
struct ovl_rsync_data {
	/* bdev whose mounted tree is the rsync source */
	struct bdev *orig;
	/* bdev whose mounted tree is the rsync destination */
	struct bdev *new;
};
44
45/* defined in lxccontainer.c: needs to become common helper */
46extern int do_rsync(const char *src, const char *dest);
47
48/* defined in lxccontainer.c: needs to become common helper */
49extern char *dir_new_path(char *src, const char *oldname, const char *name,
50 const char *oldpath, const char *lxcpath);
51
/*
 * Cut @p off at its first ':' (if there is one) and hand @p back.
 * The string is modified in place; nothing is allocated.
 */
char *overlay_getlower(char *p)
{
	char *colon;

	colon = strchr(p, ':');
	if (colon != NULL)
		*colon = '\0';

	return p;
}
59
/*
 * Report whether @path names an overlayfs source.
 * Only the "overlayfs:" prefix is examined; the rest of the string is
 * trusted as-is. Returns 1 on a match, 0 otherwise.
 */
int overlayfs_detect(const char *path)
{
	static const char prefix[] = "overlayfs:";

	return strncmp(path, prefix, sizeof(prefix) - 1) == 0 ? 1 : 0;
}
66
/* Filesystem type name used for mounting; set lazily by overlayfs_mount(). */
static char *overlayfs_name;

/*
 * Choose the filesystem type string for overlay mounts.
 *
 * Scans /proc/filesystems for the exact line "nodev\toverlay\n" and returns
 * "overlay" when it is present. Returns "overlayfs" when the line is absent
 * or the file cannot be opened. The returned pointer refers to a string
 * literal and must not be freed.
 */
static char *detect_overlayfs_name(void)
{
	FILE *fp;
	char *buf = NULL;
	size_t cap = 0;
	char *name = "overlayfs";

	fp = fopen("/proc/filesystems", "r");
	if (fp == NULL)
		return name;

	for (;;) {
		if (getline(&buf, &cap, fp) == -1)
			break;
		if (!strcmp(buf, "nodev\toverlay\n")) {
			name = "overlay";
			break;
		}
	}

	free(buf);
	fclose(fp);

	return name;
}
88
89/* XXXXXXX plain directory bind mount ops */
90int overlayfs_mount(struct bdev *bdev)
91{
92 char *options, *dup, *lower, *upper;
93 char *options_work, *work, *lastslash;
94 int lastslashidx;
95 int len, len2;
96 unsigned long mntflags;
97 char *mntdata;
98 int ret, ret2;
99
100 if (strcmp(bdev->type, "overlayfs"))
101 return -22;
102 if (!bdev->src || !bdev->dest)
103 return -22;
104
105 // defined in bdev.c
106 if (!overlayfs_name)
107 overlayfs_name = detect_overlayfs_name();
108
109 // separately mount it first
110 // mount -t overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest
111 dup = alloca(strlen(bdev->src)+1);
112 strcpy(dup, bdev->src);
113 if (!(lower = strchr(dup, ':')))
114 return -22;
115 if (!(upper = strchr(++lower, ':')))
116 return -22;
117 *upper = '\0';
118 upper++;
119
120 // if delta doesn't yet exist, create it
121 if (mkdir_p(upper, 0755) < 0 && errno != EEXIST)
122 return -22;
123
124 // overlayfs.v22 or higher needs workdir option
125 // if upper is /var/lib/lxc/c2/delta0,
126 // then workdir is /var/lib/lxc/c2/olwork
127 lastslash = strrchr(upper, '/');
128 if (!lastslash)
129 return -22;
130 lastslash++;
131 lastslashidx = lastslash - upper;
132
133 work = alloca(lastslashidx + 7);
134 strncpy(work, upper, lastslashidx+7);
135 strcpy(work+lastslashidx, "olwork");
136
137 if (parse_mntopts(bdev->mntopts, &mntflags, &mntdata) < 0) {
138 free(mntdata);
139 return -22;
140 }
141
142 if (mkdir_p(work, 0755) < 0 && errno != EEXIST) {
143 free(mntdata);
144 return -22;
145 }
146
147 // TODO We should check whether bdev->src is a blockdev, and if so
148 // but for now, only support overlays of a basic directory
149
150 if (mntdata) {
151 len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=,") + strlen(mntdata) + 1;
152 options = alloca(len);
153 ret = snprintf(options, len, "upperdir=%s,lowerdir=%s,%s", upper, lower, mntdata);
154
155 len2 = strlen(lower) + strlen(upper) + strlen(work)
156 + strlen("upperdir=,lowerdir=,workdir=") + strlen(mntdata) + 1;
157 options_work = alloca(len2);
158 ret2 = snprintf(options, len2, "upperdir=%s,lowerdir=%s,workdir=%s,%s",
159 upper, lower, work, mntdata);
160 }
161 else {
162 len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=") + 1;
163 options = alloca(len);
164 ret = snprintf(options, len, "upperdir=%s,lowerdir=%s", upper, lower);
165
166 len2 = strlen(lower) + strlen(upper) + strlen(work)
167 + strlen("upperdir=,lowerdir=,workdir=") + 1;
168 options_work = alloca(len2);
169 ret2 = snprintf(options_work, len2, "upperdir=%s,lowerdir=%s,workdir=%s",
170 upper, lower, work);
171 }
172 if (ret < 0 || ret >= len || ret2 < 0 || ret2 >= len2) {
173 free(mntdata);
174 return -1;
175 }
176
177 // mount without workdir option for overlayfs before v21
178 ret = mount(lower, bdev->dest, overlayfs_name, MS_MGC_VAL | mntflags, options);
179 if (ret < 0) {
180 INFO("overlayfs: error mounting %s onto %s options %s. retry with workdir",
181 lower, bdev->dest, options);
182
183 // retry with workdir option for overlayfs v22 and higher
184 ret = mount(lower, bdev->dest, overlayfs_name, MS_MGC_VAL | mntflags, options_work);
185 if (ret < 0)
186 SYSERROR("overlayfs: error mounting %s onto %s options %s",
187 lower, bdev->dest, options_work);
188 else
189 INFO("overlayfs: mounted %s onto %s options %s",
190 lower, bdev->dest, options_work);
191 }
192 else
193 INFO("overlayfs: mounted %s onto %s options %s",
194 lower, bdev->dest, options);
195 return ret;
196}
197
198int overlayfs_umount(struct bdev *bdev)
199{
200 if (strcmp(bdev->type, "overlayfs"))
201 return -22;
202 if (!bdev->src || !bdev->dest)
203 return -22;
204 return umount(bdev->dest);
205}
206
207static int ovl_rsync(struct ovl_rsync_data *data)
208{
209 int ret;
210
211 if (setgid(0) < 0) {
212 ERROR("Failed to setgid to 0");
213 return -1;
214 }
215 if (setgroups(0, NULL) < 0)
216 WARN("Failed to clear groups");
217 if (setuid(0) < 0) {
218 ERROR("Failed to setuid to 0");
219 return -1;
220 }
221
222 if (unshare(CLONE_NEWNS) < 0) {
223 SYSERROR("Unable to unshare mounts ns");
224 return -1;
225 }
226 if (detect_shared_rootfs()) {
227 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
228 SYSERROR("Failed to make / rslave");
229 ERROR("Continuing...");
230 }
231 }
232 if (overlayfs_mount(data->orig) < 0) {
233 ERROR("Failed mounting original container fs");
234 return -1;
235 }
236 if (overlayfs_mount(data->new) < 0) {
237 ERROR("Failed mounting new container fs");
238 return -1;
239 }
240 ret = do_rsync(data->orig->dest, data->new->dest);
241
242 overlayfs_umount(data->new);
243 overlayfs_umount(data->orig);
244
245 if (ret < 0) {
246 ERROR("rsyncing %s to %s", data->orig->dest, data->new->dest);
247 return -1;
248 }
249
250 return 0;
251}
252
/*
 * Adapter with an int (*)(void *) signature: forwards @data, which must
 * point to a struct ovl_rsync_data, to ovl_rsync() and returns its result.
 */
static int ovl_rsync_wrapper(void *data)
{
	return ovl_rsync((struct ovl_rsync_data *)data);
}
258
259static int ovl_do_rsync(struct bdev *orig, struct bdev *new, struct lxc_conf *conf)
260{
261 int ret = -1;
262 struct ovl_rsync_data rdata;
263
264 rdata.orig = orig;
265 rdata.new = new;
266 if (am_unpriv())
267 ret = userns_exec_1(conf, ovl_rsync_wrapper, &rdata);
268 else
269 ret = ovl_rsync(&rdata);
270 if (ret)
271 ERROR("copying overlayfs delta");
272
273 return ret;
274}
275
276int overlayfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname,
277 const char *cname, const char *oldpath, const char *lxcpath, int snap,
278 uint64_t newsize, struct lxc_conf *conf)
279{
280 if (!snap) {
281 ERROR("overlayfs is only for snapshot clones");
282 return -22;
283 }
284
285 if (!orig->src || !orig->dest)
286 return -1;
287
288 new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath);
289 if (!new->dest)
290 return -1;
291 if (mkdir_p(new->dest, 0755) < 0)
292 return -1;
293
294 if (am_unpriv() && chown_mapped_root(new->dest, conf) < 0)
295 WARN("Failed to update ownership of %s", new->dest);
296
297 if (strcmp(orig->type, "dir") == 0) {
298 char *delta, *lastslash;
299 char *work;
300 int ret, len, lastslashidx;
301
302 // if we have /var/lib/lxc/c2/rootfs, then delta will be
303 // /var/lib/lxc/c2/delta0
304 lastslash = strrchr(new->dest, '/');
305 if (!lastslash)
306 return -22;
307 if (strlen(lastslash) < 7)
308 return -22;
309 lastslash++;
310 lastslashidx = lastslash - new->dest;
311
312 delta = malloc(lastslashidx + 7);
313 if (!delta)
314 return -1;
315 strncpy(delta, new->dest, lastslashidx+1);
316 strcpy(delta+lastslashidx, "delta0");
317 if ((ret = mkdir(delta, 0755)) < 0) {
318 SYSERROR("error: mkdir %s", delta);
319 free(delta);
320 return -1;
321 }
322 if (am_unpriv() && chown_mapped_root(delta, conf) < 0)
323 WARN("Failed to update ownership of %s", delta);
324
325 // make workdir for overlayfs.v22 or higher
326 // workdir is /var/lib/lxc/c2/olwork
327 // it is used to prepare files before atomically swithing with destination,
328 // and needs to be on the same filesystem as upperdir,
329 // so it's OK for it to be empty.
330 work = malloc(lastslashidx + 7);
331 if (!work) {
332 free(delta);
333 return -1;
334 }
335 strncpy(work, new->dest, lastslashidx+1);
336 strcpy(work+lastslashidx, "olwork");
337 if (mkdir(work, 0755) < 0) {
338 SYSERROR("error: mkdir %s", work);
339 free(delta);
340 free(work);
341 return -1;
342 }
343 if (am_unpriv() && chown_mapped_root(work, conf) < 0)
344 WARN("Failed to update ownership of %s", work);
345 free(work);
346
347 // the src will be 'overlayfs:lowerdir:upperdir'
348 len = strlen(delta) + strlen(orig->src) + 12;
349 new->src = malloc(len);
350 if (!new->src) {
351 free(delta);
352 return -ENOMEM;
353 }
354 ret = snprintf(new->src, len, "overlayfs:%s:%s", orig->src, delta);
355 free(delta);
356 if (ret < 0 || ret >= len)
357 return -ENOMEM;
358 } else if (strcmp(orig->type, "overlayfs") == 0) {
359 // What exactly do we want to do here?
360 // I think we want to use the original lowerdir, with a
361 // private delta which is originally rsynced from the
362 // original delta
363 char *osrc, *odelta, *nsrc, *ndelta, *work;
364 char *lastslash;
365 int len, ret, lastslashidx;
366 if (!(osrc = strdup(orig->src)))
367 return -22;
368 nsrc = strchr(osrc, ':') + 1;
369 if (nsrc != osrc + 10 || (odelta = strchr(nsrc, ':')) == NULL) {
370 free(osrc);
371 return -22;
372 }
373 *odelta = '\0';
374 odelta++;
375 ndelta = dir_new_path(odelta, oldname, cname, oldpath, lxcpath);
376 if (!ndelta) {
377 free(osrc);
378 return -ENOMEM;
379 }
380 if ((ret = mkdir(ndelta, 0755)) < 0 && errno != EEXIST) {
381 SYSERROR("error: mkdir %s", ndelta);
382 free(osrc);
383 free(ndelta);
384 return -1;
385 }
386 if (am_unpriv() && chown_mapped_root(ndelta, conf) < 0)
387 WARN("Failed to update ownership of %s", ndelta);
388
389 // make workdir for overlayfs.v22 or higher
390 // for details, see above.
391 lastslash = strrchr(ndelta, '/');
392 if (!lastslash)
393 return -1;
394 lastslash++;
395 lastslashidx = lastslash - ndelta;
396
397 work = malloc(lastslashidx + 7);
398 if (!work)
399 return -1;
400 strncpy(work, ndelta, lastslashidx+1);
401 strcpy(work+lastslashidx, "olwork");
402 if ((mkdir(work, 0755) < 0) && errno != EEXIST) {
403 SYSERROR("error: mkdir %s", work);
404 free(work);
405 return -1;
406 }
407 if (am_unpriv() && chown_mapped_root(work, conf) < 0)
408 WARN("Failed to update ownership of %s", work);
409 free(work);
410
411 len = strlen(nsrc) + strlen(ndelta) + 12;
412 new->src = malloc(len);
413 if (!new->src) {
414 free(osrc);
415 free(ndelta);
416 return -ENOMEM;
417 }
418 ret = snprintf(new->src, len, "overlayfs:%s:%s", nsrc, ndelta);
419 free(osrc);
420 free(ndelta);
421 if (ret < 0 || ret >= len)
422 return -ENOMEM;
423
424 return ovl_do_rsync(orig, new, conf);
425 } else {
426 ERROR("overlayfs clone of %s container is not yet supported",
427 orig->type);
428 // Note, supporting this will require overlayfs_mount supporting
429 // mounting of the underlay. No big deal, just needs to be done.
430 return -1;
431 }
432
433 return 0;
434}
435
436int overlayfs_destroy(struct bdev *orig)
437{
438 char *upper;
439
440 if (strncmp(orig->src, "overlayfs:", 10) != 0)
441 return -22;
442 upper = strchr(orig->src + 10, ':');
443 if (!upper)
444 return -22;
445 upper++;
446 return lxc_rmdir_onedev(upper, NULL);
447}
448
449/*
450 * to say 'lxc-create -t ubuntu -n o1 -B overlayfs' means you want
451 * $lxcpath/$lxcname/rootfs to have the created container, while all
452 * changes after starting the container are written to
453 * $lxcpath/$lxcname/delta0
454 */
455int overlayfs_create(struct bdev *bdev, const char *dest, const char *n,
456 struct bdev_specs *specs)
457{
458 char *delta;
459 int ret, len = strlen(dest), newlen;
460
461 if (len < 8 || strcmp(dest+len-7, "/rootfs") != 0)
462 return -1;
463
464 if (!(bdev->dest = strdup(dest))) {
465 ERROR("Out of memory");
466 return -1;
467 }
468
469 delta = alloca(strlen(dest)+1);
470 strcpy(delta, dest);
471 strcpy(delta+len-6, "delta0");
472
473 if (mkdir_p(delta, 0755) < 0) {
474 ERROR("Error creating %s", delta);
475 return -1;
476 }
477
478 /* overlayfs:lower:upper */
479 newlen = (2 * len) + strlen("overlayfs:") + 2;
480 bdev->src = malloc(newlen);
481 if (!bdev->src) {
482 ERROR("Out of memory");
483 return -1;
484 }
485 ret = snprintf(bdev->src, newlen, "overlayfs:%s:%s", dest, delta);
486 if (ret < 0 || ret >= newlen)
487 return -1;
488
489 if (mkdir_p(bdev->dest, 0755) < 0) {
490 ERROR("Error creating %s", bdev->dest);
491 return -1;
492 }
493
494 return 0;
495}
496
497/*
498 * To be called from lxcapi_clone() in lxccontainer.c: When we clone a container
499 * with overlay lxc.mount.entry entries we need to update absolute paths for
500 * upper- and workdir. This update is done in two locations:
501 * lxc_conf->unexpanded_config and lxc_conf->mount_list. Both updates are done
502 * independent of each other since lxc_conf->mountlist may container more mount
503 * entries (e.g. from other included files) than lxc_conf->unexpanded_config .
504 */
505int update_ovl_paths(struct lxc_conf *lxc_conf, const char *lxc_path,
506 const char *lxc_name, const char *newpath,
507 const char *newname)
508{
509 char new_upper[MAXPATHLEN];
510 char new_work[MAXPATHLEN];
511 char old_upper[MAXPATHLEN];
512 char old_work[MAXPATHLEN];
513 char *cleanpath = NULL;
514 int i;
515 int fret = -1;
516 int ret = 0;
517 struct lxc_list *iterator;
518 const char *ovl_dirs[] = {"br", "upperdir", "workdir"};
519
520 cleanpath = strdup(newpath);
521 if (!cleanpath)
522 goto err;
523
524 remove_trailing_slashes(cleanpath);
525
526 /* We have to update lxc_conf->unexpanded_config separately from
527 * lxc_conf->mount_list. */
528 for (i = 0; i < sizeof(ovl_dirs) / sizeof(ovl_dirs[0]); i++) {
529 if (!clone_update_unexp_ovl_paths(lxc_conf, lxc_path, newpath,
530 lxc_name, newname,
531 ovl_dirs[i]))
532 goto err;
533 }
534
535 ret = snprintf(old_work, MAXPATHLEN, "workdir=%s/%s", lxc_path, lxc_name);
536 if (ret < 0 || ret >= MAXPATHLEN)
537 goto err;
538
539 ret = snprintf(new_work, MAXPATHLEN, "workdir=%s/%s", cleanpath, newname);
540 if (ret < 0 || ret >= MAXPATHLEN)
541 goto err;
542
543 lxc_list_for_each(iterator, &lxc_conf->mount_list) {
544 char *mnt_entry = NULL;
545 char *new_mnt_entry = NULL;
546 char *tmp = NULL;
547 char *tmp_mnt_entry = NULL;
548 mnt_entry = iterator->elem;
549
550 if (strstr(mnt_entry, "overlay"))
551 tmp = "upperdir";
552 else if (strstr(mnt_entry, "aufs"))
553 tmp = "br";
554
555 if (!tmp)
556 continue;
557
558 ret = snprintf(old_upper, MAXPATHLEN, "%s=%s/%s", tmp, lxc_path, lxc_name);
559 if (ret < 0 || ret >= MAXPATHLEN)
560 goto err;
561
562 ret = snprintf(new_upper, MAXPATHLEN, "%s=%s/%s", tmp, cleanpath, newname);
563 if (ret < 0 || ret >= MAXPATHLEN)
564 goto err;
565
566 if (strstr(mnt_entry, old_upper)) {
567 tmp_mnt_entry = lxc_string_replace(old_upper, new_upper, mnt_entry);
568 }
569
570 if (strstr(mnt_entry, old_work)) {
571 if (tmp_mnt_entry)
572 new_mnt_entry = lxc_string_replace(old_work, new_work, tmp_mnt_entry);
573 else
574 new_mnt_entry = lxc_string_replace(old_work, new_work, mnt_entry);
575 }
576
577 if (new_mnt_entry) {
578 free(iterator->elem);
579 iterator->elem = strdup(new_mnt_entry);
580 } else if (tmp_mnt_entry) {
581 free(iterator->elem);
582 iterator->elem = strdup(tmp_mnt_entry);
583 }
584
585 free(new_mnt_entry);
586 free(tmp_mnt_entry);
587 }
588
589 fret = 0;
590err:
591 free(cleanpath);
592 return fret;
593}
594