]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/storage/storage_utils.c
tree-wide: struct bdev -> struct lxc_storage
[mirror_lxc.git] / src / lxc / storage / storage_utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * Copyright © 2017 Canonical Ltd.
5 *
6 * Authors:
7 * Christian Brauner <christian.brauner@ubuntu.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <dirent.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <grp.h>
29 #include <inttypes.h>
30 #include <libgen.h>
31 #include <sched.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <sys/mount.h>
37 #include <sys/prctl.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41
42 #include "log.h"
43 #include "nbd.h"
44 #include "parse.h"
45 #include "storage.h"
46 #include "storage_utils.h"
47 #include "utils.h"
48
49 #ifndef BLKGETSIZE64
50 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
51 #endif
52
53 lxc_log_define(storage_utils, lxc);
54
/*
 * Build a new path from @src for a renamed/relocated container.
 *
 * If @src starts with @oldpath, that prefix is replaced by @lxcpath. In the
 * remainder (or in all of @src when the prefix does not match) every
 * occurrence of @oldname is replaced by @name. An empty @oldname replaces
 * nothing.
 *
 * The bulk of this needs to become a common helper.
 *
 * Returns a newly malloc()ed string which the caller must free(), or NULL
 * if the allocation fails.
 */
char *dir_new_path(char *src, const char *oldname, const char *name,
		   const char *oldpath, const char *lxcpath)
{
	const char *rest, *hit;
	char *ret, *out;
	size_t oldpath_len = strlen(oldpath);
	size_t oldname_len = strlen(oldname);
	size_t name_len = strlen(name);
	size_t lxcpath_len = strlen(lxcpath);
	size_t nmatches = 0;
	size_t total;
	int has_prefix;

	/* If src starts with oldpath, look for oldname only after that path. */
	has_prefix = strncmp(src, oldpath, oldpath_len) == 0;
	rest = has_prefix ? src + oldpath_len : src;

	/*
	 * strstr() with an empty needle always matches without advancing, so
	 * an empty oldname must be skipped or the loops below never end.
	 */
	if (oldname_len > 0) {
		for (hit = rest; (hit = strstr(hit, oldname)) != NULL;
		     hit += oldname_len)
			nmatches++;
	}

	/*
	 * Matches do not overlap, so strlen(rest) is always at least
	 * nmatches * oldname_len and the subtraction cannot wrap.
	 */
	total = strlen(rest) - nmatches * oldname_len + nmatches * name_len + 1;
	if (has_prefix)
		total += lxcpath_len;

	ret = malloc(total);
	if (!ret)
		return NULL;

	out = ret;
	if (has_prefix) {
		memcpy(out, lxcpath, lxcpath_len);
		out += lxcpath_len;
	}

	if (oldname_len > 0) {
		while ((hit = strstr(rest, oldname)) != NULL) {
			size_t chunk = (size_t)(hit - rest);

			/* copy text up to oldname */
			memcpy(out, rest, chunk);
			out += chunk;

			/* put the new name in place of oldname */
			memcpy(out, name, name_len);
			out += name_len;

			/* continue after the end of oldname */
			rest = hit + oldname_len;
		}
	}

	/* copy the rest of src including the terminating NUL */
	memcpy(out, rest, strlen(rest) + 1);

	return ret;
}
97
98 /*
99 * attach_block_device returns true if all went well,
100 * meaning either a block device was attached or was not
101 * needed. It returns false if something went wrong and
102 * container startup should be stopped.
103 */
104 bool attach_block_device(struct lxc_conf *conf)
105 {
106 char *path;
107
108 if (!conf->rootfs.path)
109 return true;
110
111 path = conf->rootfs.path;
112 if (!requires_nbd(path))
113 return true;
114
115 path = strchr(path, ':');
116 if (!path)
117 return false;
118
119 path++;
120 if (!attach_nbd(path, conf))
121 return false;
122
123 return true;
124 }
125
126 /*
127 * return block size of dev->src in units of bytes
128 */
129 int blk_getsize(struct lxc_storage *bdev, uint64_t *size)
130 {
131 int fd, ret;
132 char *src;
133
134 src = lxc_storage_get_path(bdev->src, bdev->type);
135 fd = open(src, O_RDONLY);
136 if (fd < 0)
137 return -1;
138
139 ret = ioctl(fd, BLKGETSIZE64, size); // size of device in bytes
140 close(fd);
141 return ret;
142 }
143
144 void detach_block_device(struct lxc_conf *conf)
145 {
146 if (conf->nbd_idx != -1)
147 detach_nbd_idx(conf->nbd_idx);
148 }
149
150 /*
151 * Given a lxc_storage (presumably blockdev-based), detect the fstype
152 * by trying mounting (in a private mntns) it.
153 * @lxc_storage: bdev to investigate
154 * @type: preallocated char* in which to write the fstype
155 * @len: length of passed in char*
156 * Returns length of fstype, of -1 on error
157 */
158 int detect_fs(struct lxc_storage *bdev, char *type, int len)
159 {
160 int p[2], ret;
161 size_t linelen;
162 pid_t pid;
163 FILE *f;
164 char *sp1, *sp2, *sp3, *srcdev, *line = NULL;
165
166 if (!bdev || !bdev->src || !bdev->dest)
167 return -1;
168
169 srcdev = lxc_storage_get_path(bdev->src, bdev->type);
170
171 ret = pipe(p);
172 if (ret < 0)
173 return -1;
174
175 if ((pid = fork()) < 0)
176 return -1;
177
178 if (pid > 0) {
179 int status;
180 close(p[1]);
181 memset(type, 0, len);
182 ret = read(p[0], type, len - 1);
183 close(p[0]);
184 if (ret < 0) {
185 SYSERROR("error reading from pipe");
186 wait(&status);
187 return -1;
188 } else if (ret == 0) {
189 ERROR("child exited early - fstype not found");
190 wait(&status);
191 return -1;
192 }
193 wait(&status);
194 type[len - 1] = '\0';
195 INFO("detected fstype %s for %s", type, srcdev);
196 return ret;
197 }
198
199 if (unshare(CLONE_NEWNS) < 0)
200 exit(1);
201
202 if (detect_shared_rootfs()) {
203 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
204 SYSERROR("Failed to make / rslave");
205 ERROR("Continuing...");
206 }
207 }
208
209 ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts);
210 if (ret < 0) {
211 ERROR("failed mounting %s onto %s to detect fstype", srcdev,
212 bdev->dest);
213 exit(1);
214 }
215
216 // if symlink, get the real dev name
217 char devpath[MAXPATHLEN];
218 char *l = linkderef(srcdev, devpath);
219 if (!l)
220 exit(1);
221 f = fopen("/proc/self/mounts", "r");
222 if (!f)
223 exit(1);
224
225 while (getline(&line, &linelen, f) != -1) {
226 sp1 = strchr(line, ' ');
227 if (!sp1)
228 exit(1);
229 *sp1 = '\0';
230 if (strcmp(line, l))
231 continue;
232 sp2 = strchr(sp1 + 1, ' ');
233 if (!sp2)
234 exit(1);
235 *sp2 = '\0';
236 sp3 = strchr(sp2 + 1, ' ');
237 if (!sp3)
238 exit(1);
239 *sp3 = '\0';
240 sp2++;
241 if (write(p[1], sp2, strlen(sp2)) != strlen(sp2))
242 exit(1);
243
244 exit(0);
245 }
246
247 exit(1);
248 }
249
/*
 * Exec "mkfs.<fstype> <device>".
 *
 * @args is a char ** where element 0 is appended to "mkfs." to form the
 * program name and element 1 is passed to it as its only argument.
 *
 * Does not return on success, because the process image is replaced.
 * Returns -1 if the command name cannot be built or the exec fails.
 */
int do_mkfs_exec_wrapper(void *args)
{
	int ret;
	char *mkfs;
	char **data = args;
	/* strlen("mkfs.")
	 * +
	 * strlen(data[0])
	 * +
	 * \0
	 */
	size_t len = 5 + strlen(data[0]) + 1;

	mkfs = malloc(len);
	if (!mkfs)
		return -1;

	ret = snprintf(mkfs, len, "mkfs.%s", data[0]);
	if (ret < 0 || (size_t)ret >= len) {
		free(mkfs);
		return -1;
	}

	TRACE("executing \"%s %s\"", mkfs, data[1]);
	execlp(mkfs, mkfs, data[1], (char *)NULL);

	/* Only reached when execlp() failed. Log first: errno is still set. */
	SYSERROR("failed to run \"%s %s \"", mkfs, data[1]);
	free(mkfs);
	return -1;
}
278
279 /*
280 * This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm
281 * is a block device.
282 */
283 int is_blktype(struct lxc_storage *b)
284 {
285 if (strcmp(b->type, "lvm") == 0)
286 return 1;
287
288 return 0;
289 }
290
291 int mount_unknown_fs(const char *rootfs, const char *target,
292 const char *options)
293 {
294 size_t i;
295 int ret;
296 struct cbarg {
297 const char *rootfs;
298 const char *target;
299 const char *options;
300 } cbarg = {
301 .rootfs = rootfs,
302 .target = target,
303 .options = options,
304 };
305
306 /*
307 * find the filesystem type with brute force:
308 * first we check with /etc/filesystems, in case the modules
309 * are auto-loaded and fall back to the supported kernel fs
310 */
311 char *fsfile[] = {
312 "/etc/filesystems",
313 "/proc/filesystems",
314 };
315
316 for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
317 if (access(fsfile[i], F_OK))
318 continue;
319
320 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
321 if (ret < 0) {
322 ERROR("failed to parse '%s'", fsfile[i]);
323 return -1;
324 }
325
326 if (ret)
327 return 0;
328 }
329
330 ERROR("failed to determine fs type for '%s'", rootfs);
331 return -1;
332 }
333
/*
 * These are copied from conf.c. However as conf.c will be moved to using
 * the callback system, they can be pulled from there eventually, so we
 * don't need to pollute utils.c with these low level functions
 */

/*
 * Per-line callback used by mount_unknown_fs().
 *
 * @buffer: one line of a filesystem list; it is trimmed in place and then
 *          used as the filesystem type
 * @data:   struct holding the rootfs, target and options strings
 *
 * Returns 1 if mounting rootfs on target with this type succeeded, and 0
 * if the line was skipped or the attempt failed.
 */
int find_fstype_cb(char *buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Initialised here so that the free() on the parse failure path is
	 * safe even if parse_mntopts() bails out before storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* strip surrounding garbage characters from the line */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'", cbarg->rootfs,
	      cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return 0;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}

	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'", cbarg->rootfs,
	     cbarg->target, fstype);

	return 1;
}
380
/*
 * Resolve @path one level if it is a symbolic link.
 *
 * @path: path to inspect
 * @dest: caller-provided buffer of at least MAXPATHLEN bytes that receives
 *        the link target
 *
 * Returns @path itself when it is not reported as a symlink, @dest when the
 * link was read, and NULL if stat() or readlink() fails or the target does
 * not fit into MAXPATHLEN - 1 bytes.
 *
 * NOTE(review): the type check uses stat(), which follows symlinks, so the
 * S_ISLNK branch may never be taken in practice - confirm whether lstat()
 * was intended before relying on the readlink path.
 */
char *linkderef(char *path, char *dest)
{
	struct stat st;
	ssize_t n;

	if (stat(path, &st) < 0)
		return NULL;

	if (S_ISLNK(st.st_mode)) {
		n = readlink(path, dest, MAXPATHLEN);
		if (n < 0) {
			SYSERROR("error reading link %s", path);
			return NULL;
		}

		/* readlink() does not terminate; a full buffer leaves no room. */
		if (n >= MAXPATHLEN) {
			ERROR("link in %s too long", path);
			return NULL;
		}

		dest[n] = '\0';
		return dest;
	}

	return path;
}
405
406 /*
407 * is an unprivileged user allowed to make this kind of snapshot
408 */
409 bool unpriv_snap_allowed(struct lxc_storage *b, const char *t, bool snap,
410 bool maybesnap)
411 {
412 if (!t) {
413 // new type will be same as original
414 // (unless snap && b->type == dir, in which case it will be
415 // overlayfs -- which is also allowed)
416 if (strcmp(b->type, "dir") == 0 ||
417 strcmp(b->type, "aufs") == 0 ||
418 strcmp(b->type, "overlay") == 0 ||
419 strcmp(b->type, "overlayfs") == 0 ||
420 strcmp(b->type, "btrfs") == 0 ||
421 strcmp(b->type, "loop") == 0)
422 return true;
423
424 return false;
425 }
426
427 // unprivileged users can copy and snapshot dir, overlayfs,
428 // and loop. In particular, not zfs, btrfs, or lvm.
429 if (strcmp(t, "dir") == 0 ||
430 strcmp(t, "aufs") == 0 ||
431 strcmp(t, "overlay") == 0 ||
432 strcmp(t, "overlayfs") == 0 ||
433 strcmp(t, "btrfs") == 0 ||
434 strcmp(t, "loop") == 0)
435 return true;
436
437 return false;
438 }
439
/*
 * Check whether @type names a known storage backend.
 *
 * "overlay" is accepted next to "overlayfs" so that this check agrees with
 * unpriv_snap_allowed(), which treats both spellings as valid type names.
 *
 * Returns true for a known type name, false otherwise (including a NULL
 * @type).
 */
bool is_valid_storage_type(const char *type)
{
	static const char *const known[] = {
		"dir", "btrfs", "aufs", "loop", "lvm", "nbd",
		"overlay", "overlayfs", "rbd", "zfs",
	};
	size_t i;

	if (!type)
		return false;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(type, known[i]) == 0)
			return true;
	}

	return false;
}
455
/*
 * Callback wrapper around storage_destroy().
 *
 * @data: the struct lxc_conf whose storage is to be destroyed
 *
 * Sets gid to 0, clears the supplementary groups (a failure there is only
 * warned about) and sets uid to 0 before calling storage_destroy().
 *
 * Returns 0 on success, -1 if switching ids or destroying the storage
 * failed.
 */
int storage_destroy_wrapper(void *data)
{
	struct lxc_conf *conf = data;
	int gid_ret, uid_ret;

	gid_ret = setgid(0);
	if (gid_ret < 0) {
		ERROR("Failed to setgid to 0");
		return -1;
	}

	if (setgroups(0, NULL) < 0)
		WARN("Failed to clear groups");

	uid_ret = setuid(0);
	if (uid_ret < 0) {
		ERROR("Failed to setuid to 0");
		return -1;
	}

	return storage_destroy(conf) ? 0 : -1;
}