]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/storage/storage_utils.c
github: Update for main branch
[mirror_lxc.git] / src / lxc / storage / storage_utils.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include "config.h"
4
5 #include <ctype.h>
6 #include <dirent.h>
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <grp.h>
10 #include <inttypes.h>
11 #include <libgen.h>
12 #include <sched.h>
13 #include <stdint.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <sys/mount.h>
17 #include <sys/prctl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 #include <sys/wait.h>
21 #include <unistd.h>
22
23 #include "log.h"
24 #include "nbd.h"
25 #include "parse.h"
26 #include "storage.h"
27 #include "storage_utils.h"
28 #include "syscall_wrappers.h"
29 #include "utils.h"
30
31 #if !HAVE_STRLCPY
32 #include "strlcpy.h"
33 #endif
34
35 #ifndef BLKGETSIZE64
36 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
37 #endif
38
39 lxc_log_define(storage_utils, lxc);
40
41 /*
42 * attach_block_device returns true if all went well,
43 * meaning either a block device was attached or was not
44 * needed. It returns false if something went wrong and
45 * container startup should be stopped.
46 */
47 bool attach_block_device(struct lxc_conf *conf)
48 {
49 char *path;
50
51 if (!conf->rootfs.path)
52 return true;
53
54 path = conf->rootfs.path;
55 if (!requires_nbd(path))
56 return true;
57
58 path = strchr(path, ':');
59 if (!path)
60 return false;
61
62 path++;
63 if (!attach_nbd(path, conf))
64 return false;
65
66 return true;
67 }
68
69 /*
70 * return block size of dev->src in units of bytes
71 */
72 int blk_getsize(struct lxc_storage *bdev, uint64_t *size)
73 {
74 int fd, ret;
75 const char *src;
76
77 src = lxc_storage_get_path(bdev->src, bdev->type);
78
79 fd = open(src, O_RDONLY | O_CLOEXEC);
80 if (fd < 0) {
81 SYSERROR("Failed to open \"%s\"", src);
82 return -1;
83 }
84
85 /* size of device in bytes */
86 ret = ioctl(fd, BLKGETSIZE64, size);
87 if (ret < 0)
88 SYSERROR("Failed to get block size of dev-src");
89
90 close(fd);
91 return ret;
92 }
93
94 void detach_block_device(struct lxc_conf *conf)
95 {
96 if (conf->nbd_idx != -1)
97 detach_nbd_idx(conf->nbd_idx);
98 }
99
100 /*
101 * Given a lxc_storage (presumably blockdev-based), detect the fstype
102 * by trying mounting (in a private mntns) it.
103 * @lxc_storage: bdev to investigate
104 * @type: preallocated char* in which to write the fstype
105 * @len: length of passed in char*
106 * Returns length of fstype, of -1 on error
107 */
108 int detect_fs(struct lxc_storage *bdev, char *type, int len)
109 {
110 int ret;
111 int p[2];
112 size_t linelen;
113 pid_t pid;
114 FILE *f;
115 char *sp1, *sp2, *sp3;
116 const char *l, *srcdev;
117 char devpath[PATH_MAX];
118 char *line = NULL;
119
120 if (!bdev || !bdev->src || !bdev->dest)
121 return -1;
122
123 srcdev = lxc_storage_get_path(bdev->src, bdev->type);
124
125 ret = pipe(p);
126 if (ret < 0) {
127 SYSERROR("Failed to create pipe");
128 return -1;
129 }
130
131 pid = fork();
132 if (pid < 0) {
133 SYSERROR("Failed to fork process");
134 return -1;
135 }
136
137 if (pid > 0) {
138 int status;
139
140 close(p[1]);
141 memset(type, 0, len);
142
143 ret = read(p[0], type, len - 1);
144 if (ret < 0) {
145 SYSERROR("Failed to read FSType from pipe");
146 } else if (ret == 0) {
147 ERROR("FSType not found - child exited early");
148 ret = -1;
149 }
150
151 close(p[0]);
152 wait(&status);
153
154 if (ret < 0)
155 return ret;
156
157 type[len - 1] = '\0';
158 INFO("Detected FSType \"%s\" for \"%s\"", type, srcdev);
159
160 return ret;
161 }
162
163 if (unshare(CLONE_NEWNS) < 0)
164 _exit(EXIT_FAILURE);
165
166 if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL))
167 SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing...");
168
169 ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts);
170 if (ret < 0) {
171 ERROR("Failed to mount \"%s\" onto \"%s\" to detect FSType", srcdev,
172 bdev->dest);
173 _exit(EXIT_FAILURE);
174 }
175
176 l = linkderef(srcdev, devpath);
177 if (!l)
178 _exit(EXIT_FAILURE);
179
180 f = fopen("/proc/self/mounts", "r");
181 if (!f)
182 _exit(EXIT_FAILURE);
183
184 while (getline(&line, &linelen, f) != -1) {
185 ssize_t nbytes;
186
187 sp1 = strchr(line, ' ');
188 if (!sp1)
189 _exit(EXIT_FAILURE);
190
191 *sp1 = '\0';
192 if (strcmp(line, l))
193 continue;
194
195 sp2 = strchr(sp1 + 1, ' ');
196 if (!sp2)
197 _exit(EXIT_FAILURE);
198 *sp2 = '\0';
199
200 sp3 = strchr(sp2 + 1, ' ');
201 if (!sp3)
202 _exit(EXIT_FAILURE);
203 *sp3 = '\0';
204
205 sp2++;
206 nbytes = write(p[1], sp2, strlen(sp2));
207 if (nbytes < 0 || (size_t)nbytes != strlen(sp2))
208 _exit(EXIT_FAILURE);
209
210 _exit(EXIT_SUCCESS);
211 }
212
213 _exit(EXIT_FAILURE);
214 }
215
/*
 * Replace the current process image with "mkfs.<fstype> <arg>".
 *
 * @args is a char ** whose first element is the filesystem type and whose
 * second element is passed to the mkfs program as its only argument.
 *
 * Only returns on failure, always with -1: when the program name cannot be
 * allocated or formatted, or when execlp() fails.
 */
int do_mkfs_exec_wrapper(void *args)
{
	char **argv = args;
	const char *fstype = argv[0];
	/* "mkfs." + fstype + terminating NUL */
	size_t bufsz = strlen("mkfs.") + strlen(fstype) + 1;
	char *prog;
	int n;

	prog = malloc(bufsz);
	if (!prog)
		return -1;

	n = snprintf(prog, bufsz, "mkfs.%s", fstype);
	if (n < 0 || (size_t)n >= bufsz) {
		free(prog);
		return -1;
	}

	TRACE("Executing \"%s %s\"", prog, argv[1]);
	execlp(prog, prog, argv[1], (char *)NULL);

	/* Reached only if execlp() failed. */
	SYSERROR("Failed to run \"%s %s\"", prog, argv[1]);
	free(prog);

	return -1;
}
247
248 /*
249 * This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm
250 * is a block device.
251 */
252 int is_blktype(struct lxc_storage *b)
253 {
254 if (strcmp(b->type, "lvm") == 0)
255 return 1;
256
257 return 0;
258 }
259
260 int mount_unknown_fs(const char *rootfs, const char *target,
261 const char *options)
262 {
263 size_t i;
264 int ret;
265 struct cbarg {
266 const char *rootfs;
267 const char *target;
268 const char *options;
269 } cbarg = {
270 .rootfs = rootfs,
271 .target = target,
272 .options = options,
273 };
274
275 /*
276 * find the filesystem type with brute force:
277 * first we check with /etc/filesystems, in case the modules
278 * are auto-loaded and fall back to the supported kernel fs
279 */
280 char *fsfile[] = {
281 "/etc/filesystems",
282 "/proc/filesystems",
283 };
284
285 for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
286 if (access(fsfile[i], F_OK))
287 continue;
288
289 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
290 if (ret < 0) {
291 ERROR("Failed to parse \"%s\"", fsfile[i]);
292 return -1;
293 }
294
295 if (ret)
296 return 0;
297 }
298
299 ERROR("Failed to determine FSType for \"%s\"", rootfs);
300
301 return -1;
302 }
303
/*
 * Per-line callback that tries to mount with the filesystem type named on
 * the line in @buffer.
 *
 * This was copied from conf.c. Once conf.c moves to the callback system it
 * can be pulled from there, so utils.c does not need to be polluted with
 * these low level functions.
 *
 * @buffer: one line of a filesystems listing; it is modified in place
 * @data:   pointer to a struct holding the rootfs, target and options
 *
 * Returns 1 if the mount succeeded, 0 otherwise (lines containing "nodev",
 * unparsable mount options, or a failed mount).
 */
int find_fstype_cb(char *buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *arg = data;
	unsigned long flags = 0;
	char *opts = NULL;
	char *name = buffer;
	int mounted = 0;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev") != NULL)
		return 0;

	/*
	 * Skip what lxc_char_left_gc() reports at the front and terminate the
	 * string where lxc_char_right_gc() says it ends (presumably whitespace
	 * trimming; both helpers are defined elsewhere).
	 */
	name += lxc_char_left_gc(name, strlen(name));
	name[lxc_char_right_gc(name, strlen(name))] = '\0';

	DEBUG("Trying to mount \"%s\"->\"%s\" with FSType \"%s\"", arg->rootfs,
	      arg->target, name);

	if (parse_mntopts_legacy(arg->options, &flags, &opts) < 0)
		goto out;

	if (mount(arg->rootfs, arg->target, name, flags, opts) != 0) {
		SYSDEBUG("Failed to mount");
		goto out;
	}

	mounted = 1;

out:
	free(opts);

	if (mounted)
		INFO("Mounted \"%s\" on \"%s\", with FSType \"%s\"", arg->rootfs,
		     arg->target, name);

	return mounted;
}
349
350 const char *linkderef(const char *path, char *dest)
351 {
352 struct stat sbuf;
353 ssize_t ret;
354
355 ret = stat(path, &sbuf);
356 if (ret < 0) {
357 SYSERROR("Failed to get status of file - \"%s\"", path);
358 return NULL;
359 }
360
361 if (!S_ISLNK(sbuf.st_mode))
362 return path;
363
364 ret = readlink(path, dest, PATH_MAX);
365 if (ret < 0) {
366 SYSERROR("Failed to read link of \"%s\"", path);
367 return NULL;
368 } else if (ret >= PATH_MAX) {
369 ERROR("The name of link of \"%s\" is too long", path);
370 return NULL;
371 }
372 dest[ret] = '\0';
373
374 return dest;
375 }
376
377 /*
378 * is an unprivileged user allowed to make this kind of snapshot
379 */
380 bool unpriv_snap_allowed(struct lxc_storage *b, const char *t, bool snap,
381 bool maybesnap)
382 {
383 if (!t) {
384 /* New type will be same as original (unless snap && b->type ==
385 * dir, in which case it will be overlayfs -- which is also
386 * allowed).
387 */
388 if (strcmp(b->type, "dir") == 0 ||
389 strcmp(b->type, "overlay") == 0 ||
390 strcmp(b->type, "overlayfs") == 0 ||
391 strcmp(b->type, "btrfs") == 0 ||
392 strcmp(b->type, "loop") == 0)
393 return true;
394
395 return false;
396 }
397
398 /* Unprivileged users can copy and snapshot dir, overlayfs, and loop.
399 * In particular, not zfs, btrfs, or lvm.
400 */
401 if (strcmp(t, "dir") == 0 ||
402 strcmp(t, "overlay") == 0 ||
403 strcmp(t, "overlayfs") == 0 ||
404 strcmp(t, "btrfs") == 0 ||
405 strcmp(t, "loop") == 0)
406 return true;
407
408 return false;
409 }
410
/*
 * Parse a filesystem size such as "512", "10G" or "4 k" into bytes.
 *
 * The number is parsed with strtoull() using base 0, so decimal, octal and
 * hexadecimal notations are accepted. Blanks between the number and the unit
 * are skipped. Only the first character of the unit is examined:
 *   (none) or m/M  mebibytes (MB is the default unit)
 *   b/B            bytes
 *   k/K            kibibytes
 *   g/G            gibibytes
 *   t/T            tebibytes
 *
 * Returns the size in bytes, or 0 after logging an error when the string has
 * no number, the unit is unknown, or the result does not fit in 64 bits.
 * The error messages describe 0 as "using default size".
 */
uint64_t get_fssize(char *s)
{
	uint64_t ret, unit;
	char *end;

	errno = 0;
	ret = strtoull(s, &end, 0);
	if (end == s) {
		ERROR("Invalid blockdev size '%s', using default size", s);
		return 0;
	}

	if (errno == ERANGE) {
		ERROR("Blockdev size '%s' is out of range, using default size", s);
		return 0;
	}

	/* <ctype.h> functions are undefined for negative plain-char values. */
	while (isblank((unsigned char)*end))
		end++;

	switch (*end) {
	case '\0': /* MB by default */
	case 'm':
	case 'M':
		unit = 1024ULL * 1024ULL;
		break;
	case 'b':
	case 'B':
		unit = 1ULL;
		break;
	case 'k':
	case 'K':
		unit = 1024ULL;
		break;
	case 'g':
	case 'G':
		unit = 1024ULL * 1024ULL * 1024ULL;
		break;
	case 't':
	case 'T':
		unit = 1024ULL * 1024ULL * 1024ULL * 1024ULL;
		break;
	default:
		ERROR("Invalid blockdev unit size '%c' in '%s', using default size", *end, s);
		return 0;
	}

	/* Refuse sizes that would silently wrap around when scaled. */
	if (ret > UINT64_MAX / unit) {
		ERROR("Blockdev size '%s' is out of range, using default size", s);
		return 0;
	}

	return ret * unit;
}
444
/*
 * Check whether @type names one of the known storage backends.
 *
 * The comparison is exact and case-sensitive. Returns true for dir, btrfs,
 * loop, lvm, nbd, overlay, overlayfs, rbd and zfs, false for anything else.
 * @type must not be NULL.
 */
bool is_valid_storage_type(const char *type)
{
	static const char *const known[] = {
		"dir",
		"btrfs",
		"loop",
		"lvm",
		"nbd",
		"overlay",
		"overlayfs",
		"rbd",
		"zfs",
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(type, known[i]) == 0)
			return true;
	}

	return false;
}
460
/*
 * Destroy a container's storage after switching to gid 0 and uid 0.
 *
 * @data is the struct lxc_conf * handed on to storage_destroy(). The result
 * of lxc_drop_groups() is deliberately ignored.
 *
 * Returns 0 on success, -1 if setgid(), setuid() or storage_destroy() fails.
 */
int storage_destroy_wrapper(void *data)
{
	struct lxc_conf *conf = data;
	int rc = -1;

	(void)lxc_drop_groups();

	if (setgid(0) < 0) {
		SYSERROR("Failed to setgid to 0");
	} else if (setuid(0) < 0) {
		SYSERROR("Failed to setuid to 0");
	} else if (!storage_destroy(conf)) {
		ERROR("Failed to destroy storage");
	} else {
		rc = 0;
	}

	return rc;
}