]>
Commit | Line | Data |
---|---|---|
cc73685d | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
f2d5a09d | 2 | |
d38dd64a CB |
3 | #ifndef _GNU_SOURCE |
4 | #define _GNU_SOURCE 1 | |
5 | #endif | |
2b670dfe | 6 | #include <ctype.h> |
f2d5a09d CB |
7 | #include <dirent.h> |
8 | #include <errno.h> | |
9 | #include <fcntl.h> | |
10 | #include <grp.h> | |
11 | #include <inttypes.h> | |
12 | #include <libgen.h> | |
13 | #include <sched.h> | |
14 | #include <stdint.h> | |
15 | #include <stdio.h> | |
16 | #include <string.h> | |
f2d5a09d CB |
17 | #include <sys/mount.h> |
18 | #include <sys/prctl.h> | |
f2d5a09d | 19 | #include <sys/stat.h> |
d38dd64a | 20 | #include <sys/types.h> |
f2d5a09d | 21 | #include <sys/wait.h> |
d38dd64a | 22 | #include <unistd.h> |
f2d5a09d | 23 | |
d38dd64a | 24 | #include "config.h" |
f2d5a09d | 25 | #include "log.h" |
28d832c4 | 26 | #include "nbd.h" |
f2d5a09d | 27 | #include "parse.h" |
28d832c4 | 28 | #include "storage.h" |
f2d5a09d | 29 | #include "storage_utils.h" |
e8f764b6 | 30 | #include "syscall_wrappers.h" |
f2d5a09d CB |
31 | #include "utils.h" |
32 | ||
9de31d5a | 33 | #ifndef HAVE_STRLCPY |
58db1a61 | 34 | #include "strlcpy.h" |
9de31d5a CB |
35 | #endif |
36 | ||
f2d5a09d CB |
37 | #ifndef BLKGETSIZE64 |
38 | #define BLKGETSIZE64 _IOR(0x12, 114, size_t) | |
39 | #endif | |
40 | ||
41 | lxc_log_define(storage_utils, lxc); | |
42 | ||
f2d5a09d CB |
43 | /* |
44 | * attach_block_device returns true if all went well, | |
45 | * meaning either a block device was attached or was not | |
46 | * needed. It returns false if something went wrong and | |
47 | * container startup should be stopped. | |
48 | */ | |
49 | bool attach_block_device(struct lxc_conf *conf) | |
50 | { | |
51 | char *path; | |
52 | ||
53 | if (!conf->rootfs.path) | |
54 | return true; | |
55 | ||
56 | path = conf->rootfs.path; | |
57 | if (!requires_nbd(path)) | |
58 | return true; | |
59 | ||
60 | path = strchr(path, ':'); | |
61 | if (!path) | |
62 | return false; | |
63 | ||
64 | path++; | |
65 | if (!attach_nbd(path, conf)) | |
66 | return false; | |
67 | ||
68 | return true; | |
69 | } | |
70 | ||
71 | /* | |
72 | * return block size of dev->src in units of bytes | |
73 | */ | |
10bc1861 | 74 | int blk_getsize(struct lxc_storage *bdev, uint64_t *size) |
f2d5a09d CB |
75 | { |
76 | int fd, ret; | |
41dc7155 | 77 | const char *src; |
f2d5a09d | 78 | |
11274f69 | 79 | src = lxc_storage_get_path(bdev->src, bdev->type); |
ec3af0a4 | 80 | |
81 | fd = open(src, O_RDONLY | O_CLOEXEC); | |
82 | if (fd < 0) { | |
83 | SYSERROR("Failed to open \"%s\"", src); | |
f2d5a09d | 84 | return -1; |
ec3af0a4 | 85 | } |
f2d5a09d | 86 | |
1a0e70ac CB |
87 | /* size of device in bytes */ |
88 | ret = ioctl(fd, BLKGETSIZE64, size); | |
ec3af0a4 | 89 | if (ret < 0) |
90 | SYSERROR("Failed to get block size of dev-src"); | |
91 | ||
f2d5a09d CB |
92 | close(fd); |
93 | return ret; | |
94 | } | |
95 | ||
96 | void detach_block_device(struct lxc_conf *conf) | |
97 | { | |
98 | if (conf->nbd_idx != -1) | |
99 | detach_nbd_idx(conf->nbd_idx); | |
100 | } | |
101 | ||
102 | /* | |
10bc1861 | 103 | * Given a lxc_storage (presumably blockdev-based), detect the fstype |
f2d5a09d | 104 | * by trying mounting (in a private mntns) it. |
10bc1861 | 105 | * @lxc_storage: bdev to investigate |
f2d5a09d CB |
106 | * @type: preallocated char* in which to write the fstype |
107 | * @len: length of passed in char* | |
108 | * Returns length of fstype, of -1 on error | |
109 | */ | |
10bc1861 | 110 | int detect_fs(struct lxc_storage *bdev, char *type, int len) |
f2d5a09d | 111 | { |
41dc7155 CB |
112 | int ret; |
113 | int p[2]; | |
f2d5a09d CB |
114 | size_t linelen; |
115 | pid_t pid; | |
116 | FILE *f; | |
41dc7155 CB |
117 | char *sp1, *sp2, *sp3; |
118 | const char *l, *srcdev; | |
339de297 | 119 | char devpath[PATH_MAX]; |
41dc7155 | 120 | char *line = NULL; |
f2d5a09d CB |
121 | |
122 | if (!bdev || !bdev->src || !bdev->dest) | |
123 | return -1; | |
124 | ||
11274f69 | 125 | srcdev = lxc_storage_get_path(bdev->src, bdev->type); |
f2d5a09d CB |
126 | |
127 | ret = pipe(p); | |
ec3af0a4 | 128 | if (ret < 0) { |
129 | SYSERROR("Failed to create pipe"); | |
f2d5a09d | 130 | return -1; |
ec3af0a4 | 131 | } |
f2d5a09d | 132 | |
ec3af0a4 | 133 | pid = fork(); |
134 | if (pid < 0) { | |
135 | SYSERROR("Failed to fork process"); | |
f2d5a09d | 136 | return -1; |
ec3af0a4 | 137 | } |
f2d5a09d CB |
138 | |
139 | if (pid > 0) { | |
140 | int status; | |
ed263352 | 141 | |
f2d5a09d CB |
142 | close(p[1]); |
143 | memset(type, 0, len); | |
ed263352 | 144 | |
f2d5a09d | 145 | ret = read(p[0], type, len - 1); |
f2d5a09d | 146 | if (ret < 0) { |
ed263352 | 147 | SYSERROR("Failed to read FSType from pipe"); |
f2d5a09d | 148 | } else if (ret == 0) { |
ed263352 | 149 | ERROR("FSType not found - child exited early"); |
150 | ret = -1; | |
f2d5a09d | 151 | } |
ed263352 | 152 | |
153 | close(p[0]); | |
f2d5a09d | 154 | wait(&status); |
ed263352 | 155 | |
156 | if (ret < 0) | |
157 | return ret; | |
158 | ||
f2d5a09d | 159 | type[len - 1] = '\0'; |
ed263352 | 160 | INFO("Detected FSType \"%s\" for \"%s\"", type, srcdev); |
161 | ||
f2d5a09d CB |
162 | return ret; |
163 | } | |
164 | ||
165 | if (unshare(CLONE_NEWNS) < 0) | |
c0b0c44b | 166 | _exit(EXIT_FAILURE); |
f2d5a09d | 167 | |
9e61fb1f CB |
168 | if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) |
169 | SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing..."); | |
f2d5a09d CB |
170 | |
171 | ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts); | |
172 | if (ret < 0) { | |
ed263352 | 173 | ERROR("Failed to mount \"%s\" onto \"%s\" to detect FSType", srcdev, |
f2d5a09d | 174 | bdev->dest); |
c0b0c44b | 175 | _exit(EXIT_FAILURE); |
f2d5a09d CB |
176 | } |
177 | ||
41dc7155 | 178 | l = linkderef(srcdev, devpath); |
f2d5a09d | 179 | if (!l) |
c0b0c44b | 180 | _exit(EXIT_FAILURE); |
ed263352 | 181 | |
f2d5a09d CB |
182 | f = fopen("/proc/self/mounts", "r"); |
183 | if (!f) | |
c0b0c44b | 184 | _exit(EXIT_FAILURE); |
f2d5a09d CB |
185 | |
186 | while (getline(&line, &linelen, f) != -1) { | |
187 | sp1 = strchr(line, ' '); | |
188 | if (!sp1) | |
c0b0c44b | 189 | _exit(EXIT_FAILURE); |
ed263352 | 190 | |
f2d5a09d CB |
191 | *sp1 = '\0'; |
192 | if (strcmp(line, l)) | |
193 | continue; | |
ed263352 | 194 | |
f2d5a09d CB |
195 | sp2 = strchr(sp1 + 1, ' '); |
196 | if (!sp2) | |
c0b0c44b | 197 | _exit(EXIT_FAILURE); |
f2d5a09d | 198 | *sp2 = '\0'; |
ed263352 | 199 | |
f2d5a09d CB |
200 | sp3 = strchr(sp2 + 1, ' '); |
201 | if (!sp3) | |
c0b0c44b | 202 | _exit(EXIT_FAILURE); |
f2d5a09d | 203 | *sp3 = '\0'; |
ed263352 | 204 | |
f2d5a09d CB |
205 | sp2++; |
206 | if (write(p[1], sp2, strlen(sp2)) != strlen(sp2)) | |
c0b0c44b | 207 | _exit(EXIT_FAILURE); |
f2d5a09d | 208 | |
c0b0c44b | 209 | _exit(EXIT_SUCCESS); |
f2d5a09d CB |
210 | } |
211 | ||
c0b0c44b | 212 | _exit(EXIT_FAILURE); |
f2d5a09d CB |
213 | } |
214 | ||
/*
 * Build the program name "mkfs.<args[0]>" and exec it with args[1] as its
 * only argument.
 * @args: a char ** whose first two elements are read as described above
 *
 * On success execlp() replaces the process image, so this function only
 * returns on failure: -1 if the allocation, the formatting or the exec fails.
 */
int do_mkfs_exec_wrapper(void *args)
{
	char **argv = args;
	const char *suffix = argv[0];
	const char *operand = argv[1];
	/* "mkfs." + suffix + terminating NUL */
	size_t bufsize = strlen("mkfs.") + strlen(suffix) + 1;
	char *cmd;
	int n;

	cmd = malloc(bufsize);
	if (!cmd)
		return -1;

	n = snprintf(cmd, bufsize, "mkfs.%s", suffix);
	if (n < 0 || (size_t)n >= bufsize) {
		free(cmd);
		return -1;
	}

	TRACE("Executing \"%s %s\"", cmd, operand);
	execlp(cmd, cmd, operand, (char *)NULL);

	/* Only reached when execlp() failed. */
	SYSERROR("Failed to run \"%s %s\"", cmd, operand);
	free(cmd);

	return -1;
}
246 | ||
247 | /* | |
248 | * This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm | |
249 | * is a block device. | |
250 | */ | |
10bc1861 | 251 | int is_blktype(struct lxc_storage *b) |
f2d5a09d CB |
252 | { |
253 | if (strcmp(b->type, "lvm") == 0) | |
254 | return 1; | |
255 | ||
256 | return 0; | |
257 | } | |
258 | ||
259 | int mount_unknown_fs(const char *rootfs, const char *target, | |
260 | const char *options) | |
261 | { | |
262 | size_t i; | |
263 | int ret; | |
264 | struct cbarg { | |
265 | const char *rootfs; | |
266 | const char *target; | |
267 | const char *options; | |
268 | } cbarg = { | |
269 | .rootfs = rootfs, | |
270 | .target = target, | |
271 | .options = options, | |
272 | }; | |
273 | ||
274 | /* | |
275 | * find the filesystem type with brute force: | |
276 | * first we check with /etc/filesystems, in case the modules | |
277 | * are auto-loaded and fall back to the supported kernel fs | |
278 | */ | |
279 | char *fsfile[] = { | |
280 | "/etc/filesystems", | |
281 | "/proc/filesystems", | |
282 | }; | |
283 | ||
284 | for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) { | |
285 | if (access(fsfile[i], F_OK)) | |
286 | continue; | |
287 | ||
288 | ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); | |
289 | if (ret < 0) { | |
ed263352 | 290 | ERROR("Failed to parse \"%s\"", fsfile[i]); |
f2d5a09d CB |
291 | return -1; |
292 | } | |
293 | ||
294 | if (ret) | |
295 | return 0; | |
296 | } | |
297 | ||
ed263352 | 298 | ERROR("Failed to determine FSType for \"%s\"", rootfs); |
299 | ||
f2d5a09d CB |
300 | return -1; |
301 | } | |
302 | ||
/*
 * Per-line callback used by mount_unknown_fs(): try to mount with the
 * filesystem type named on one line of a filesystems list.
 * @buffer: one line of the list; modified in place while trimming
 * @data:   pointer to the rootfs/target/options triple set up by the caller
 *
 * Returns 1 when the mount succeeded and 0 when this line should be skipped
 * (it contains "nodev", the mount options could not be parsed, or the mount
 * failed).
 *
 * This was copied from conf.c. Once conf.c moves to the callback system it
 * can be pulled from there, so utils.c need not carry these low level
 * functions.
 */
int find_fstype_cb(char *buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;
	unsigned long flags = 0;
	char *optdata = NULL;
	char *name;
	int mounted = 0;

	/* Lines mentioning "nodev" are never tried. */
	if (strstr(buffer, "nodev"))
		return 0;

	/* Trim the line down to the bare filesystem name. */
	name = buffer + lxc_char_left_gc(buffer, strlen(buffer));
	name[lxc_char_right_gc(name, strlen(name))] = '\0';

	DEBUG("Trying to mount \"%s\"->\"%s\" with FSType \"%s\"", cbarg->rootfs,
	      cbarg->target, name);

	if (parse_mntopts_legacy(cbarg->options, &flags, &optdata) >= 0) {
		if (mount(cbarg->rootfs, cbarg->target, name, flags, optdata) == 0) {
			mounted = 1;
		} else {
			/* Logged right after mount() so errno is still its own. */
			SYSDEBUG("Failed to mount");
		}
	}

	free(optdata);

	if (!mounted)
		return 0;

	INFO("Mounted \"%s\" on \"%s\", with FSType \"%s\"", cbarg->rootfs,
	     cbarg->target, name);

	return 1;
}
348 | ||
41dc7155 | 349 | const char *linkderef(const char *path, char *dest) |
f2d5a09d CB |
350 | { |
351 | struct stat sbuf; | |
352 | ssize_t ret; | |
353 | ||
354 | ret = stat(path, &sbuf); | |
ec3af0a4 | 355 | if (ret < 0) { |
356 | SYSERROR("Failed to get status of file - \"%s\"", path); | |
f2d5a09d | 357 | return NULL; |
ec3af0a4 | 358 | } |
f2d5a09d CB |
359 | |
360 | if (!S_ISLNK(sbuf.st_mode)) | |
361 | return path; | |
362 | ||
339de297 | 363 | ret = readlink(path, dest, PATH_MAX); |
f2d5a09d | 364 | if (ret < 0) { |
ed263352 | 365 | SYSERROR("Failed to read link of \"%s\"", path); |
f2d5a09d | 366 | return NULL; |
339de297 | 367 | } else if (ret >= PATH_MAX) { |
ed263352 | 368 | ERROR("The name of link of \"%s\" is too long", path); |
f2d5a09d CB |
369 | return NULL; |
370 | } | |
371 | dest[ret] = '\0'; | |
372 | ||
373 | return dest; | |
374 | } | |
375 | ||
376 | /* | |
377 | * is an unprivileged user allowed to make this kind of snapshot | |
378 | */ | |
10bc1861 | 379 | bool unpriv_snap_allowed(struct lxc_storage *b, const char *t, bool snap, |
f2d5a09d CB |
380 | bool maybesnap) |
381 | { | |
382 | if (!t) { | |
1a0e70ac CB |
383 | /* New type will be same as original (unless snap && b->type == |
384 | * dir, in which case it will be overlayfs -- which is also | |
385 | * allowed). | |
386 | */ | |
f2d5a09d | 387 | if (strcmp(b->type, "dir") == 0 || |
ba115175 | 388 | strcmp(b->type, "overlay") == 0 || |
f2d5a09d CB |
389 | strcmp(b->type, "overlayfs") == 0 || |
390 | strcmp(b->type, "btrfs") == 0 || | |
391 | strcmp(b->type, "loop") == 0) | |
392 | return true; | |
393 | ||
394 | return false; | |
395 | } | |
396 | ||
1a0e70ac CB |
397 | /* Unprivileged users can copy and snapshot dir, overlayfs, and loop. |
398 | * In particular, not zfs, btrfs, or lvm. | |
399 | */ | |
f2d5a09d | 400 | if (strcmp(t, "dir") == 0 || |
ba115175 | 401 | strcmp(t, "overlay") == 0 || |
f2d5a09d CB |
402 | strcmp(t, "overlayfs") == 0 || |
403 | strcmp(t, "btrfs") == 0 || | |
404 | strcmp(t, "loop") == 0) | |
405 | return true; | |
406 | ||
407 | return false; | |
408 | } | |
409 | ||
/*
 * Parse a filesystem size such as "500", "10G" or "1 t" into bytes.
 * @s: number (any base strtoull() accepts with base 0), optional blanks, then
 *     an optional unit whose first character is one of b, k, m, g, t in
 *     either case. Without a unit the number is taken as megabytes.
 *
 * Units are powers of 1024. Returns the size in bytes, or 0 (meaning "use the
 * default size") when the number is missing, the unit is unknown, or the
 * value does not fit in 64 bits.
 */
uint64_t get_fssize(char *s)
{
	uint64_t ret, unit;
	char *end;

	errno = 0;
	ret = strtoull(s, &end, 0);
	if (end == s) {
		ERROR("Invalid blockdev size '%s', using default size", s);
		return 0;
	}

	/* strtoull() clamps out-of-range input and reports it via errno. */
	if (errno == ERANGE) {
		ERROR("Blockdev size '%s' is out of range, using default size", s);
		return 0;
	}

	/* <ctype.h> functions need a value representable as unsigned char. */
	while (isblank((unsigned char)*end))
		end++;

	switch (*end) {
	case '\0': /* MB by default */
	case 'm':
	case 'M':
		unit = 1024ULL * 1024ULL;
		break;
	case 'b':
	case 'B':
		unit = 1ULL;
		break;
	case 'k':
	case 'K':
		unit = 1024ULL;
		break;
	case 'g':
	case 'G':
		unit = 1024ULL * 1024ULL * 1024ULL;
		break;
	case 't':
	case 'T':
		unit = 1024ULL * 1024ULL * 1024ULL * 1024ULL;
		break;
	default:
		ERROR("Invalid blockdev unit size '%c' in '%s', using default size", *end, s);
		return 0;
	}

	/* Refuse to wrap around silently when scaling to bytes. */
	if (ret > UINT64_MAX / unit) {
		ERROR("Blockdev size '%s' is out of range, using default size", s);
		return 0;
	}

	return ret * unit;
}
443 | ||
/*
 * Check whether @type names one of the known storage backends.
 *
 * Returns true for dir, btrfs, loop, lvm, nbd, overlay, overlayfs, rbd and
 * zfs (exact, case-sensitive match) and false for anything else.
 */
bool is_valid_storage_type(const char *type)
{
	static const char *const known[] = {
		"dir",
		"btrfs",
		"loop",
		"lvm",
		"nbd",
		"overlay",
		"overlayfs",
		"rbd",
		"zfs",
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(type, known[i]) == 0)
			return true;
	}

	return false;
}
459 | ||
/*
 * Switch to gid 0 and uid 0, then destroy the storage described by the
 * configuration.
 * @data: a struct lxc_conf * passed as void *
 *
 * Returns 0 on success, -1 if setgid(), setuid() or storage_destroy() fails.
 */
int storage_destroy_wrapper(void *data)
{
	struct lxc_conf *conf = data;
	int rc = -1;

	/* The result of dropping supplementary groups is deliberately ignored. */
	(void)lxc_drop_groups();

	if (setgid(0) < 0) {
		SYSERROR("Failed to setgid to 0");
	} else if (setuid(0) < 0) {
		SYSERROR("Failed to setuid to 0");
	} else if (!storage_destroy(conf)) {
		ERROR("Failed to destroy storage");
	} else {
		rc = 0;
	}

	return rc;
}