]> git.proxmox.com Git - mirror_qemu.git/blob - block/export/fuse.c
export/fuse: Pass default_permissions for mount
[mirror_qemu.git] / block / export / fuse.c
1 /*
2 * Present a block device as a raw image through FUSE
3 *
4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 or later of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #define FUSE_USE_VERSION 31
20
21 #include "qemu/osdep.h"
22 #include "block/aio.h"
23 #include "block/block.h"
24 #include "block/export.h"
25 #include "block/fuse.h"
26 #include "block/qapi.h"
27 #include "qapi/error.h"
28 #include "qapi/qapi-commands-block.h"
29 #include "sysemu/block-backend.h"
30
31 #include <fuse.h>
32 #include <fuse_lowlevel.h>
33
34
/*
 * Upper bound for the size of a single bounce buffer, so that we never
 * try to allocate overly long ones: the smaller of the block layer's
 * maximum request size and 64 MiB.
 */
#define FUSE_MAX_BOUNCE_BYTES \
    (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
37
38
39 typedef struct FuseExport {
40 BlockExport common;
41
42 struct fuse_session *fuse_session;
43 struct fuse_buf fuse_buf;
44 bool mounted, fd_handler_set_up;
45
46 char *mountpoint;
47 bool writable;
48 bool growable;
49 } FuseExport;
50
51 static GHashTable *exports;
52 static const struct fuse_lowlevel_ops fuse_ops;
53
54 static void fuse_export_shutdown(BlockExport *exp);
55 static void fuse_export_delete(BlockExport *exp);
56
57 static void init_exports_table(void);
58
59 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
60 Error **errp);
61 static void read_from_fuse_export(void *opaque);
62
63 static bool is_regular_file(const char *path, Error **errp);
64
65
66 static int fuse_export_create(BlockExport *blk_exp,
67 BlockExportOptions *blk_exp_args,
68 Error **errp)
69 {
70 FuseExport *exp = container_of(blk_exp, FuseExport, common);
71 BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
72 int ret;
73
74 assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
75
76 /* For growable exports, take the RESIZE permission */
77 if (args->growable) {
78 uint64_t blk_perm, blk_shared_perm;
79
80 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
81
82 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
83 blk_shared_perm, errp);
84 if (ret < 0) {
85 return ret;
86 }
87 }
88
89 init_exports_table();
90
91 /*
92 * It is important to do this check before calling is_regular_file() --
93 * that function will do a stat(), which we would have to handle if we
94 * already exported something on @mountpoint. But we cannot, because
95 * we are currently caught up here.
96 * (Note that ideally we would want to resolve relative paths here,
97 * but bdrv_make_absolute_filename() might do the wrong thing for
98 * paths that contain colons, and realpath() would resolve symlinks,
99 * which we do not want: The mount point is not going to be the
100 * symlink's destination, but the link itself.)
101 * So this will not catch all potential clashes, but hopefully at
102 * least the most common one of specifying exactly the same path
103 * string twice.
104 */
105 if (g_hash_table_contains(exports, args->mountpoint)) {
106 error_setg(errp, "There already is a FUSE export on '%s'",
107 args->mountpoint);
108 ret = -EEXIST;
109 goto fail;
110 }
111
112 if (!is_regular_file(args->mountpoint, errp)) {
113 ret = -EINVAL;
114 goto fail;
115 }
116
117 exp->mountpoint = g_strdup(args->mountpoint);
118 exp->writable = blk_exp_args->writable;
119 exp->growable = args->growable;
120
121 ret = setup_fuse_export(exp, args->mountpoint, errp);
122 if (ret < 0) {
123 goto fail;
124 }
125
126 return 0;
127
128 fail:
129 fuse_export_delete(blk_exp);
130 return ret;
131 }
132
133 /**
134 * Allocates the global @exports hash table.
135 */
136 static void init_exports_table(void)
137 {
138 if (exports) {
139 return;
140 }
141
142 exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
143 }
144
145 /**
146 * Create exp->fuse_session and mount it.
147 */
148 static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
149 Error **errp)
150 {
151 const char *fuse_argv[4];
152 char *mount_opts;
153 struct fuse_args fuse_args;
154 int ret;
155
156 /*
157 * max_read needs to match what fuse_init() sets.
158 * max_write need not be supplied.
159 */
160 mount_opts = g_strdup_printf("max_read=%zu,default_permissions",
161 FUSE_MAX_BOUNCE_BYTES);
162
163 fuse_argv[0] = ""; /* Dummy program name */
164 fuse_argv[1] = "-o";
165 fuse_argv[2] = mount_opts;
166 fuse_argv[3] = NULL;
167 fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
168
169 exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
170 sizeof(fuse_ops), exp);
171 g_free(mount_opts);
172 if (!exp->fuse_session) {
173 error_setg(errp, "Failed to set up FUSE session");
174 ret = -EIO;
175 goto fail;
176 }
177
178 ret = fuse_session_mount(exp->fuse_session, mountpoint);
179 if (ret < 0) {
180 error_setg(errp, "Failed to mount FUSE session to export");
181 ret = -EIO;
182 goto fail;
183 }
184 exp->mounted = true;
185
186 g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
187
188 aio_set_fd_handler(exp->common.ctx,
189 fuse_session_fd(exp->fuse_session), true,
190 read_from_fuse_export, NULL, NULL, exp);
191 exp->fd_handler_set_up = true;
192
193 return 0;
194
195 fail:
196 fuse_export_shutdown(&exp->common);
197 return ret;
198 }
199
200 /**
201 * Callback to be invoked when the FUSE session FD can be read from.
202 * (This is basically the FUSE event loop.)
203 */
204 static void read_from_fuse_export(void *opaque)
205 {
206 FuseExport *exp = opaque;
207 int ret;
208
209 blk_exp_ref(&exp->common);
210
211 do {
212 ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
213 } while (ret == -EINTR);
214 if (ret < 0) {
215 goto out;
216 }
217
218 fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
219
220 out:
221 blk_exp_unref(&exp->common);
222 }
223
224 static void fuse_export_shutdown(BlockExport *blk_exp)
225 {
226 FuseExport *exp = container_of(blk_exp, FuseExport, common);
227
228 if (exp->fuse_session) {
229 fuse_session_exit(exp->fuse_session);
230
231 if (exp->fd_handler_set_up) {
232 aio_set_fd_handler(exp->common.ctx,
233 fuse_session_fd(exp->fuse_session), true,
234 NULL, NULL, NULL, NULL);
235 exp->fd_handler_set_up = false;
236 }
237 }
238
239 if (exp->mountpoint) {
240 /*
241 * Safe to drop now, because we will not handle any requests
242 * for this export anymore anyway.
243 */
244 g_hash_table_remove(exports, exp->mountpoint);
245 }
246 }
247
248 static void fuse_export_delete(BlockExport *blk_exp)
249 {
250 FuseExport *exp = container_of(blk_exp, FuseExport, common);
251
252 if (exp->fuse_session) {
253 if (exp->mounted) {
254 fuse_session_unmount(exp->fuse_session);
255 }
256
257 fuse_session_destroy(exp->fuse_session);
258 }
259
260 free(exp->fuse_buf.mem);
261 g_free(exp->mountpoint);
262 }
263
264 /**
265 * Check whether @path points to a regular file. If not, put an
266 * appropriate message into *errp.
267 */
268 static bool is_regular_file(const char *path, Error **errp)
269 {
270 struct stat statbuf;
271 int ret;
272
273 ret = stat(path, &statbuf);
274 if (ret < 0) {
275 error_setg_errno(errp, errno, "Failed to stat '%s'", path);
276 return false;
277 }
278
279 if (!S_ISREG(statbuf.st_mode)) {
280 error_setg(errp, "'%s' is not a regular file", path);
281 return false;
282 }
283
284 return true;
285 }
286
287 /**
288 * A chance to set change some parameters supplied to FUSE_INIT.
289 */
290 static void fuse_init(void *userdata, struct fuse_conn_info *conn)
291 {
292 /*
293 * MIN_NON_ZERO() would not be wrong here, but what we set here
294 * must equal what has been passed to fuse_session_new().
295 * Therefore, as long as max_read must be passed as a mount option
296 * (which libfuse claims will be changed at some point), we have
297 * to set max_read to a fixed value here.
298 */
299 conn->max_read = FUSE_MAX_BOUNCE_BYTES;
300
301 conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
302 }
303
304 /**
305 * Let clients look up files. Always return ENOENT because we only
306 * care about the mountpoint itself.
307 */
308 static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
309 {
310 fuse_reply_err(req, ENOENT);
311 }
312
313 /**
314 * Let clients get file attributes (i.e., stat() the file).
315 */
316 static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
317 struct fuse_file_info *fi)
318 {
319 struct stat statbuf;
320 int64_t length, allocated_blocks;
321 time_t now = time(NULL);
322 FuseExport *exp = fuse_req_userdata(req);
323 mode_t mode;
324
325 length = blk_getlength(exp->common.blk);
326 if (length < 0) {
327 fuse_reply_err(req, -length);
328 return;
329 }
330
331 allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
332 if (allocated_blocks <= 0) {
333 allocated_blocks = DIV_ROUND_UP(length, 512);
334 } else {
335 allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
336 }
337
338 mode = S_IFREG | S_IRUSR;
339 if (exp->writable) {
340 mode |= S_IWUSR;
341 }
342
343 statbuf = (struct stat) {
344 .st_ino = inode,
345 .st_mode = mode,
346 .st_nlink = 1,
347 .st_uid = getuid(),
348 .st_gid = getgid(),
349 .st_size = length,
350 .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
351 .st_blocks = allocated_blocks,
352 .st_atime = now,
353 .st_mtime = now,
354 .st_ctime = now,
355 };
356
357 fuse_reply_attr(req, &statbuf, 1.);
358 }
359
360 static int fuse_do_truncate(const FuseExport *exp, int64_t size,
361 bool req_zero_write, PreallocMode prealloc)
362 {
363 uint64_t blk_perm, blk_shared_perm;
364 BdrvRequestFlags truncate_flags = 0;
365 int ret;
366
367 if (req_zero_write) {
368 truncate_flags |= BDRV_REQ_ZERO_WRITE;
369 }
370
371 /* Growable exports have a permanent RESIZE permission */
372 if (!exp->growable) {
373 blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
374
375 ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
376 blk_shared_perm, NULL);
377 if (ret < 0) {
378 return ret;
379 }
380 }
381
382 ret = blk_truncate(exp->common.blk, size, true, prealloc,
383 truncate_flags, NULL);
384
385 if (!exp->growable) {
386 /* Must succeed, because we are only giving up the RESIZE permission */
387 blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
388 }
389
390 return ret;
391 }
392
393 /**
394 * Let clients set file attributes. Only resizing is supported.
395 */
396 static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
397 int to_set, struct fuse_file_info *fi)
398 {
399 FuseExport *exp = fuse_req_userdata(req);
400 int ret;
401
402 if (!exp->writable) {
403 fuse_reply_err(req, EACCES);
404 return;
405 }
406
407 if (to_set & ~FUSE_SET_ATTR_SIZE) {
408 fuse_reply_err(req, ENOTSUP);
409 return;
410 }
411
412 ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
413 if (ret < 0) {
414 fuse_reply_err(req, -ret);
415 return;
416 }
417
418 fuse_getattr(req, inode, fi);
419 }
420
421 /**
422 * Let clients open a file (i.e., the exported image).
423 */
424 static void fuse_open(fuse_req_t req, fuse_ino_t inode,
425 struct fuse_file_info *fi)
426 {
427 fuse_reply_open(req, fi);
428 }
429
430 /**
431 * Handle client reads from the exported image.
432 */
433 static void fuse_read(fuse_req_t req, fuse_ino_t inode,
434 size_t size, off_t offset, struct fuse_file_info *fi)
435 {
436 FuseExport *exp = fuse_req_userdata(req);
437 int64_t length;
438 void *buf;
439 int ret;
440
441 /* Limited by max_read, should not happen */
442 if (size > FUSE_MAX_BOUNCE_BYTES) {
443 fuse_reply_err(req, EINVAL);
444 return;
445 }
446
447 /**
448 * Clients will expect short reads at EOF, so we have to limit
449 * offset+size to the image length.
450 */
451 length = blk_getlength(exp->common.blk);
452 if (length < 0) {
453 fuse_reply_err(req, -length);
454 return;
455 }
456
457 if (offset + size > length) {
458 size = length - offset;
459 }
460
461 buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
462 if (!buf) {
463 fuse_reply_err(req, ENOMEM);
464 return;
465 }
466
467 ret = blk_pread(exp->common.blk, offset, buf, size);
468 if (ret >= 0) {
469 fuse_reply_buf(req, buf, size);
470 } else {
471 fuse_reply_err(req, -ret);
472 }
473
474 qemu_vfree(buf);
475 }
476
477 /**
478 * Handle client writes to the exported image.
479 */
480 static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
481 size_t size, off_t offset, struct fuse_file_info *fi)
482 {
483 FuseExport *exp = fuse_req_userdata(req);
484 int64_t length;
485 int ret;
486
487 /* Limited by max_write, should not happen */
488 if (size > BDRV_REQUEST_MAX_BYTES) {
489 fuse_reply_err(req, EINVAL);
490 return;
491 }
492
493 if (!exp->writable) {
494 fuse_reply_err(req, EACCES);
495 return;
496 }
497
498 /**
499 * Clients will expect short writes at EOF, so we have to limit
500 * offset+size to the image length.
501 */
502 length = blk_getlength(exp->common.blk);
503 if (length < 0) {
504 fuse_reply_err(req, -length);
505 return;
506 }
507
508 if (offset + size > length) {
509 if (exp->growable) {
510 ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
511 if (ret < 0) {
512 fuse_reply_err(req, -ret);
513 return;
514 }
515 } else {
516 size = length - offset;
517 }
518 }
519
520 ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
521 if (ret >= 0) {
522 fuse_reply_write(req, size);
523 } else {
524 fuse_reply_err(req, -ret);
525 }
526 }
527
528 /**
529 * Let clients perform various fallocate() operations.
530 */
531 static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
532 off_t offset, off_t length,
533 struct fuse_file_info *fi)
534 {
535 FuseExport *exp = fuse_req_userdata(req);
536 int64_t blk_len;
537 int ret;
538
539 if (!exp->writable) {
540 fuse_reply_err(req, EACCES);
541 return;
542 }
543
544 blk_len = blk_getlength(exp->common.blk);
545 if (blk_len < 0) {
546 fuse_reply_err(req, -blk_len);
547 return;
548 }
549
550 if (mode & FALLOC_FL_KEEP_SIZE) {
551 length = MIN(length, blk_len - offset);
552 }
553
554 if (mode & FALLOC_FL_PUNCH_HOLE) {
555 if (!(mode & FALLOC_FL_KEEP_SIZE)) {
556 fuse_reply_err(req, EINVAL);
557 return;
558 }
559
560 do {
561 int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
562
563 ret = blk_pdiscard(exp->common.blk, offset, size);
564 offset += size;
565 length -= size;
566 } while (ret == 0 && length > 0);
567 } else if (mode & FALLOC_FL_ZERO_RANGE) {
568 if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
569 /* No need for zeroes, we are going to write them ourselves */
570 ret = fuse_do_truncate(exp, offset + length, false,
571 PREALLOC_MODE_OFF);
572 if (ret < 0) {
573 fuse_reply_err(req, -ret);
574 return;
575 }
576 }
577
578 do {
579 int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
580
581 ret = blk_pwrite_zeroes(exp->common.blk,
582 offset, size, 0);
583 offset += size;
584 length -= size;
585 } while (ret == 0 && length > 0);
586 } else if (!mode) {
587 /* We can only fallocate at the EOF with a truncate */
588 if (offset < blk_len) {
589 fuse_reply_err(req, EOPNOTSUPP);
590 return;
591 }
592
593 if (offset > blk_len) {
594 /* No preallocation needed here */
595 ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
596 if (ret < 0) {
597 fuse_reply_err(req, -ret);
598 return;
599 }
600 }
601
602 ret = fuse_do_truncate(exp, offset + length, true,
603 PREALLOC_MODE_FALLOC);
604 } else {
605 ret = -EOPNOTSUPP;
606 }
607
608 fuse_reply_err(req, ret < 0 ? -ret : 0);
609 }
610
611 /**
612 * Let clients fsync the exported image.
613 */
614 static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
615 struct fuse_file_info *fi)
616 {
617 FuseExport *exp = fuse_req_userdata(req);
618 int ret;
619
620 ret = blk_flush(exp->common.blk);
621 fuse_reply_err(req, ret < 0 ? -ret : 0);
622 }
623
624 /**
625 * Called before an FD to the exported image is closed. (libfuse
626 * notes this to be a way to return last-minute errors.)
627 */
628 static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
629 struct fuse_file_info *fi)
630 {
631 fuse_fsync(req, inode, 1, fi);
632 }
633
#ifdef CONFIG_FUSE_LSEEK
/**
 * Let clients inquire allocation status: starting at @offset, find the
 * next data region (SEEK_DATA) or hole (SEEK_HOLE).  Any other @whence
 * is rejected with EINVAL.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);

    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    /* Walk the block status extent by extent until one matches */
    for (;;) {
        int64_t pnum;
        bool is_data;
        int ret;

        ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                      offset, INT64_MAX, &pnum, NULL, NULL);
        if (ret < 0) {
            fuse_reply_err(req, -ret);
            return;
        }

        if (pnum == 0 && (ret & BDRV_BLOCK_EOF)) {
            /*
             * If blk_getlength() rounds (e.g. by sectors), then the
             * export length will be rounded, too.  However,
             * bdrv_block_status_above() may return EOF at unaligned
             * offsets.  We must not let this become visible and thus
             * always simulate a hole between @offset (the real EOF)
             * and @blk_len (the client-visible EOF).
             */
            int64_t blk_len = blk_getlength(exp->common.blk);

            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (whence == SEEK_DATA || offset > blk_len) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        /*
         * @whence is either SEEK_DATA or SEEK_HOLE at this point, so
         * the extent is the one we look for exactly if its kind
         * matches the requested one.
         */
        is_data = (ret & BDRV_BLOCK_DATA) != 0;
        if (is_data == (whence == SEEK_DATA)) {
            fuse_reply_lseek(req, offset);
            return;
        }

        /* Safety check against infinite loops */
        if (pnum == 0) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        offset += pnum;
    }
}
#endif
707
708 static const struct fuse_lowlevel_ops fuse_ops = {
709 .init = fuse_init,
710 .lookup = fuse_lookup,
711 .getattr = fuse_getattr,
712 .setattr = fuse_setattr,
713 .open = fuse_open,
714 .read = fuse_read,
715 .write = fuse_write,
716 .fallocate = fuse_fallocate,
717 .flush = fuse_flush,
718 .fsync = fuse_fsync,
719 #ifdef CONFIG_FUSE_LSEEK
720 .lseek = fuse_lseek,
721 #endif
722 };
723
724 const BlockExportDriver blk_exp_fuse = {
725 .type = BLOCK_EXPORT_TYPE_FUSE,
726 .instance_size = sizeof(FuseExport),
727 .create = fuse_export_create,
728 .delete = fuse_export_delete,
729 .request_shutdown = fuse_export_shutdown,
730 };