2 * Present a block device as a raw image through FUSE
4 * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 or later of the License.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #define FUSE_USE_VERSION 31
21 #include "qemu/osdep.h"
22 #include "qemu/memalign.h"
23 #include "block/aio.h"
24 #include "block/block_int-common.h"
25 #include "block/export.h"
26 #include "block/fuse.h"
27 #include "block/qapi.h"
28 #include "qapi/error.h"
29 #include "qapi/qapi-commands-block.h"
30 #include "qemu/main-loop.h"
31 #include "sysemu/block-backend.h"
34 #include <fuse_lowlevel.h>
36 #if defined(CONFIG_FALLOCATE_ZERO_RANGE)
37 #include <linux/falloc.h>
44 /* Prevent overly long bounce buffer allocations */
45 #define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
48 typedef struct FuseExport
{
51 struct fuse_session
*fuse_session
;
52 struct fuse_buf fuse_buf
;
53 bool mounted
, fd_handler_set_up
;
58 /* Whether allow_other was used as a mount option or not */
66 static GHashTable
*exports
;
67 static const struct fuse_lowlevel_ops fuse_ops
;
69 static void fuse_export_shutdown(BlockExport
*exp
);
70 static void fuse_export_delete(BlockExport
*exp
);
72 static void init_exports_table(void);
74 static int setup_fuse_export(FuseExport
*exp
, const char *mountpoint
,
75 bool allow_other
, Error
**errp
);
76 static void read_from_fuse_export(void *opaque
);
78 static bool is_regular_file(const char *path
, Error
**errp
);
81 static int fuse_export_create(BlockExport
*blk_exp
,
82 BlockExportOptions
*blk_exp_args
,
85 FuseExport
*exp
= container_of(blk_exp
, FuseExport
, common
);
86 BlockExportOptionsFuse
*args
= &blk_exp_args
->u
.fuse
;
89 assert(blk_exp_args
->type
== BLOCK_EXPORT_TYPE_FUSE
);
91 /* For growable and writable exports, take the RESIZE permission */
92 if (args
->growable
|| blk_exp_args
->writable
) {
93 uint64_t blk_perm
, blk_shared_perm
;
95 blk_get_perm(exp
->common
.blk
, &blk_perm
, &blk_shared_perm
);
97 ret
= blk_set_perm(exp
->common
.blk
, blk_perm
| BLK_PERM_RESIZE
,
98 blk_shared_perm
, errp
);
104 init_exports_table();
107 * It is important to do this check before calling is_regular_file() --
108 * that function will do a stat(), which we would have to handle if we
109 * already exported something on @mountpoint. But we cannot, because
110 * we are currently caught up here.
111 * (Note that ideally we would want to resolve relative paths here,
112 * but bdrv_make_absolute_filename() might do the wrong thing for
113 * paths that contain colons, and realpath() would resolve symlinks,
114 * which we do not want: The mount point is not going to be the
115 * symlink's destination, but the link itself.)
116 * So this will not catch all potential clashes, but hopefully at
117 * least the most common one of specifying exactly the same path
120 if (g_hash_table_contains(exports
, args
->mountpoint
)) {
121 error_setg(errp
, "There already is a FUSE export on '%s'",
127 if (!is_regular_file(args
->mountpoint
, errp
)) {
132 exp
->mountpoint
= g_strdup(args
->mountpoint
);
133 exp
->writable
= blk_exp_args
->writable
;
134 exp
->growable
= args
->growable
;
137 if (!args
->has_allow_other
) {
138 args
->allow_other
= FUSE_EXPORT_ALLOW_OTHER_AUTO
;
141 exp
->st_mode
= S_IFREG
| S_IRUSR
;
143 exp
->st_mode
|= S_IWUSR
;
145 exp
->st_uid
= getuid();
146 exp
->st_gid
= getgid();
148 if (args
->allow_other
== FUSE_EXPORT_ALLOW_OTHER_AUTO
) {
149 /* Ignore errors on our first attempt */
150 ret
= setup_fuse_export(exp
, args
->mountpoint
, true, NULL
);
151 exp
->allow_other
= ret
== 0;
153 ret
= setup_fuse_export(exp
, args
->mountpoint
, false, errp
);
156 exp
->allow_other
= args
->allow_other
== FUSE_EXPORT_ALLOW_OTHER_ON
;
157 ret
= setup_fuse_export(exp
, args
->mountpoint
, exp
->allow_other
, errp
);
166 fuse_export_delete(blk_exp
);
171 * Allocates the global @exports hash table.
173 static void init_exports_table(void)
179 exports
= g_hash_table_new_full(g_str_hash
, g_str_equal
, g_free
, NULL
);
183 * Create exp->fuse_session and mount it.
185 static int setup_fuse_export(FuseExport
*exp
, const char *mountpoint
,
186 bool allow_other
, Error
**errp
)
188 const char *fuse_argv
[4];
190 struct fuse_args fuse_args
;
194 * max_read needs to match what fuse_init() sets.
195 * max_write need not be supplied.
197 mount_opts
= g_strdup_printf("max_read=%zu,default_permissions%s",
198 FUSE_MAX_BOUNCE_BYTES
,
199 allow_other
? ",allow_other" : "");
201 fuse_argv
[0] = ""; /* Dummy program name */
203 fuse_argv
[2] = mount_opts
;
205 fuse_args
= (struct fuse_args
)FUSE_ARGS_INIT(3, (char **)fuse_argv
);
207 exp
->fuse_session
= fuse_session_new(&fuse_args
, &fuse_ops
,
208 sizeof(fuse_ops
), exp
);
210 if (!exp
->fuse_session
) {
211 error_setg(errp
, "Failed to set up FUSE session");
216 ret
= fuse_session_mount(exp
->fuse_session
, mountpoint
);
218 error_setg(errp
, "Failed to mount FUSE session to export");
224 g_hash_table_insert(exports
, g_strdup(mountpoint
), NULL
);
226 aio_set_fd_handler(exp
->common
.ctx
,
227 fuse_session_fd(exp
->fuse_session
), true,
228 read_from_fuse_export
, NULL
, NULL
, NULL
, exp
);
229 exp
->fd_handler_set_up
= true;
234 fuse_export_shutdown(&exp
->common
);
239 * Callback to be invoked when the FUSE session FD can be read from.
240 * (This is basically the FUSE event loop.)
242 static void read_from_fuse_export(void *opaque
)
244 FuseExport
*exp
= opaque
;
247 blk_exp_ref(&exp
->common
);
250 ret
= fuse_session_receive_buf(exp
->fuse_session
, &exp
->fuse_buf
);
251 } while (ret
== -EINTR
);
256 fuse_session_process_buf(exp
->fuse_session
, &exp
->fuse_buf
);
259 blk_exp_unref(&exp
->common
);
262 static void fuse_export_shutdown(BlockExport
*blk_exp
)
264 FuseExport
*exp
= container_of(blk_exp
, FuseExport
, common
);
266 if (exp
->fuse_session
) {
267 fuse_session_exit(exp
->fuse_session
);
269 if (exp
->fd_handler_set_up
) {
270 aio_set_fd_handler(exp
->common
.ctx
,
271 fuse_session_fd(exp
->fuse_session
), true,
272 NULL
, NULL
, NULL
, NULL
, NULL
);
273 exp
->fd_handler_set_up
= false;
277 if (exp
->mountpoint
) {
279 * Safe to drop now, because we will not handle any requests
280 * for this export anymore anyway.
282 g_hash_table_remove(exports
, exp
->mountpoint
);
286 static void fuse_export_delete(BlockExport
*blk_exp
)
288 FuseExport
*exp
= container_of(blk_exp
, FuseExport
, common
);
290 if (exp
->fuse_session
) {
292 fuse_session_unmount(exp
->fuse_session
);
295 fuse_session_destroy(exp
->fuse_session
);
298 free(exp
->fuse_buf
.mem
);
299 g_free(exp
->mountpoint
);
303 * Check whether @path points to a regular file. If not, put an
304 * appropriate message into *errp.
306 static bool is_regular_file(const char *path
, Error
**errp
)
311 ret
= stat(path
, &statbuf
);
313 error_setg_errno(errp
, errno
, "Failed to stat '%s'", path
);
317 if (!S_ISREG(statbuf
.st_mode
)) {
318 error_setg(errp
, "'%s' is not a regular file", path
);
326 * A chance to set change some parameters supplied to FUSE_INIT.
328 static void fuse_init(void *userdata
, struct fuse_conn_info
*conn
)
331 * MIN_NON_ZERO() would not be wrong here, but what we set here
332 * must equal what has been passed to fuse_session_new().
333 * Therefore, as long as max_read must be passed as a mount option
334 * (which libfuse claims will be changed at some point), we have
335 * to set max_read to a fixed value here.
337 conn
->max_read
= FUSE_MAX_BOUNCE_BYTES
;
339 conn
->max_write
= MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES
, conn
->max_write
);
343 * Let clients look up files. Always return ENOENT because we only
344 * care about the mountpoint itself.
346 static void fuse_lookup(fuse_req_t req
, fuse_ino_t parent
, const char *name
)
348 fuse_reply_err(req
, ENOENT
);
352 * Let clients get file attributes (i.e., stat() the file).
354 static void fuse_getattr(fuse_req_t req
, fuse_ino_t inode
,
355 struct fuse_file_info
*fi
)
358 int64_t length
, allocated_blocks
;
359 time_t now
= time(NULL
);
360 FuseExport
*exp
= fuse_req_userdata(req
);
362 length
= blk_getlength(exp
->common
.blk
);
364 fuse_reply_err(req
, -length
);
368 allocated_blocks
= bdrv_get_allocated_file_size(blk_bs(exp
->common
.blk
));
369 if (allocated_blocks
<= 0) {
370 allocated_blocks
= DIV_ROUND_UP(length
, 512);
372 allocated_blocks
= DIV_ROUND_UP(allocated_blocks
, 512);
375 statbuf
= (struct stat
) {
377 .st_mode
= exp
->st_mode
,
379 .st_uid
= exp
->st_uid
,
380 .st_gid
= exp
->st_gid
,
382 .st_blksize
= blk_bs(exp
->common
.blk
)->bl
.request_alignment
,
383 .st_blocks
= allocated_blocks
,
389 fuse_reply_attr(req
, &statbuf
, 1.);
392 static int fuse_do_truncate(const FuseExport
*exp
, int64_t size
,
393 bool req_zero_write
, PreallocMode prealloc
)
395 uint64_t blk_perm
, blk_shared_perm
;
396 BdrvRequestFlags truncate_flags
= 0;
397 bool add_resize_perm
;
400 /* Growable and writable exports have a permanent RESIZE permission */
401 add_resize_perm
= !exp
->growable
&& !exp
->writable
;
403 if (req_zero_write
) {
404 truncate_flags
|= BDRV_REQ_ZERO_WRITE
;
407 if (add_resize_perm
) {
409 if (!qemu_in_main_thread()) {
410 /* Changing permissions like below only works in the main thread */
414 blk_get_perm(exp
->common
.blk
, &blk_perm
, &blk_shared_perm
);
416 ret
= blk_set_perm(exp
->common
.blk
, blk_perm
| BLK_PERM_RESIZE
,
417 blk_shared_perm
, NULL
);
423 ret
= blk_truncate(exp
->common
.blk
, size
, true, prealloc
,
424 truncate_flags
, NULL
);
426 if (add_resize_perm
) {
427 /* Must succeed, because we are only giving up the RESIZE permission */
428 ret_check
= blk_set_perm(exp
->common
.blk
, blk_perm
,
429 blk_shared_perm
, &error_abort
);
430 assert(ret_check
== 0);
437 * Let clients set file attributes. Only resizing and changing
438 * permissions (st_mode, st_uid, st_gid) is allowed.
439 * Changing permissions is only allowed as far as it will actually
440 * permit access: Read-only exports cannot be given +w, and exports
441 * without allow_other cannot be given a different UID or GID, and
442 * they cannot be given non-owner access.
444 static void fuse_setattr(fuse_req_t req
, fuse_ino_t inode
, struct stat
*statbuf
,
445 int to_set
, struct fuse_file_info
*fi
)
447 FuseExport
*exp
= fuse_req_userdata(req
);
451 supported_attrs
= FUSE_SET_ATTR_SIZE
| FUSE_SET_ATTR_MODE
;
452 if (exp
->allow_other
) {
453 supported_attrs
|= FUSE_SET_ATTR_UID
| FUSE_SET_ATTR_GID
;
456 if (to_set
& ~supported_attrs
) {
457 fuse_reply_err(req
, ENOTSUP
);
461 /* Do some argument checks first before committing to anything */
462 if (to_set
& FUSE_SET_ATTR_MODE
) {
464 * Without allow_other, non-owners can never access the export, so do
465 * not allow setting permissions for them
467 if (!exp
->allow_other
&&
468 (statbuf
->st_mode
& (S_IRWXG
| S_IRWXO
)) != 0)
470 fuse_reply_err(req
, EPERM
);
474 /* +w for read-only exports makes no sense, disallow it */
475 if (!exp
->writable
&&
476 (statbuf
->st_mode
& (S_IWUSR
| S_IWGRP
| S_IWOTH
)) != 0)
478 fuse_reply_err(req
, EROFS
);
483 if (to_set
& FUSE_SET_ATTR_SIZE
) {
484 if (!exp
->writable
) {
485 fuse_reply_err(req
, EACCES
);
489 ret
= fuse_do_truncate(exp
, statbuf
->st_size
, true, PREALLOC_MODE_OFF
);
491 fuse_reply_err(req
, -ret
);
496 if (to_set
& FUSE_SET_ATTR_MODE
) {
497 /* Ignore FUSE-supplied file type, only change the mode */
498 exp
->st_mode
= (statbuf
->st_mode
& 07777) | S_IFREG
;
501 if (to_set
& FUSE_SET_ATTR_UID
) {
502 exp
->st_uid
= statbuf
->st_uid
;
505 if (to_set
& FUSE_SET_ATTR_GID
) {
506 exp
->st_gid
= statbuf
->st_gid
;
509 fuse_getattr(req
, inode
, fi
);
513 * Let clients open a file (i.e., the exported image).
515 static void fuse_open(fuse_req_t req
, fuse_ino_t inode
,
516 struct fuse_file_info
*fi
)
518 fuse_reply_open(req
, fi
);
522 * Handle client reads from the exported image.
524 static void fuse_read(fuse_req_t req
, fuse_ino_t inode
,
525 size_t size
, off_t offset
, struct fuse_file_info
*fi
)
527 FuseExport
*exp
= fuse_req_userdata(req
);
532 /* Limited by max_read, should not happen */
533 if (size
> FUSE_MAX_BOUNCE_BYTES
) {
534 fuse_reply_err(req
, EINVAL
);
539 * Clients will expect short reads at EOF, so we have to limit
540 * offset+size to the image length.
542 length
= blk_getlength(exp
->common
.blk
);
544 fuse_reply_err(req
, -length
);
548 if (offset
+ size
> length
) {
549 size
= length
- offset
;
552 buf
= qemu_try_blockalign(blk_bs(exp
->common
.blk
), size
);
554 fuse_reply_err(req
, ENOMEM
);
558 ret
= blk_pread(exp
->common
.blk
, offset
, size
, buf
, 0);
560 fuse_reply_buf(req
, buf
, size
);
562 fuse_reply_err(req
, -ret
);
569 * Handle client writes to the exported image.
571 static void fuse_write(fuse_req_t req
, fuse_ino_t inode
, const char *buf
,
572 size_t size
, off_t offset
, struct fuse_file_info
*fi
)
574 FuseExport
*exp
= fuse_req_userdata(req
);
578 /* Limited by max_write, should not happen */
579 if (size
> BDRV_REQUEST_MAX_BYTES
) {
580 fuse_reply_err(req
, EINVAL
);
584 if (!exp
->writable
) {
585 fuse_reply_err(req
, EACCES
);
590 * Clients will expect short writes at EOF, so we have to limit
591 * offset+size to the image length.
593 length
= blk_getlength(exp
->common
.blk
);
595 fuse_reply_err(req
, -length
);
599 if (offset
+ size
> length
) {
601 ret
= fuse_do_truncate(exp
, offset
+ size
, true, PREALLOC_MODE_OFF
);
603 fuse_reply_err(req
, -ret
);
607 size
= length
- offset
;
611 ret
= blk_pwrite(exp
->common
.blk
, offset
, size
, buf
, 0);
613 fuse_reply_write(req
, size
);
615 fuse_reply_err(req
, -ret
);
620 * Let clients perform various fallocate() operations.
622 static void fuse_fallocate(fuse_req_t req
, fuse_ino_t inode
, int mode
,
623 off_t offset
, off_t length
,
624 struct fuse_file_info
*fi
)
626 FuseExport
*exp
= fuse_req_userdata(req
);
630 if (!exp
->writable
) {
631 fuse_reply_err(req
, EACCES
);
635 blk_len
= blk_getlength(exp
->common
.blk
);
637 fuse_reply_err(req
, -blk_len
);
641 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
642 if (mode
& FALLOC_FL_KEEP_SIZE
) {
643 length
= MIN(length
, blk_len
- offset
);
645 #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
648 /* We can only fallocate at the EOF with a truncate */
649 if (offset
< blk_len
) {
650 fuse_reply_err(req
, EOPNOTSUPP
);
654 if (offset
> blk_len
) {
655 /* No preallocation needed here */
656 ret
= fuse_do_truncate(exp
, offset
, true, PREALLOC_MODE_OFF
);
658 fuse_reply_err(req
, -ret
);
663 ret
= fuse_do_truncate(exp
, offset
+ length
, true,
664 PREALLOC_MODE_FALLOC
);
666 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
667 else if (mode
& FALLOC_FL_PUNCH_HOLE
) {
668 if (!(mode
& FALLOC_FL_KEEP_SIZE
)) {
669 fuse_reply_err(req
, EINVAL
);
674 int size
= MIN(length
, BDRV_REQUEST_MAX_BYTES
);
676 ret
= blk_pwrite_zeroes(exp
->common
.blk
, offset
, size
,
677 BDRV_REQ_MAY_UNMAP
| BDRV_REQ_NO_FALLBACK
);
678 if (ret
== -ENOTSUP
) {
680 * fallocate() specifies to return EOPNOTSUPP for unsupported
688 } while (ret
== 0 && length
> 0);
690 #endif /* CONFIG_FALLOCATE_PUNCH_HOLE */
691 #ifdef CONFIG_FALLOCATE_ZERO_RANGE
692 else if (mode
& FALLOC_FL_ZERO_RANGE
) {
693 if (!(mode
& FALLOC_FL_KEEP_SIZE
) && offset
+ length
> blk_len
) {
694 /* No need for zeroes, we are going to write them ourselves */
695 ret
= fuse_do_truncate(exp
, offset
+ length
, false,
698 fuse_reply_err(req
, -ret
);
704 int size
= MIN(length
, BDRV_REQUEST_MAX_BYTES
);
706 ret
= blk_pwrite_zeroes(exp
->common
.blk
,
710 } while (ret
== 0 && length
> 0);
712 #endif /* CONFIG_FALLOCATE_ZERO_RANGE */
717 fuse_reply_err(req
, ret
< 0 ? -ret
: 0);
721 * Let clients fsync the exported image.
723 static void fuse_fsync(fuse_req_t req
, fuse_ino_t inode
, int datasync
,
724 struct fuse_file_info
*fi
)
726 FuseExport
*exp
= fuse_req_userdata(req
);
729 ret
= blk_flush(exp
->common
.blk
);
730 fuse_reply_err(req
, ret
< 0 ? -ret
: 0);
734 * Called before an FD to the exported image is closed. (libfuse
735 * notes this to be a way to return last-minute errors.)
737 static void fuse_flush(fuse_req_t req
, fuse_ino_t inode
,
738 struct fuse_file_info
*fi
)
740 fuse_fsync(req
, inode
, 1, fi
);
#ifdef CONFIG_FUSE_LSEEK
/**
 * Let clients inquire allocation status (SEEK_DATA / SEEK_HOLE).
 *
 * Walks the block status starting at @offset until an extent of the
 * requested kind is found and replies with its start.  Any other
 * @whence is rejected with EINVAL.
 */
static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
                       int whence, struct fuse_file_info *fi)
{
    FuseExport *exp = fuse_req_userdata(req);

    if (whence != SEEK_HOLE && whence != SEEK_DATA) {
        fuse_reply_err(req, EINVAL);
        return;
    }

    for (;;) {
        int64_t pnum;
        int status;
        bool is_data;

        status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
                                         offset, INT64_MAX, &pnum, NULL, NULL);
        if (status < 0) {
            fuse_reply_err(req, -status);
            return;
        }

        if (!pnum && (status & BDRV_BLOCK_EOF)) {
            int64_t blk_len;

            /*
             * If blk_getlength() rounds (e.g. by sectors), then the
             * export length will be rounded, too.  However,
             * bdrv_block_status_above() may return EOF at unaligned
             * offsets.  We must not let this become visible and thus
             * always simulate a hole between @offset (the real EOF)
             * and @blk_len (the client-visible EOF).
             */
            blk_len = blk_getlength(exp->common.blk);
            if (blk_len < 0) {
                fuse_reply_err(req, -blk_len);
                return;
            }

            if (offset > blk_len || whence == SEEK_DATA) {
                fuse_reply_err(req, ENXIO);
            } else {
                fuse_reply_lseek(req, offset);
            }
            return;
        }

        /* @whence is one of the two values checked above */
        is_data = (status & BDRV_BLOCK_DATA) != 0;
        if (whence == (is_data ? SEEK_DATA : SEEK_HOLE)) {
            fuse_reply_lseek(req, offset);
            return;
        }

        /* Safety check against infinite loops */
        if (!pnum) {
            fuse_reply_err(req, ENXIO);
            return;
        }

        offset += pnum;
    }
}
#endif
817 static const struct fuse_lowlevel_ops fuse_ops
= {
819 .lookup
= fuse_lookup
,
820 .getattr
= fuse_getattr
,
821 .setattr
= fuse_setattr
,
825 .fallocate
= fuse_fallocate
,
828 #ifdef CONFIG_FUSE_LSEEK
833 const BlockExportDriver blk_exp_fuse
= {
834 .type
= BLOCK_EXPORT_TYPE_FUSE
,
835 .instance_size
= sizeof(FuseExport
),
836 .create
= fuse_export_create
,
837 .delete = fuse_export_delete
,
838 .request_shutdown
= fuse_export_shutdown
,