bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
+ bool drop_cache;
bool check_cache_dropped;
PRManager *pr_mgr;
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
+ bool drop_cache;
bool check_cache_dropped;
} BDRVRawReopenState;
}
}
-static void raw_parse_flags(int bdrv_flags, int *open_flags)
+static void raw_parse_flags(int bdrv_flags, int *open_flags, bool has_writers)
{
+ bool read_write = false;
assert(open_flags != NULL);
*open_flags |= O_BINARY;
*open_flags &= ~O_ACCMODE;
- if (bdrv_flags & BDRV_O_RDWR) {
+
+ if (bdrv_flags & BDRV_O_AUTO_RDONLY) {
+ read_write = has_writers;
+ } else if (bdrv_flags & BDRV_O_RDWR) {
+ read_write = true;
+ }
+
+ if (read_write) {
*open_flags |= O_RDWR;
} else {
*open_flags |= O_RDONLY;
.type = QEMU_OPT_STRING,
.help = "id of persistent reservation manager object (default: none)",
},
+#if defined(__linux__)
+ {
+ .name = "drop-cache",
+ .type = QEMU_OPT_BOOL,
+ .help = "invalidate page cache during live migration (default: on)",
+ },
+#endif
{
.name = "x-check-cache-dropped",
.type = QEMU_OPT_BOOL,
},
};
+static const char *const mutable_opts[] = { "x-check-cache-dropped", NULL };
+
static int raw_open_common(BlockDriverState *bs, QDict *options,
int bdrv_flags, int open_flags,
bool device, Error **errp)
}
}
+ s->drop_cache = qemu_opt_get_bool(opts, "drop-cache", true);
s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
false);
s->open_flags = open_flags;
- raw_parse_flags(bdrv_flags, &s->open_flags);
+ raw_parse_flags(bdrv_flags, &s->open_flags, false);
s->fd = -1;
fd = qemu_open(filename, s->open_flags, 0644);
ret = fd < 0 ? -errno : 0;
- if (ret == -EACCES || ret == -EROFS) {
- /* Try to degrade to read-only, but if it doesn't work, still use the
- * normal error message. */
- if (bdrv_apply_auto_read_only(bs, NULL, NULL) == 0) {
- bdrv_flags &= ~BDRV_O_RDWR;
- raw_parse_flags(bdrv_flags, &s->open_flags);
- assert(!(s->open_flags & O_CREAT));
- fd = qemu_open(filename, s->open_flags);
- ret = fd < 0 ? -errno : 0;
- }
- }
-
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not open '%s'", filename);
if (ret == -EROFS) {
}
#endif
- bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
switch (op) {
case RAW_PL_PREPARE:
+ if ((s->perm | new_perm) == s->perm &&
+ (s->shared_perm & new_shared) == s->shared_perm)
+ {
+ /*
+ * We are going to unlock bytes, it should not fail. If it fail due
+ * to some fs-dependent permission-unrelated reasons (which occurs
+ * sometimes on NFS and leads to abort in bdrv_replace_child) we
+ * can't prevent such errors by any check here. And we ignore them
+ * anyway in ABORT and COMMIT.
+ */
+ return 0;
+ }
ret = raw_apply_lock_bytes(s, s->fd, s->perm | new_perm,
~s->shared_perm | ~new_shared,
false, errp);
}
static int raw_reconfigure_getfd(BlockDriverState *bs, int flags,
- int *open_flags, bool force_dup,
+ int *open_flags, uint64_t perm, bool force_dup,
Error **errp)
{
BDRVRawState *s = bs->opaque;
int fd = -1;
int ret;
+ bool has_writers = perm &
+ (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED | BLK_PERM_RESIZE);
int fcntl_flags = O_APPEND | O_NONBLOCK;
#ifdef O_NOATIME
fcntl_flags |= O_NOATIME;
*open_flags |= O_NONBLOCK;
}
- raw_parse_flags(flags, open_flags);
+ raw_parse_flags(flags, open_flags, has_writers);
#ifdef O_ASYNC
/* Not all operating systems have O_ASYNC, and those that don't
goto out;
}
+ rs->drop_cache = qemu_opt_get_bool_del(opts, "drop-cache", true);
rs->check_cache_dropped =
qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false);
qemu_opts_to_qdict(opts, state->options);
rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags,
- true, &local_err);
+ state->perm, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -1;
BDRVRawReopenState *rs = state->opaque;
BDRVRawState *s = state->bs->opaque;
+ s->drop_cache = rs->drop_cache;
s->check_cache_dropped = rs->check_cache_dropped;
s->open_flags = rs->open_flags;
#ifdef CONFIG_XFS
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
+ int64_t len;
struct xfs_flock64 fl;
int err;
+ len = lseek(s->fd, 0, SEEK_END);
+ if (len < 0) {
+ return -errno;
+ }
+
+ if (offset + bytes > len) {
+ /* XFS_IOC_ZERO_RANGE does not increase the file length */
+ if (ftruncate(s->fd, offset + bytes) < 0) {
+ return -errno;
+ }
+ }
+
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
fl.l_start = offset;
}
#ifdef BLKZEROOUT
- do {
- uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
- if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
- return 0;
- }
- } while (errno == EINTR);
+ /* The BLKZEROOUT implementation in the kernel doesn't set
+ * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow
+ * fallbacks. */
+ if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) {
+ do {
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
- ret = translate_err(-errno);
+ ret = translate_err(-errno);
+ }
#endif
if (ret == -ENOTSUP) {
off_t data = 0, hole = 0;
int ret;
+ assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
+
ret = fd_open(bs);
if (ret < 0) {
return ret;
/* On a data extent, compute bytes to the end of the extent,
* possibly including a partial sector at EOF. */
*pnum = MIN(bytes, hole - offset);
+
+ /*
+ * We are not allowed to return partial sectors, though, so
+ * round up if necessary.
+ */
+ if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
+ int64_t file_length = raw_getlength(bs);
+ if (file_length > 0) {
+ /* Ignore errors, this is just a safeguard */
+ assert(hole == file_length);
+ }
+ *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
+ }
+
ret = BDRV_BLOCK_DATA;
} else {
/* On a hole, compute bytes to the beginning of the next extent. */
return;
}
+ if (!s->drop_cache) {
+ return;
+ }
+
if (s->open_flags & O_DIRECT) {
return; /* No host kernel page cache */
}
if (blkdev) {
acb.aio_type |= QEMU_AIO_BLKDEV;
}
+ if (flags & BDRV_REQ_NO_FALLBACK) {
+ acb.aio_type |= QEMU_AIO_NO_FALLBACK;
+ }
if (flags & BDRV_REQ_MAY_UNMAP) {
acb.aio_type |= QEMU_AIO_DISCARD;
s->perm_change_fd = rs->fd;
} else {
/* We may need a new fd if auto-read-only switches the mode */
- ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags,
+ ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm,
false, errp);
if (ret < 0) {
return ret;
.bdrv_set_perm = raw_set_perm,
.bdrv_abort_perm_update = raw_abort_perm_update,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
};
/***********************************************/
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_invalidate_cache = raw_co_invalidate_cache,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_co_create_opts = hdev_co_create_opts,
.create_opts = &raw_create_opts,
+ .mutable_opts = mutable_opts,
.bdrv_co_preadv = raw_co_preadv,
.bdrv_co_pwritev = raw_co_pwritev,