bool has_write_zeroes:1;
bool discard_zeroes:1;
bool use_linux_aio:1;
+ bool page_cache_inconsistent:1;
bool has_fallocate;
bool needs_alignment;
} BDRVRawState;
{
unsigned int sector_size;
bool success = false;
+ int i;
errno = ENOTSUP;
-
- /* Try a few ioctls to get the right size */
+ static const unsigned long ioctl_list[] = {
#ifdef BLKSSZGET
- if (ioctl(fd, BLKSSZGET, §or_size) >= 0) {
- *sector_size_p = sector_size;
- success = true;
- }
+ BLKSSZGET,
#endif
#ifdef DKIOCGETBLOCKSIZE
- if (ioctl(fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) {
- *sector_size_p = sector_size;
- success = true;
- }
+ DKIOCGETBLOCKSIZE,
#endif
#ifdef DIOCGSECTORSIZE
- if (ioctl(fd, DIOCGSECTORSIZE, §or_size) >= 0) {
- *sector_size_p = sector_size;
- success = true;
- }
+ DIOCGSECTORSIZE,
#endif
+ };
+
+ /* Try a few ioctls to get the right size */
+ for (i = 0; i < (int)ARRAY_SIZE(ioctl_list); i++) {
+ if (ioctl(fd, ioctl_list[i], §or_size) >= 0) {
+ *sector_size_p = sector_size;
+ success = true;
+ }
+ }
return success ? 0 : -errno;
}
#endif
}
+static int hdev_get_max_segments(const struct stat *st)
+{
+#ifdef CONFIG_LINUX
+ char buf[32];
+ const char *end;
+ char *sysfspath;
+ int ret;
+ int fd = -1;
+ long max_segments;
+
+ sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
+ major(st->st_rdev), minor(st->st_rdev));
+ fd = open(sysfspath, O_RDONLY);
+ if (fd == -1) {
+ ret = -errno;
+ goto out;
+ }
+ do {
+ ret = read(fd, buf, sizeof(buf) - 1);
+ } while (ret == -1 && errno == EINTR);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ } else if (ret == 0) {
+ ret = -EIO;
+ goto out;
+ }
+ buf[ret] = 0;
+ /* The file is ended with '\n', pass 'end' to accept that. */
+ ret = qemu_strtol(buf, &end, 10, &max_segments);
+ if (ret == 0 && end && *end == '\n') {
+ ret = max_segments;
+ }
+
+out:
+ if (fd != -1) {
+ close(fd);
+ }
+ g_free(sysfspath);
+ return ret;
+#else
+ return -ENOTSUP;
+#endif
+}
+
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
bs->bl.max_transfer = pow2floor(ret);
}
+ ret = hdev_get_max_segments(&st);
+ if (ret > 0) {
+ bs->bl.max_transfer = MIN(bs->bl.max_transfer,
+ ret * getpagesize());
+ }
}
}
static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
{
+ BDRVRawState *s = aiocb->bs->opaque;
int ret;
+ if (s->page_cache_inconsistent) {
+ return -EIO;
+ }
+
ret = qemu_fdatasync(aiocb->aio_fildes);
if (ret == -1) {
+ /* There is no clear definition of the semantics of a failing fsync(),
+ * so we may have to assume the worst. The sad truth is that this
+ * assumption is correct for Linux. Some pages are now probably marked
+ * clean in the page cache even though they are inconsistent with the
+ * on-disk contents. The next fdatasync() call would succeed, but no
+ * further writeback attempt will be made. We can't get back to a state
+ * in which we know what is on disk (we would have to rewrite
+ * everything that was touched since the last fdatasync() at least), so
+ * make bdrv_flush() fail permanently. Given that the behaviour isn't
+ * really defined, I have little hope that other OSes are doing better.
+ *
+ * Obviously, this doesn't affect O_DIRECT, which bypasses the page
+ * cache. */
+ if ((s->open_flags & O_DIRECT) == 0) {
+ s->page_cache_inconsistent = true;
+ }
return -errno;
}
return 0;