hw/acpi: Consolidate build_mcfg to pci.c

[mirror_qemu.git] / block / file-posix.c
diff --git a/block/file-posix.c b/block/file-posix.c

index e9fa6aac484f3c4d94788da54dfa255740945902..d0184296723a065703fd008ff479a2b0e0f61872 100644 (file)
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -157,6 +157,7 @@ typedef struct BDRVRawState {
      bool page_cache_inconsistent:1;
      bool has_fallocate;
      bool needs_alignment;
+    bool drop_cache;
      bool check_cache_dropped;
  
      PRManager *pr_mgr;
@@ -165,6 +166,7 @@ typedef struct BDRVRawState {
  typedef struct BDRVRawReopenState {
      int fd;
      int open_flags;
+    bool drop_cache;
      bool check_cache_dropped;
  } BDRVRawReopenState;
  
@@ -433,6 +435,13 @@ static QemuOptsList raw_runtime_opts = {
              .type = QEMU_OPT_STRING,
              .help = "id of persistent reservation manager object (default: none)",
          },
+#if defined(__linux__)
+        {
+            .name = "drop-cache",
+            .type = QEMU_OPT_BOOL,
+            .help = "invalidate page cache during live migration (default: on)",
+        },
+#endif
          {
              .name = "x-check-cache-dropped",
              .type = QEMU_OPT_BOOL,
@@ -524,6 +533,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
          }
      }
  
+    s->drop_cache = qemu_opt_get_bool(opts, "drop-cache", true);
      s->check_cache_dropped = qemu_opt_get_bool(opts, "x-check-cache-dropped",
                                                 false);
  
@@ -642,7 +652,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
      }
  #endif
  
-    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
      ret = 0;
  fail:
      if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -805,6 +815,18 @@ static int raw_handle_perm_lock(BlockDriverState *bs,
  
      switch (op) {
      case RAW_PL_PREPARE:
+        if ((s->perm | new_perm) == s->perm &&
+            (s->shared_perm & new_shared) == s->shared_perm)
+        {
+            /*
+             * We are going to unlock bytes, it should not fail. If it fails due
+             * to some fs-dependent permission-unrelated reasons (which occurs
+             * sometimes on NFS and leads to abort in bdrv_replace_child) we
+             * can't prevent such errors by any check here. And we ignore them
+             * anyway in ABORT and COMMIT.
+             */
+            return 0;
+        }
          ret = raw_apply_lock_bytes(s, s->fd, s->perm | new_perm,
                                     ~s->shared_perm | ~new_shared,
                                     false, errp);
@@ -933,6 +955,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
          goto out;
      }
  
+    rs->drop_cache = qemu_opt_get_bool_del(opts, "drop-cache", true);
      rs->check_cache_dropped =
          qemu_opt_get_bool_del(opts, "x-check-cache-dropped", false);
  
@@ -977,6 +1000,7 @@ static void raw_reopen_commit(BDRVReopenState *state)
      BDRVRawReopenState *rs = state->opaque;
      BDRVRawState *s = state->bs->opaque;
  
+    s->drop_cache = rs->drop_cache;
      s->check_cache_dropped = rs->check_cache_dropped;
      s->open_flags = rs->open_flags;
  
@@ -1420,9 +1444,22 @@ out:
  #ifdef CONFIG_XFS
  static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
  {
+    int64_t len;
      struct xfs_flock64 fl;
      int err;
  
+    len = lseek(s->fd, 0, SEEK_END);
+    if (len < 0) {
+        return -errno;
+    }
+
+    if (offset + bytes > len) {
+        /* XFS_IOC_ZERO_RANGE does not increase the file length */
+        if (ftruncate(s->fd, offset + bytes) < 0) {
+            return -errno;
+        }
+    }
+
      memset(&fl, 0, sizeof(fl));
      fl.l_whence = SEEK_SET;
      fl.l_start = offset;
@@ -1488,14 +1525,19 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
      }
  
  #ifdef BLKZEROOUT
-    do {
-        uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
-        if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
+    /* The BLKZEROOUT implementation in the kernel doesn't set
+     * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow
+     * fallbacks. */
+    if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) {
+        do {
+            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+            if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
+                return 0;
+            }
+        } while (errno == EINTR);
  
-    ret = translate_err(-errno);
+        ret = translate_err(-errno);
+    }
  #endif
  
      if (ret == -ENOTSUP) {
@@ -2446,6 +2488,8 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
      off_t data = 0, hole = 0;
      int ret;
  
+    assert(QEMU_IS_ALIGNED(offset | bytes, bs->bl.request_alignment));
+
      ret = fd_open(bs);
      if (ret < 0) {
          return ret;
@@ -2471,6 +2515,20 @@ static int coroutine_fn raw_co_block_status(BlockDriverState *bs,
          /* On a data extent, compute bytes to the end of the extent,
           * possibly including a partial sector at EOF. */
          *pnum = MIN(bytes, hole - offset);
+
+        /*
+         * We are not allowed to return partial sectors, though, so
+         * round up if necessary.
+         */
+        if (!QEMU_IS_ALIGNED(*pnum, bs->bl.request_alignment)) {
+            int64_t file_length = raw_getlength(bs);
+            if (file_length > 0) {
+                /* Ignore errors, this is just a safeguard */
+                assert(hole == file_length);
+            }
+            *pnum = ROUND_UP(*pnum, bs->bl.request_alignment);
+        }
+
          ret = BDRV_BLOCK_DATA;
      } else {
          /* On a hole, compute bytes to the beginning of the next extent.  */
@@ -2562,6 +2620,10 @@ static void coroutine_fn raw_co_invalidate_cache(BlockDriverState *bs,
          return;
      }
  
+    if (!s->drop_cache) {
+        return;
+    }
+
      if (s->open_flags & O_DIRECT) {
          return; /* No host kernel page cache */
      }
@@ -2643,6 +2705,9 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
      if (blkdev) {
          acb.aio_type |= QEMU_AIO_BLKDEV;
      }
+    if (flags & BDRV_REQ_NO_FALLBACK) {
+        acb.aio_type |= QEMU_AIO_NO_FALLBACK;
+    }
  
      if (flags & BDRV_REQ_MAY_UNMAP) {
          acb.aio_type |= QEMU_AIO_DISCARD;