Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
author    Peter Maydell <peter.maydell@linaro.org>
          Thu, 30 Apr 2015 11:04:11 +0000 (12:04 +0100)
committer Peter Maydell <peter.maydell@linaro.org>
          Thu, 30 Apr 2015 11:04:11 +0000 (12:04 +0100)
- miscellaneous cleanups for TCG (Emilio) and NBD (Bogdan)
- next part in the thread-safe address_space_* saga: atomic access
  to the bounce buffer and the map_clients list, from Fam
- optional support for linking with tcmalloc, also from Fam
- reapplying Peter Crosthwaite's "Respect as_translate_internal
  length clamp" after fixing the SPARC fallout.
- build system fix from Wei Liu
- small acpi-build and ioport cleanup by myself

# gpg: Signature made Wed Apr 29 09:34:00 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (22 commits)
  nbd/trivial: fix type cast for ioctl
  translate-all: use bitmap helpers for PageDesc's bitmap
  target-i386: disable LINT0 after reset
  Makefile.target: prepend $libs_softmmu to $LIBS
  milkymist: do not modify libs-softmmu
  configure: Add support for tcmalloc
  exec: Respect as_translate_internal length clamp
  ioport: reserve the whole range of an I/O port in the AddressSpace
  ioport: loosen assertions on emulation of 16-bit ports
  ioport: remove wrong comment
  ide: there is only one data port
  gus: clean up MemoryRegionPortio
  sb16: remove useless mixer_write_indexw
  sun4m: fix slavio sysctrl and led register sizes
  acpi-build: remove dependency from ram_addr.h
  memory: add memory_region_ram_resize
  dma-helpers: Fix race condition of continue_after_map_failure and dma_aio_cancel
  exec: Notify cpu_register_map_client caller if the bounce buffer is available
  exec: Protect map_client_list with mutex
  linux-user, bsd-user: Remove two calls to cpu_exec_init_all
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
157 files changed:
MAINTAINERS
aio-posix.c
aio-win32.c
async.c
balloon.c
block.c
block/Makefile.objs
block/backup.c
block/blkdebug.c
block/block-backend.c
block/io.c [new file with mode: 0644]
block/iscsi.c
block/mirror.c
block/null.c
block/qapi.c
block/qcow.c
block/qcow2-refcount.c
block/qcow2-snapshot.c
block/qcow2.c
block/qed.c
block/quorum.c
block/rbd.c
block/sheepdog.c
block/snapshot.c
block/vdi.c
block/vhdx.c
block/vmdk.c
block/vpc.c
block/vvfat.c
blockdev.c
blockjob.c
cputlb.c
dma-helpers.c
docs/bitmaps.md [new file with mode: 0644]
docs/qmp/qmp-events.txt
exec.c
hmp-commands.hx
hmp.c
hw/acpi/pcihp.c
hw/alpha/dp264.c
hw/alpha/typhoon.c
hw/arm/boot.c
hw/arm/highbank.c
hw/arm/nseries.c
hw/arm/omap1.c
hw/arm/pxa2xx.c
hw/arm/pxa2xx_gpio.c
hw/arm/strongarm.c
hw/block/fdc.c
hw/block/m25p80.c
hw/bt/sdp.c
hw/char/virtio-serial-bus.c
hw/display/tc6393xb.c
hw/dma/pl080.c
hw/dma/sun4m_iommu.c
hw/gpio/max7310.c
hw/gpio/omap_gpio.c
hw/gpio/zaurus.c
hw/i2c/omap_i2c.c
hw/i386/intel_iommu.c
hw/intc/allwinner-a10-pic.c
hw/intc/omap_intc.c
hw/mips/mips_jazz.c
hw/pci-host/apb.c
hw/pci-host/bonito.c
hw/pci-host/prep.c
hw/pci-host/uninorth.c
hw/pci/msi.c
hw/pci/msix.c
hw/pci/pcie_aer.c
hw/pci/shpc.c
hw/pci/slotid_cap.c
hw/ppc/ppce500_spin.c
hw/s390x/css.c
hw/s390x/s390-pci-bus.c
hw/s390x/s390-pci-inst.c
hw/s390x/s390-virtio-bus.c
hw/s390x/s390-virtio.c
hw/s390x/virtio-ccw.c
hw/scsi/megasas.c
hw/sd/sd.c
hw/sh4/r2d.c
hw/timer/hpet.c
hw/vfio/pci.c
hw/virtio/virtio-balloon.c
include/block/aio.h
include/block/block.h
include/block/block_int.h
include/block/blockjob.h
include/block/qapi.h
include/exec/cpu-defs.h
include/exec/exec-all.h
include/exec/memattrs.h [new file with mode: 0644]
include/exec/memory.h
include/hw/pci/pci.h
include/hw/pci/pcie_regs.h
include/qapi/qmp/qerror.h
include/qemu/hbitmap.h
include/qom/cpu.h
include/standard-headers/linux/virtio_blk.h
include/sysemu/block-backend.h
include/sysemu/dma.h
include/sysemu/os-win32.h
include/ui/console.h
include/ui/qemu-pixman.h
include/ui/qemu-spice.h
include/ui/spice-display.h
ioport.c
iothread.c
kvm-all.c
memory.c
migration/block.c
monitor.c
qapi/block-core.json
qemu-img.c
qmp-commands.hx
scripts/checkpatch.pl
scripts/coverity-model.c
scripts/qapi-visit.py
scripts/qemu-gdb.py
scripts/qmp/qmp.py
softmmu_template.h
target-arm/cpu.c
target-arm/cpu.h
target-arm/helper.c
target-arm/op_helper.c
target-i386/arch_memory_mapping.c
target-i386/cpu.c
target-ppc/cpu.h
tests/Makefile
tests/ahci-test.c
tests/ide-test.c
tests/libqos/ahci.c
tests/libqos/ahci.h
tests/libqos/libqos-pc.c
tests/libqos/libqos-pc.h
tests/libqos/libqos.c
tests/libqos/libqos.h
tests/libqtest.c
tests/libqtest.h
tests/qemu-iotests/122 [new file with mode: 0755]
tests/qemu-iotests/122.out [new file with mode: 0644]
tests/qemu-iotests/124 [new file with mode: 0644]
tests/qemu-iotests/124.out [new file with mode: 0644]
tests/qemu-iotests/129 [new file with mode: 0644]
tests/qemu-iotests/129.out [new file with mode: 0644]
tests/qemu-iotests/group
tests/qemu-iotests/iotests.py
tests/test-aio.c
tests/test-hbitmap.c
thread-pool.c
ui/console.c
ui/gtk.c
ui/spice-core.c
ui/spice-display.c
util/hbitmap.c
util/qemu-config.c

diff --git a/MAINTAINERS b/MAINTAINERS
index d7e9ba2da75f32960b322be84de2837ec148196e..b5ab755de5f9c1793ec62b36f8ade0ee5c7d826d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -172,7 +172,8 @@ F: hw/unicore32/
 X86
 M: Paolo Bonzini <pbonzini@redhat.com>
 M: Richard Henderson <rth@twiddle.net>
-S: Odd Fixes
+M: Eduardo Habkost <ehabkost@redhat.com>
+S: Maintained
 F: target-i386/
 F: hw/i386/
 
@@ -905,6 +906,15 @@ F: nbd.*
 F: qemu-nbd.c
 T: git git://github.com/bonzini/qemu.git nbd-next
 
+NUMA
+M: Eduardo Habkost <ehabkost@redhat.com>
+S: Maintained
+F: numa.c
+F: include/sysemu/numa.h
+K: numa|NUMA
+K: srat|SRAT
+T: git git://github.com/ehabkost/qemu.git numa
+
 QAPI
 M: Luiz Capitulino <lcapitulino@redhat.com>
 M: Michael Roth <mdroth@linux.vnet.ibm.com>
@@ -1172,7 +1182,19 @@ S: Supported
 F: block/gluster.c
 T: git git://github.com/codyprime/qemu-kvm-jtc.git block
 
+Null Block Driver
+M: Fam Zheng <famz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/null.c
+
 Bootdevice
 M: Gonglei <arei.gonglei@huawei.com>
 S: Maintained
 F: bootdevice.c
+
+Quorum
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/quorum.c
+L: qemu-block@nongnu.org
diff --git a/aio-posix.c b/aio-posix.c
index cbd4c3438c2275a05e96d926e6114251a035e4a2..4abec38866ca8d1a8d7415865e6375a9b7380216 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -24,7 +24,6 @@ struct AioHandler
     IOHandler *io_read;
     IOHandler *io_write;
     int deleted;
-    int pollfds_idx;
     void *opaque;
     QLIST_ENTRY(AioHandler) node;
 };
@@ -83,7 +82,6 @@ void aio_set_fd_handler(AioContext *ctx,
         node->io_read = io_read;
         node->io_write = io_write;
         node->opaque = opaque;
-        node->pollfds_idx = -1;
 
         node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
         node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -186,13 +184,61 @@ bool aio_dispatch(AioContext *ctx)
     return progress;
 }
 
+/* These thread-local variables are used only in a small part of aio_poll
+ * around the call to the poll() system call.  In particular they are not
+ * used while aio_poll is performing callbacks, which makes it much easier
+ * to think about reentrancy!
+ *
+ * Stack-allocated arrays would be perfect but they have size limitations;
+ * heap allocation is expensive enough that we want to reuse arrays across
+ * calls to aio_poll().  And because poll() has to be called without holding
+ * any lock, the arrays cannot be stored in AioContext.  Thread-local data
+ * has none of the disadvantages of these three options.
+ */
+static __thread GPollFD *pollfds;
+static __thread AioHandler **nodes;
+static __thread unsigned npfd, nalloc;
+static __thread Notifier pollfds_cleanup_notifier;
+
+static void pollfds_cleanup(Notifier *n, void *unused)
+{
+    g_assert(npfd == 0);
+    g_free(pollfds);
+    g_free(nodes);
+    nalloc = 0;
+}
+
+static void add_pollfd(AioHandler *node)
+{
+    if (npfd == nalloc) {
+        if (nalloc == 0) {
+            pollfds_cleanup_notifier.notify = pollfds_cleanup;
+            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
+            nalloc = 8;
+        } else {
+            g_assert(nalloc <= INT_MAX);
+            nalloc *= 2;
+        }
+        pollfds = g_renew(GPollFD, pollfds, nalloc);
+        nodes = g_renew(AioHandler *, nodes, nalloc);
+    }
+    nodes[npfd] = node;
+    pollfds[npfd] = (GPollFD) {
+        .fd = node->pfd.fd,
+        .events = node->pfd.events,
+    };
+    npfd++;
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandler *node;
     bool was_dispatching;
-    int ret;
+    int i, ret;
     bool progress;
+    int64_t timeout;
 
+    aio_context_acquire(ctx);
     was_dispatching = ctx->dispatching;
     progress = false;
 
@@ -210,39 +256,36 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     ctx->walking_handlers++;
 
-    g_array_set_size(ctx->pollfds, 0);
+    assert(npfd == 0);
 
     /* fill pollfds */
     QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-        node->pollfds_idx = -1;
         if (!node->deleted && node->pfd.events) {
-            GPollFD pfd = {
-                .fd = node->pfd.fd,
-                .events = node->pfd.events,
-            };
-            node->pollfds_idx = ctx->pollfds->len;
-            g_array_append_val(ctx->pollfds, pfd);
+            add_pollfd(node);
         }
     }
 
-    ctx->walking_handlers--;
+    timeout = blocking ? aio_compute_timeout(ctx) : 0;
 
     /* wait until next event */
-    ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
-                         ctx->pollfds->len,
-                         blocking ? aio_compute_timeout(ctx) : 0);
+    if (timeout) {
+        aio_context_release(ctx);
+    }
+    ret = qemu_poll_ns((GPollFD *)pollfds, npfd, timeout);
+    if (timeout) {
+        aio_context_acquire(ctx);
+    }
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
-        QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-            if (node->pollfds_idx != -1) {
-                GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
-                                              node->pollfds_idx);
-                node->pfd.revents = pfd->revents;
-            }
+        for (i = 0; i < npfd; i++) {
+            nodes[i]->pfd.revents = pollfds[i].revents;
         }
     }
 
+    npfd = 0;
+    ctx->walking_handlers--;
+
     /* Run dispatch even if there were no readable fds to run timers */
     aio_set_dispatching(ctx, true);
     if (aio_dispatch(ctx)) {
@@ -250,5 +293,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     }
 
     aio_set_dispatching(ctx, was_dispatching);
+    aio_context_release(ctx);
+
     return progress;
 }
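
The comment block added above explains the design: poll() must run without
the AioContext lock, per-call heap allocation is too expensive, and stack
arrays have size limits, so the arrays are thread-local and reused across
calls. A minimal, self-contained sketch of the same grow-on-demand pattern,
with C11 _Thread_local and realloc() standing in for __thread and g_renew()
(names here are illustrative, not QEMU's):

    #include <assert.h>
    #include <limits.h>
    #include <poll.h>
    #include <stdlib.h>

    static _Thread_local struct pollfd *fds;     /* reused across calls */
    static _Thread_local unsigned nfds, alloced; /* in use vs. capacity */

    static void fds_add(int fd, short events)
    {
        if (nfds == alloced) {
            /* Start small, then double -- mirrors add_pollfd() above. */
            alloced = alloced ? alloced * 2 : 8;
            assert(alloced <= INT_MAX);
            fds = realloc(fds, alloced * sizeof(*fds));
            assert(fds);
        }
        fds[nfds++] = (struct pollfd) { .fd = fd, .events = events };
    }

The real patch additionally registers a thread-exit notifier
(pollfds_cleanup_notifier) so each thread's arrays are freed when the
thread terminates.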
diff --git a/aio-win32.c b/aio-win32.c
index e6f4cedf48e54724792eb673023b0bd0bea5b237..233d8f5d7951738e0f043596060d7ff8a86e9866 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -283,6 +283,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int count;
     int timeout;
 
+    aio_context_acquire(ctx);
     have_select_revents = aio_prepare(ctx);
     if (have_select_revents) {
         blocking = false;
@@ -323,7 +324,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
         timeout = blocking
             ? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
+        if (timeout) {
+            aio_context_release(ctx);
+        }
         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+        if (timeout) {
+            aio_context_acquire(ctx);
+        }
         aio_set_dispatching(ctx, true);
 
         if (first && aio_bh_poll(ctx)) {
@@ -349,5 +356,6 @@ bool aio_poll(AioContext *ctx, bool blocking)
     progress |= timerlistgroup_run_timers(&ctx->tlg);
 
     aio_set_dispatching(ctx, was_dispatching);
+    aio_context_release(ctx);
     return progress;
 }
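
Both this hunk and the aio-posix.c one wrap the blocking wait in the same
release/re-acquire pair, so other threads can take the AioContext while
this one sleeps; a non-blocking poll (timeout of zero) keeps the lock. A
sketch of the pattern, with a pthread mutex standing in for
aio_context_acquire()/aio_context_release() (the function and its
parameters are illustrative):

    #include <poll.h>
    #include <pthread.h>

    /* fds/nfds must be private to this thread (thread-local in the
     * patches above), because poll() runs with the lock dropped. */
    static int poll_with_lock(pthread_mutex_t *lock, struct pollfd *fds,
                              unsigned nfds, int timeout_ms)
    {
        int ret;

        if (timeout_ms) {
            pthread_mutex_unlock(lock);    /* let other threads in */
        }
        ret = poll(fds, nfds, timeout_ms);
        if (timeout_ms) {
            pthread_mutex_lock(lock);      /* re-take before dispatch */
        }
        return ret;
    }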
diff --git a/async.c b/async.c
index 2b51e87679a3154078d17af3b2881ea8d119d729..46d9e639d7c9b371989d8b7cc409d1491a97964d 100644
--- a/async.c
+++ b/async.c
@@ -230,7 +230,6 @@ aio_ctx_finalize(GSource     *source)
     event_notifier_cleanup(&ctx->notifier);
     rfifolock_destroy(&ctx->lock);
     qemu_mutex_destroy(&ctx->bh_lock);
-    g_array_free(ctx->pollfds, TRUE);
     timerlistgroup_deinit(&ctx->tlg);
 }
 
@@ -281,12 +280,6 @@ static void aio_timerlist_notify(void *opaque)
     aio_notify(opaque);
 }
 
-static void aio_rfifolock_cb(void *opaque)
-{
-    /* Kick owner thread in case they are blocked in aio_poll() */
-    aio_notify(opaque);
-}
-
 AioContext *aio_context_new(Error **errp)
 {
     int ret;
@@ -302,10 +295,9 @@ AioContext *aio_context_new(Error **errp)
     aio_set_event_notifier(ctx, &ctx->notifier,
                            (EventNotifierHandler *)
                            event_notifier_test_and_clear);
-    ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
     ctx->thread_pool = NULL;
     qemu_mutex_init(&ctx->bh_lock);
-    rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
+    rfifolock_init(&ctx->lock, NULL, NULL);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
     return ctx;
diff --git a/balloon.c b/balloon.c
index 70c00f5f840b8f3c3a7f99f8112a7d541d17f6f4..c7033e3dc334829eb06885eb003da2c60d809f91 100644
--- a/balloon.c
+++ b/balloon.c
@@ -58,7 +58,6 @@ int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
         /* We're already registered one balloon handler.  How many can
          * a guest really have?
          */
-        error_report("Another balloon device already registered");
         return -1;
     }
     balloon_event_fn = event_func;
diff --git a/block.c b/block.c
index f2f8ae77c10af9755997f6098ec7ebcf8e9c6de6..7904098c6412babee26c6bef4beb24f4d9d1df2c 100644
--- a/block.c
+++ b/block.c
 #include <windows.h>
 #endif
 
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ *     or enabled. A frozen bitmap can only abdicate() or reclaim().
+ */
 struct BdrvDirtyBitmap {
-    HBitmap *bitmap;
+    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
+    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+    char *name;                 /* Optional non-empty unique ID */
+    int64_t size;               /* Size of the bitmap (Number of sectors) */
+    bool disabled;              /* Bitmap is read-only */
     QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque);
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags);
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BdrvRequestFlags flags,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque,
-                                         bool is_write);
-static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
-
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
@@ -97,10 +79,7 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
-static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                           int nr_sectors);
-static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                             int nr_sectors);
+static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -124,104 +103,6 @@ int is_windows_drive(const char *filename)
 }
 #endif
 
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
-                        ThrottleConfig *cfg)
-{
-    int i;
-
-    throttle_config(&bs->throttle_state, cfg);
-
-    for (i = 0; i < 2; i++) {
-        qemu_co_enter_next(&bs->throttled_reqs[i]);
-    }
-}
-
-/* this function drain all the throttled IOs */
-static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
-{
-    bool drained = false;
-    bool enabled = bs->io_limits_enabled;
-    int i;
-
-    bs->io_limits_enabled = false;
-
-    for (i = 0; i < 2; i++) {
-        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
-            drained = true;
-        }
-    }
-
-    bs->io_limits_enabled = enabled;
-
-    return drained;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
-    bs->io_limits_enabled = false;
-
-    bdrv_start_throttled_reqs(bs);
-
-    throttle_destroy(&bs->throttle_state);
-}
-
-static void bdrv_throttle_read_timer_cb(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    qemu_co_enter_next(&bs->throttled_reqs[0]);
-}
-
-static void bdrv_throttle_write_timer_cb(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    qemu_co_enter_next(&bs->throttled_reqs[1]);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs)
-{
-    assert(!bs->io_limits_enabled);
-    throttle_init(&bs->throttle_state,
-                  bdrv_get_aio_context(bs),
-                  QEMU_CLOCK_VIRTUAL,
-                  bdrv_throttle_read_timer_cb,
-                  bdrv_throttle_write_timer_cb,
-                  bs);
-    bs->io_limits_enabled = true;
-}
-
-/* This function makes an IO wait if needed
- *
- * @nb_sectors: the number of sectors of the IO
- * @is_write:   is the IO a write
- */
-static void bdrv_io_limits_intercept(BlockDriverState *bs,
-                                     unsigned int bytes,
-                                     bool is_write)
-{
-    /* does this io must wait */
-    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
-
-    /* if must wait or any request of this type throttled queue the IO */
-    if (must_wait ||
-        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
-        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
-    }
-
-    /* the IO will be executed, do the accounting */
-    throttle_account(&bs->throttle_state, is_write, bytes);
-
-
-    /* if the next request must wait -> do nothing */
-    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
-        return;
-    }
-
-    /* else queue next request for execution */
-    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
-}
-
 size_t bdrv_opt_mem_align(BlockDriverState *bs)
 {
     if (!bs || !bs->drv) {
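
The removed bdrv_io_limits_intercept() (this series moves the I/O path
into the new block/io.c) encodes a FIFO-fair throttle: a request queues if
it would be throttled or if anyone is already waiting, accounts its bytes
once it proceeds, and wakes at most one successor when the budget still
allows it. A compressed sketch of that control flow, with a plain byte
budget standing in for throttle_schedule_timer() and a counter for the
CoQueue (everything here is illustrative):

    #include <stdbool.h>

    struct throttle {
        long budget;      /* bytes allowed in the current window */
        long accounted;   /* bytes consumed so far */
        unsigned queued;  /* requests waiting, FIFO order */
    };

    static bool must_wait(const struct throttle *t)
    {
        return t->accounted >= t->budget;
    }

    static void intercept(struct throttle *t, long bytes)
    {
        if (must_wait(t) || t->queued) {
            t->queued++;
            /* ...block here until woken (CoQueue in the real code)... */
        }
        t->accounted += bytes;           /* the I/O will be executed */
        if (!must_wait(t) && t->queued) {
            t->queued--;                 /* wake the next queued request */
        }
    }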
@@ -335,20 +216,7 @@ void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
 
 void bdrv_register(BlockDriver *bdrv)
 {
-    /* Block drivers without coroutine functions need emulation */
-    if (!bdrv->bdrv_co_readv) {
-        bdrv->bdrv_co_readv = bdrv_co_readv_em;
-        bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
-        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
-         * the block driver lacks aio we need to emulate that too.
-         */
-        if (!bdrv->bdrv_aio_readv) {
-            /* add AIO emulation layer */
-            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
-            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
-        }
-    }
+    bdrv_setup_io_funcs(bdrv);
 
     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
@@ -520,54 +388,6 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
     return ret;
 }
 
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BlockDriver *drv = bs->drv;
-    Error *local_err = NULL;
-
-    memset(&bs->bl, 0, sizeof(bs->bl));
-
-    if (!drv) {
-        return;
-    }
-
-    /* Take some limits from the children as a default */
-    if (bs->file) {
-        bdrv_refresh_limits(bs->file, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
-        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
-        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
-    } else {
-        bs->bl.opt_mem_alignment = 512;
-    }
-
-    if (bs->backing_hd) {
-        bdrv_refresh_limits(bs->backing_hd, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-        bs->bl.opt_transfer_length =
-            MAX(bs->bl.opt_transfer_length,
-                bs->backing_hd->bl.opt_transfer_length);
-        bs->bl.max_transfer_length =
-            MIN_NON_ZERO(bs->bl.max_transfer_length,
-                         bs->backing_hd->bl.max_transfer_length);
-        bs->bl.opt_mem_alignment =
-            MAX(bs->bl.opt_mem_alignment,
-                bs->backing_hd->bl.opt_mem_alignment);
-    }
-
-    /* Then let the driver override it */
-    if (drv->bdrv_refresh_limits) {
-        drv->bdrv_refresh_limits(bs, errp);
-    }
-}
-
 /**
  * Try to get @bs's logical and physical block size.
  * On success, store them in @bsz struct and return 0.
@@ -841,22 +661,6 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
     return 0;
 }
 
-/**
- * The copy-on-read flag is actually a reference count so multiple users may
- * use the feature without worrying about clobbering its previous state.
- * Copy-on-read stays enabled until all users have called to disable it.
- */
-void bdrv_enable_copy_on_read(BlockDriverState *bs)
-{
-    bs->copy_on_read++;
-}
-
-void bdrv_disable_copy_on_read(BlockDriverState *bs)
-{
-    assert(bs->copy_on_read > 0);
-    bs->copy_on_read--;
-}
-
 /*
  * Returns the flags that a temporary snapshot should get, based on the
  * originally requested flags (the originally requested image will have flags
@@ -1224,8 +1028,8 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
         bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
     } else if (backing_hd) {
         error_setg(&bs->backing_blocker,
-                   "device is used as backing hd of '%s'",
-                   bdrv_get_device_name(bs));
+                   "node is used as backing hd of '%s'",
+                   bdrv_get_device_or_node_name(bs));
     }
 
     bs->backing_hd = backing_hd;
@@ -1812,8 +1616,8 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
      * to r/w */
     if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
         reopen_state->flags & BDRV_O_RDWR) {
-        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
-                  bdrv_get_device_name(reopen_state->bs));
+        error_setg(errp, "Node '%s' is read only",
+                   bdrv_get_device_or_node_name(reopen_state->bs));
         goto error;
     }
 
@@ -1839,9 +1643,9 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
     } else {
         /* It is currently mandatory to have a bdrv_reopen_prepare()
          * handler for each supported drv. */
-        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  drv->format_name, bdrv_get_device_name(reopen_state->bs),
-                 "reopening of file");
+        error_setg(errp, "Block format '%s' used by node '%s' "
+                   "does not support reopening files", drv->format_name,
+                   bdrv_get_device_or_node_name(reopen_state->bs));
         ret = -1;
         goto error;
     }
@@ -1966,86 +1770,6 @@ void bdrv_close_all(void)
     }
 }
 
-/* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
-{
-    if (!QLIST_EMPTY(&bs->tracked_requests)) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
-        return true;
-    }
-    if (bs->file && bdrv_requests_pending(bs->file)) {
-        return true;
-    }
-    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
-        return true;
-    }
-    return false;
-}
-
-static bool bdrv_drain_one(BlockDriverState *bs)
-{
-    bool bs_busy;
-
-    bdrv_flush_io_queue(bs);
-    bdrv_start_throttled_reqs(bs);
-    bs_busy = bdrv_requests_pending(bs);
-    bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
-    return bs_busy;
-}
-
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree
- *
- * See the warning in bdrv_drain_all().  This function can only be called if
- * you are sure nothing can generate I/O because you have op blockers
- * installed.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- */
-void bdrv_drain(BlockDriverState *bs)
-{
-    while (bdrv_drain_one(bs)) {
-        /* Keep iterating */
-    }
-}
-
-/*
- * Wait for pending requests to complete across all BlockDriverStates
- *
- * This function does not flush data to disk, use bdrv_flush_all() for that
- * after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete.  Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
- */
-void bdrv_drain_all(void)
-{
-    /* Always run first iteration so any pending completion BHs run */
-    bool busy = true;
-    BlockDriverState *bs;
-
-    while (busy) {
-        busy = false;
-
-        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-            AioContext *aio_context = bdrv_get_aio_context(bs);
-
-            aio_context_acquire(aio_context);
-            busy |= bdrv_drain_one(bs);
-            aio_context_release(aio_context);
-        }
-    }
-}
-
 /* make a BlockDriverState anonymous by removing from bdrv_state and
  * graph_bdrv_state list.
    Also, NULL terminate the device_name to prevent double remove */
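
The drain helpers removed here (also headed for block/io.c) all reduce to
one loop: run event-loop iterations until an iteration finds no pending
requests and makes no progress. A stub sketch of that loop, where
requests_pending() and poll_once() stand in for bdrv_requests_pending()
and aio_poll():

    #include <stdbool.h>

    /* Stand-ins: in QEMU these check tracked/throttled requests and
     * run one event-loop iteration, respectively. */
    static bool requests_pending(void) { return false; }
    static bool poll_once(bool blocking) { (void)blocking; return false; }

    static void drain(void)
    {
        bool busy = true;   /* always run the first iteration so any
                             * pending completion BHs get to run */
        while (busy) {
            bool pending = requests_pending();
            busy = poll_once(pending) || pending;  /* one drain step */
        }
    }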
@@ -2367,152 +2091,6 @@ int bdrv_commit_all(void)
     return 0;
 }
 
-/**
- * Remove an active request from the tracked requests list
- *
- * This function should be called when a tracked request is completing.
- */
-static void tracked_request_end(BdrvTrackedRequest *req)
-{
-    if (req->serialising) {
-        req->bs->serialising_in_flight--;
-    }
-
-    QLIST_REMOVE(req, list);
-    qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-/**
- * Add an active request to the tracked requests list
- */
-static void tracked_request_begin(BdrvTrackedRequest *req,
-                                  BlockDriverState *bs,
-                                  int64_t offset,
-                                  unsigned int bytes, bool is_write)
-{
-    *req = (BdrvTrackedRequest){
-        .bs = bs,
-        .offset         = offset,
-        .bytes          = bytes,
-        .is_write       = is_write,
-        .co             = qemu_coroutine_self(),
-        .serialising    = false,
-        .overlap_offset = offset,
-        .overlap_bytes  = bytes,
-    };
-
-    qemu_co_queue_init(&req->wait_queue);
-
-    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-}
-
-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
-{
-    int64_t overlap_offset = req->offset & ~(align - 1);
-    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
-                               - overlap_offset;
-
-    if (!req->serialising) {
-        req->bs->serialising_in_flight++;
-        req->serialising = true;
-    }
-
-    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
-    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
-}
-
-/**
- * Round a region to cluster boundaries
- */
-void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t sector_num, int nb_sectors,
-                            int64_t *cluster_sector_num,
-                            int *cluster_nb_sectors)
-{
-    BlockDriverInfo bdi;
-
-    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_sector_num = sector_num;
-        *cluster_nb_sectors = nb_sectors;
-    } else {
-        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
-        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
-        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
-                                            nb_sectors, c);
-    }
-}
-
-static int bdrv_get_cluster_size(BlockDriverState *bs)
-{
-    BlockDriverInfo bdi;
-    int ret;
-
-    ret = bdrv_get_info(bs, &bdi);
-    if (ret < 0 || bdi.cluster_size == 0) {
-        return bs->request_alignment;
-    } else {
-        return bdi.cluster_size;
-    }
-}
-
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
-                                     int64_t offset, unsigned int bytes)
-{
-    /*        aaaa   bbbb */
-    if (offset >= req->overlap_offset + req->overlap_bytes) {
-        return false;
-    }
-    /* bbbb   aaaa        */
-    if (req->overlap_offset >= offset + bytes) {
-        return false;
-    }
-    return true;
-}
-
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
-{
-    BlockDriverState *bs = self->bs;
-    BdrvTrackedRequest *req;
-    bool retry;
-    bool waited = false;
-
-    if (!bs->serialising_in_flight) {
-        return false;
-    }
-
-    do {
-        retry = false;
-        QLIST_FOREACH(req, &bs->tracked_requests, list) {
-            if (req == self || (!req->serialising && !self->serialising)) {
-                continue;
-            }
-            if (tracked_request_overlaps(req, self->overlap_offset,
-                                         self->overlap_bytes))
-            {
-                /* Hitting this means there was a reentrant request, for
-                 * example, a block driver issuing nested requests.  This must
-                 * never happen since it means deadlock.
-                 */
-                assert(qemu_coroutine_self() != req->co);
-
-                /* If the request is already (indirectly) waiting for us, or
-                 * will wait for us as soon as it wakes up, then just go on
-                 * (instead of producing a deadlock in the former case). */
-                if (!req->waiting_for) {
-                    self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue);
-                    self->waiting_for = NULL;
-                    retry = true;
-                    waited = true;
-                    break;
-                }
-            }
-        }
-    } while (retry);
-
-    return waited;
-}
-
 /*
  * Return values:
  * 0        - success
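
tracked_request_overlaps() above is the standard half-open interval test:
[a, a+alen) and [b, b+blen) intersect exactly when each range starts
before the other ends. Stated as a self-contained predicate (illustrative
names):

    #include <stdbool.h>
    #include <stdint.h>

    static bool ranges_overlap(int64_t a, unsigned alen,
                               int64_t b, unsigned blen)
    {
        /* Equivalent to the two early-return checks above. */
        return a < b + blen && b < a + alen;
    }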
@@ -2681,939 +2259,67 @@ exit:
     return ret;
 }
 
-
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
-                                   size_t size)
+/**
+ * Truncate file to 'offset' bytes (needed only for file protocols)
+ */
+int bdrv_truncate(BlockDriverState *bs, int64_t offset)
 {
-    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
-        return -EIO;
-    }
-
-    if (!bdrv_is_inserted(bs)) {
+    BlockDriver *drv = bs->drv;
+    int ret;
+    if (!drv)
         return -ENOMEDIUM;
-    }
+    if (!drv->bdrv_truncate)
+        return -ENOTSUP;
+    if (bs->read_only)
+        return -EACCES;
 
-    if (offset < 0) {
-        return -EIO;
+    ret = drv->bdrv_truncate(bs, offset);
+    if (ret == 0) {
+        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+        bdrv_dirty_bitmap_truncate(bs);
+        if (bs->blk) {
+            blk_dev_resize_cb(bs->blk);
+        }
     }
-
-    return 0;
+    return ret;
 }
 
-static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
-                              int nb_sectors)
+/**
+ * Length of an allocated file in bytes. Sparse files are counted by actual
+ * allocated space. Return < 0 if error or unknown.
+ */
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
 {
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EIO;
+    BlockDriver *drv = bs->drv;
+    if (!drv) {
+        return -ENOMEDIUM;
     }
-
-    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
-                                   nb_sectors * BDRV_SECTOR_SIZE);
-}
-
-typedef struct RwCo {
-    BlockDriverState *bs;
-    int64_t offset;
-    QEMUIOVector *qiov;
-    bool is_write;
-    int ret;
-    BdrvRequestFlags flags;
-} RwCo;
-
-static void coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
-    RwCo *rwco = opaque;
-
-    if (!rwco->is_write) {
-        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
-                                      rwco->qiov->size, rwco->qiov,
-                                      rwco->flags);
-    } else {
-        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
-                                       rwco->qiov->size, rwco->qiov,
-                                       rwco->flags);
+    if (drv->bdrv_get_allocated_file_size) {
+        return drv->bdrv_get_allocated_file_size(bs);
+    }
+    if (bs->file) {
+        return bdrv_get_allocated_file_size(bs->file);
     }
+    return -ENOTSUP;
 }
 
-/*
- * Process a vectored synchronous request using coroutines
+/**
+ * Return number of sectors on success, -errno on error.
  */
-static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
-                        QEMUIOVector *qiov, bool is_write,
-                        BdrvRequestFlags flags)
+int64_t bdrv_nb_sectors(BlockDriverState *bs)
 {
-    Coroutine *co;
-    RwCo rwco = {
-        .bs = bs,
-        .offset = offset,
-        .qiov = qiov,
-        .is_write = is_write,
-        .ret = NOT_DONE,
-        .flags = flags,
-    };
-
-    /**
-     * In sync call context, when the vcpu is blocked, this throttling timer
-     * will not fire; so the I/O throttling function has to be disabled here
-     * if it has been enabled.
-     */
-    if (bs->io_limits_enabled) {
-        fprintf(stderr, "Disabling I/O throttling on '%s' due "
-                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
-        bdrv_io_limits_disable(bs);
-    }
+    BlockDriver *drv = bs->drv;
 
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_rw_co_entry(&rwco);
-    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
+    if (!drv)
+        return -ENOMEDIUM;
 
-        co = qemu_coroutine_create(bdrv_rw_co_entry);
-        qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
+    if (drv->has_variable_length) {
+        int ret = refresh_total_sectors(bs, bs->total_sectors);
+        if (ret < 0) {
+            return ret;
         }
     }
-    return rwco.ret;
-}
-
-/*
- * Process a synchronous request using coroutines
- */
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
-                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base = (void *)buf,
-        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-    };
-
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
-                        &qiov, is_write, flags);
-}
-
-/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
-              uint8_t *buf, int nb_sectors)
-{
-    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
-}
-
-/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
-                          uint8_t *buf, int nb_sectors)
-{
-    bool enabled;
-    int ret;
-
-    enabled = bs->io_limits_enabled;
-    bs->io_limits_enabled = false;
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    bs->io_limits_enabled = enabled;
-    return ret;
-}
-
-/* Return < 0 if error. Important errors are:
-  -EIO         generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL      Invalid sector number or nb_sectors
-  -EACCES      Trying to write a read-only device
-*/
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
-               const uint8_t *buf, int nb_sectors)
-{
-    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
-}
-
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
-                      int nb_sectors, BdrvRequestFlags flags)
-{
-    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
-                      BDRV_REQ_ZERO_WRITE | flags);
-}
-
-/*
- * Completely zero out a block device with the help of bdrv_write_zeroes.
- * The operation is sped up by checking the block status and only writing
- * zeroes to the device if they currently do not return zeroes. Optional
- * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
- *
- * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
- */
-int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
-{
-    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
-    int n;
-
-    target_sectors = bdrv_nb_sectors(bs);
-    if (target_sectors < 0) {
-        return target_sectors;
-    }
-
-    for (;;) {
-        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
-        if (nb_sectors <= 0) {
-            return 0;
-        }
-        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
-        if (ret < 0) {
-            error_report("error getting block status at sector %" PRId64 ": %s",
-                         sector_num, strerror(-ret));
-            return ret;
-        }
-        if (ret & BDRV_BLOCK_ZERO) {
-            sector_num += n;
-            continue;
-        }
-        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
-        if (ret < 0) {
-            error_report("error writing zeroes at sector %" PRId64 ": %s",
-                         sector_num, strerror(-ret));
-            return ret;
-        }
-        sector_num += n;
-    }
-}
-
-int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base = (void *)buf,
-        .iov_len = bytes,
-    };
-    int ret;
-
-    if (bytes < 0) {
-        return -EINVAL;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bytes;
-}
-
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
-{
-    int ret;
-
-    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return qiov->size;
-}
-
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
-                const void *buf, int bytes)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base   = (void *) buf,
-        .iov_len    = bytes,
-    };
-
-    if (bytes < 0) {
-        return -EINVAL;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_pwritev(bs, offset, &qiov);
-}
-
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
-    const void *buf, int count)
-{
-    int ret;
-
-    ret = bdrv_pwrite(bs, offset, buf, count);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* No flush needed for cache modes that already do it */
-    if (bs->enable_write_cache) {
-        bdrv_flush(bs);
-    }
-
-    return 0;
-}
-
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    /* Perform I/O through a temporary buffer so that users who scribble over
-     * their read buffer while the operation is in progress do not end up
-     * modifying the image file.  This is critical for zero-copy guest I/O
-     * where anything might happen inside guest memory.
-     */
-    void *bounce_buffer;
-
-    BlockDriver *drv = bs->drv;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    int64_t cluster_sector_num;
-    int cluster_nb_sectors;
-    size_t skip_bytes;
-    int ret;
-
-    /* Cover entire cluster so no additional backing file I/O is required when
-     * allocating cluster in the image file.
-     */
-    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
-                           &cluster_sector_num, &cluster_nb_sectors);
-
-    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
-                                   cluster_sector_num, cluster_nb_sectors);
-
-    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
-    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
-    if (bounce_buffer == NULL) {
-        ret = -ENOMEM;
-        goto err;
-    }
-
-    qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
-                             &bounce_qiov);
-    if (ret < 0) {
-        goto err;
-    }
-
-    if (drv->bdrv_co_write_zeroes &&
-        buffer_is_zero(bounce_buffer, iov.iov_len)) {
-        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
-                                      cluster_nb_sectors, 0);
-    } else {
-        /* This does not change the data on the disk, it is not necessary
-         * to flush even in cache=writethrough mode.
-         */
-        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
-                                  &bounce_qiov);
-    }
-
-    if (ret < 0) {
-        /* It might be okay to ignore write errors for guest requests.  If this
-         * is a deliberate copy-on-read then we don't want to ignore the error.
-         * Simply report it in all cases.
-         */
-        goto err;
-    }
-
-    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
-    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
-                        nb_sectors * BDRV_SECTOR_SIZE);
-
-err:
-    qemu_vfree(bounce_buffer);
-    return ret;
-}
-
-/*
- * Forwards an already correctly aligned request to the BlockDriver. This
- * handles copy on read and zeroing after EOF; any other features must be
- * implemented by the caller.
- */
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-    int64_t align, QEMUIOVector *qiov, int flags)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert(!qiov || bytes == qiov->size);
-
-    /* Handle Copy on Read and associated serialisation */
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        /* If we touch the same cluster it counts as an overlap.  This
-         * guarantees that allocating writes will be serialized and not race
-         * with each other for the same cluster.  For example, in copy-on-read
-         * it ensures that the CoR read and write operations are atomic and
-         * guest writes cannot interleave between them. */
-        mark_request_serialising(req, bdrv_get_cluster_size(bs));
-    }
-
-    wait_serialising_requests(req);
-
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        int pnum;
-
-        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
-        if (ret < 0) {
-            goto out;
-        }
-
-        if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
-            goto out;
-        }
-    }
-
-    /* Forward the request to the BlockDriver */
-    if (!bs->zero_beyond_eof) {
-        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-    } else {
-        /* Read zeros after EOF */
-        int64_t total_sectors, max_nb_sectors;
-
-        total_sectors = bdrv_nb_sectors(bs);
-        if (total_sectors < 0) {
-            ret = total_sectors;
-            goto out;
-        }
-
-        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
-                                  align >> BDRV_SECTOR_BITS);
-        if (nb_sectors < max_nb_sectors) {
-            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-        } else if (max_nb_sectors > 0) {
-            QEMUIOVector local_qiov;
-
-            qemu_iovec_init(&local_qiov, qiov->niov);
-            qemu_iovec_concat(&local_qiov, qiov, 0,
-                              max_nb_sectors * BDRV_SECTOR_SIZE);
-
-            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
-                                     &local_qiov);
-
-            qemu_iovec_destroy(&local_qiov);
-        } else {
-            ret = 0;
-        }
-
-        /* Reading beyond end of file is supposed to produce zeroes */
-        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
-            uint64_t offset = MAX(0, total_sectors - sector_num);
-            uint64_t bytes = (sector_num + nb_sectors - offset) *
-                              BDRV_SECTOR_SIZE;
-            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
-        }
-    }
-
-out:
-    return ret;
-}
-
-static inline uint64_t bdrv_get_align(BlockDriverState *bs)
-{
-    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
-    return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
-}
-
-static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
-                                       int64_t offset, size_t bytes)
-{
-    int64_t align = bdrv_get_align(bs);
-    return !(offset & (align - 1) || (bytes & (align - 1)));
-}
-
-/*
- * Handle a read request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    BlockDriver *drv = bs->drv;
-    BdrvTrackedRequest req;
-
-    uint64_t align = bdrv_get_align(bs);
-    uint8_t *head_buf = NULL;
-    uint8_t *tail_buf = NULL;
-    QEMUIOVector local_qiov;
-    bool use_local_qiov = false;
-    int ret;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-
-    ret = bdrv_check_byte_request(bs, offset, bytes);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (bs->copy_on_read) {
-        flags |= BDRV_REQ_COPY_ON_READ;
-    }
-
-    /* throttling disk I/O */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_intercept(bs, bytes, false);
-    }
-
-    /* Align read if necessary by padding qiov */
-    if (offset & (align - 1)) {
-        head_buf = qemu_blockalign(bs, align);
-        qemu_iovec_init(&local_qiov, qiov->niov + 2);
-        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
-        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-        use_local_qiov = true;
-
-        bytes += offset & (align - 1);
-        offset = offset & ~(align - 1);
-    }
-
-    if ((offset + bytes) & (align - 1)) {
-        if (!use_local_qiov) {
-            qemu_iovec_init(&local_qiov, qiov->niov + 1);
-            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-            use_local_qiov = true;
-        }
-        tail_buf = qemu_blockalign(bs, align);
-        qemu_iovec_add(&local_qiov, tail_buf,
-                       align - ((offset + bytes) & (align - 1)));
-
-        bytes = ROUND_UP(bytes, align);
-    }
-
-    tracked_request_begin(&req, bs, offset, bytes, false);
-    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
-                              use_local_qiov ? &local_qiov : qiov,
-                              flags);
-    tracked_request_end(&req);
-
-    if (use_local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-        qemu_vfree(head_buf);
-        qemu_vfree(tail_buf);
-    }
-
-    return ret;
-}
-
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
-                             nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
-}
-
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
-                            BDRV_REQ_COPY_ON_READ);
-}
-
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
-
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    BlockDriver *drv = bs->drv;
-    QEMUIOVector qiov;
-    struct iovec iov = {0};
-    int ret = 0;
-
-    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
-                                        BDRV_REQUEST_MAX_SECTORS);
-
-    while (nb_sectors > 0 && !ret) {
-        int num = nb_sectors;
-
-        /* Align request.  Block drivers can expect the "bulk" of the request
-         * to be aligned.
-         */
-        if (bs->bl.write_zeroes_alignment
-            && num > bs->bl.write_zeroes_alignment) {
-            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
-                /* Make a small request up to the first aligned sector.  */
-                num = bs->bl.write_zeroes_alignment;
-                num -= sector_num % bs->bl.write_zeroes_alignment;
-            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
-                /* Shorten the request to the last aligned sector.  num cannot
-                 * underflow because num > bs->bl.write_zeroes_alignment.
-                 */
-                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
-            }
-        }
-
-        /* limit request size */
-        if (num > max_write_zeroes) {
-            num = max_write_zeroes;
-        }
-
-        ret = -ENOTSUP;
-        /* First try the efficient write zeroes operation */
-        if (drv->bdrv_co_write_zeroes) {
-            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
-        }
-
-        if (ret == -ENOTSUP) {
-            /* Fall back to bounce buffer if write zeroes is unsupported */
-            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
-                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
-            num = MIN(num, max_xfer_len);
-            iov.iov_len = num * BDRV_SECTOR_SIZE;
-            if (iov.iov_base == NULL) {
-                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
-                if (iov.iov_base == NULL) {
-                    ret = -ENOMEM;
-                    goto fail;
-                }
-                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
-            }
-            qemu_iovec_init_external(&qiov, &iov, 1);
-
-            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
-
-            /* Keep bounce buffer around if it is big enough for all
-             * all future requests.
-             */
-            if (num < max_xfer_len) {
-                qemu_vfree(iov.iov_base);
-                iov.iov_base = NULL;
-            }
-        }
-
-        sector_num += num;
-        nb_sectors -= num;
-    }
-
-fail:
-    qemu_vfree(iov.iov_base);
-    return ret;
-}
-
-/*
- * Forwards an already correctly aligned write request to the BlockDriver.
- */
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-    QEMUIOVector *qiov, int flags)
-{
-    BlockDriver *drv = bs->drv;
-    bool waited;
-    int ret;
-
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert(!qiov || bytes == qiov->size);
-
-    waited = wait_serialising_requests(req);
-    assert(!waited || !req->serialising);
-    assert(req->overlap_offset <= offset);
-    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-
-    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
-
-    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
-        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
-        qemu_iovec_is_zero(qiov)) {
-        flags |= BDRV_REQ_ZERO_WRITE;
-        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
-            flags |= BDRV_REQ_MAY_UNMAP;
-        }
-    }
-
-    if (ret < 0) {
-        /* Do nothing, write notifier decided to fail this request */
-    } else if (flags & BDRV_REQ_ZERO_WRITE) {
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
-        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
-    } else {
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
-        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
-    }
-    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
-
-    if (ret == 0 && !bs->enable_write_cache) {
-        ret = bdrv_co_flush(bs);
-    }
-
-    bdrv_set_dirty(bs, sector_num, nb_sectors);
-
-    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
-
-    if (ret >= 0) {
-        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
-    }
-
-    return ret;
-}
-
-/*
- * Handle a write request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    BdrvTrackedRequest req;
-    uint64_t align = bdrv_get_align(bs);
-    uint8_t *head_buf = NULL;
-    uint8_t *tail_buf = NULL;
-    QEMUIOVector local_qiov;
-    bool use_local_qiov = false;
-    int ret;
-
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-    if (bs->read_only) {
-        return -EACCES;
-    }
-
-    ret = bdrv_check_byte_request(bs, offset, bytes);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* throttling disk I/O */
-    if (bs->io_limits_enabled) {
-        bdrv_io_limits_intercept(bs, bytes, true);
-    }
-
-    /*
-     * Align write if necessary by performing a read-modify-write cycle.
-     * Pad qiov with the read parts and be sure to have a tracked request not
-     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
-     */
-    tracked_request_begin(&req, bs, offset, bytes, true);
-
-    if (offset & (align - 1)) {
-        QEMUIOVector head_qiov;
-        struct iovec head_iov;
-
-        mark_request_serialising(&req, align);
-        wait_serialising_requests(&req);
-
-        head_buf = qemu_blockalign(bs, align);
-        head_iov = (struct iovec) {
-            .iov_base   = head_buf,
-            .iov_len    = align,
-        };
-        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
-
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
-                                  align, &head_qiov, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
-
-        qemu_iovec_init(&local_qiov, qiov->niov + 2);
-        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
-        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-        use_local_qiov = true;
-
-        bytes += offset & (align - 1);
-        offset = offset & ~(align - 1);
-    }
-
-    if ((offset + bytes) & (align - 1)) {
-        QEMUIOVector tail_qiov;
-        struct iovec tail_iov;
-        size_t tail_bytes;
-        bool waited;
-
-        mark_request_serialising(&req, align);
-        waited = wait_serialising_requests(&req);
-        assert(!waited || !use_local_qiov);
-
-        tail_buf = qemu_blockalign(bs, align);
-        tail_iov = (struct iovec) {
-            .iov_base   = tail_buf,
-            .iov_len    = align,
-        };
-        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
-
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
-                                  align, &tail_qiov, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
-
-        if (!use_local_qiov) {
-            qemu_iovec_init(&local_qiov, qiov->niov + 1);
-            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-            use_local_qiov = true;
-        }
-
-        tail_bytes = (offset + bytes) & (align - 1);
-        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
-
-        bytes = ROUND_UP(bytes, align);
-    }
-
-    if (use_local_qiov) {
-        /* Local buffer may have non-zero data. */
-        flags &= ~BDRV_REQ_ZERO_WRITE;
-    }
-    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
-                               use_local_qiov ? &local_qiov : qiov,
-                               flags);
-
-fail:
-    tracked_request_end(&req);
-
-    if (use_local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-    }
-    qemu_vfree(head_buf);
-    qemu_vfree(tail_buf);
-
-    return ret;
-}
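
The head/tail padding above is plain power-of-two arithmetic. A minimal
standalone sketch (illustrative C, not QEMU code; the align/offset/bytes
values are made up) of how an unaligned request is widened for the RMW cycle:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t align  = 512;   /* assumed bdrv_get_align() result (power of two) */
    uint64_t offset = 1000;  /* unaligned request start, in bytes */
    uint64_t bytes  = 3000;  /* unaligned request length, in bytes */

    /* Head: widen the start down to the previous alignment boundary */
    uint64_t head = offset & (align - 1);
    bytes += head;
    offset &= ~(align - 1);

    /* Tail: round the head-padded length up to the next boundary */
    uint64_t tail = (offset + bytes) & (align - 1);
    if (tail) {
        bytes += align - tail;
    }

    /* Prints "aligned request [512, +3584)" for the inputs above */
    printf("aligned request [%" PRIu64 ", +%" PRIu64 ")\n", offset, bytes);
    return 0;
}
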
-
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
-                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
-}
-
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      BdrvRequestFlags flags)
-{
-    int ret;
-
-    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
-
-    if (!(bs->open_flags & BDRV_O_UNMAP)) {
-        flags &= ~BDRV_REQ_MAY_UNMAP;
-    }
-    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
-                            nb_sectors << BDRV_SECTOR_BITS)) {
-        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
-                                BDRV_REQ_ZERO_WRITE | flags);
-    } else {
-        uint8_t *buf;
-        QEMUIOVector local_qiov;
-        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
-
-        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
-        memset(buf, 0, bytes);
-        qemu_iovec_init(&local_qiov, 1);
-        qemu_iovec_add(&local_qiov, buf, bytes);
-
-        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
-                                BDRV_REQ_ZERO_WRITE | flags);
-        qemu_vfree(buf);
-    }
-    return ret;
-}
-
-/**
- * Truncate file to 'offset' bytes (needed only for file protocols)
- */
-int bdrv_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-    if (!drv)
-        return -ENOMEDIUM;
-    if (!drv->bdrv_truncate)
-        return -ENOTSUP;
-    if (bs->read_only)
-        return -EACCES;
-
-    ret = drv->bdrv_truncate(bs, offset);
-    if (ret == 0) {
-        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
-        if (bs->blk) {
-            blk_dev_resize_cb(bs->blk);
-        }
-    }
-    return ret;
-}
-
-/**
- * Length of an allocated file in bytes. Sparse files are counted by actual
- * allocated space. Return < 0 if error or unknown.
- */
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_get_allocated_file_size) {
-        return drv->bdrv_get_allocated_file_size(bs);
-    }
-    if (bs->file) {
-        return bdrv_get_allocated_file_size(bs->file);
-    }
-    return -ENOTSUP;
-}
-
-/**
- * Return number of sectors on success, -errno on error.
- */
-int64_t bdrv_nb_sectors(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv)
-        return -ENOMEDIUM;
-
-    if (drv->has_variable_length) {
-        int ret = refresh_total_sectors(bs, bs->total_sectors);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    return bs->total_sectors;
+    return bs->total_sectors;
 }
 
 /**
@@ -3797,8 +2503,8 @@ void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
 {
     if (key) {
         if (!bdrv_is_encrypted(bs)) {
-            error_setg(errp, "Device '%s' is not encrypted",
-                      bdrv_get_device_name(bs));
+            error_setg(errp, "Node '%s' is not encrypted",
+                      bdrv_get_device_or_node_name(bs));
         } else if (bdrv_set_key(bs, key) < 0) {
             error_set(errp, QERR_INVALID_PASSWORD);
         }
@@ -3806,7 +2512,7 @@ void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
         if (bdrv_key_required(bs)) {
             error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
                       "'%s' (%s) is encrypted",
-                      bdrv_get_device_name(bs),
+                      bdrv_get_device_or_node_name(bs),
                       bdrv_get_encrypted_filename(bs));
         }
     }
@@ -3870,15 +2576,20 @@ BlockDriverState *bdrv_find_node(const char *node_name)
 }
 
 /* Put this QMP function here so it can access the static graph_bdrv_states. */
-BlockDeviceInfoList *bdrv_named_nodes_list(void)
+BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
 {
     BlockDeviceInfoList *list, *entry;
     BlockDriverState *bs;
 
     list = NULL;
     QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
+        BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
+        if (!info) {
+            qapi_free_BlockDeviceInfoList(list);
+            return NULL;
+        }
         entry = g_malloc0(sizeof(*entry));
-        entry->value = bdrv_block_device_info(bs);
+        entry->value = info;
         entry->next = list;
         list = entry;
     }
@@ -3953,29 +2664,18 @@ const char *bdrv_get_device_name(const BlockDriverState *bs)
     return bs->blk ? blk_name(bs->blk) : "";
 }
 
-int bdrv_get_flags(BlockDriverState *bs)
+/* This can be used to identify nodes that might not have a device
+ * name associated. Since node and device names live in the same
+ * namespace, the result is unambiguous. The exception is when both are
+ * absent; in that case this returns an empty (non-null) string. */
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
 {
-    return bs->open_flags;
+    return bs->blk ? blk_name(bs->blk) : bs->node_name;
 }
 
-int bdrv_flush_all(void)
+int bdrv_get_flags(BlockDriverState *bs)
 {
-    BlockDriverState *bs;
-    int result = 0;
-
-    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-        int ret;
-
-        aio_context_acquire(aio_context);
-        ret = bdrv_flush(bs);
-        if (ret < 0 && !result) {
-            result = ret;
-        }
-        aio_context_release(aio_context);
-    }
-
-    return result;
+    return bs->open_flags;
 }
 
 int bdrv_has_zero_init_1(BlockDriverState *bs)
@@ -4030,222 +2730,6 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
     return false;
 }
 
-typedef struct BdrvCoGetBlockStatusData {
-    BlockDriverState *bs;
-    BlockDriverState *base;
-    int64_t sector_num;
-    int nb_sectors;
-    int *pnum;
-    int64_t ret;
-    bool done;
-} BdrvCoGetBlockStatusData;
-
-/*
- * Returns the allocation status of the specified sectors.
- * Drivers not implementing the functionality are assumed to not support
- * backing files, hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
-                                                     int64_t sector_num,
-                                                     int nb_sectors, int *pnum)
-{
-    int64_t total_sectors;
-    int64_t n;
-    int64_t ret, ret2;
-
-    total_sectors = bdrv_nb_sectors(bs);
-    if (total_sectors < 0) {
-        return total_sectors;
-    }
-
-    if (sector_num >= total_sectors) {
-        *pnum = 0;
-        return 0;
-    }
-
-    n = total_sectors - sector_num;
-    if (n < nb_sectors) {
-        nb_sectors = n;
-    }
-
-    if (!bs->drv->bdrv_co_get_block_status) {
-        *pnum = nb_sectors;
-        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
-        if (bs->drv->protocol_name) {
-            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
-        }
-        return ret;
-    }
-
-    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
-    if (ret < 0) {
-        *pnum = 0;
-        return ret;
-    }
-
-    if (ret & BDRV_BLOCK_RAW) {
-        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
-                                     *pnum, pnum);
-    }
-
-    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
-        ret |= BDRV_BLOCK_ALLOCATED;
-    }
-
-    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
-        if (bdrv_unallocated_blocks_are_zero(bs)) {
-            ret |= BDRV_BLOCK_ZERO;
-        } else if (bs->backing_hd) {
-            BlockDriverState *bs2 = bs->backing_hd;
-            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
-            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
-                ret |= BDRV_BLOCK_ZERO;
-            }
-        }
-    }
-
-    if (bs->file &&
-        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
-        (ret & BDRV_BLOCK_OFFSET_VALID)) {
-        int file_pnum;
-
-        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
-                                        *pnum, &file_pnum);
-        if (ret2 >= 0) {
-            /* Ignore errors.  This is just providing extra information;
-             * it is useful but not necessary.
-             */
-            if (!file_pnum) {
-                /* !file_pnum indicates an offset at or beyond the EOF; it is
-                 * perfectly valid for the format block driver to point to such
-                 * offsets, so catch it and mark everything as zero */
-                ret |= BDRV_BLOCK_ZERO;
-            } else {
-                /* Limit request to the range reported by the protocol driver */
-                *pnum = file_pnum;
-                ret |= (ret2 & BDRV_BLOCK_ZERO);
-            }
-        }
-    }
-
-    return ret;
-}
-
-/* Coroutine wrapper for bdrv_get_block_status() */
-static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
-{
-    BdrvCoGetBlockStatusData *data = opaque;
-    BlockDriverState *bs = data->bs;
-
-    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
-                                         data->pnum);
-    data->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_get_block_status().
- *
- * See bdrv_co_get_block_status() for details.
- */
-int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
-                              int nb_sectors, int *pnum)
-{
-    Coroutine *co;
-    BdrvCoGetBlockStatusData data = {
-        .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .pnum = pnum,
-        .done = false,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_get_block_status_co_entry(&data);
-    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
-        qemu_coroutine_enter(co, &data);
-        while (!data.done) {
-            aio_poll(aio_context, true);
-        }
-    }
-    return data.ret;
-}
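
A usage sketch for the synchronous wrapper above: walk a whole image and
print one line per run of sectors sharing the same allocation state.
dump_allocation_map() is a hypothetical helper, assuming the usual QEMU
headers and an opened BlockDriverState:

static void dump_allocation_map(BlockDriverState *bs)
{
    int64_t total = bdrv_nb_sectors(bs);
    int64_t sector = 0;

    while (sector < total) {
        int pnum;
        int64_t remaining = total - sector;
        int64_t ret = bdrv_get_block_status(bs, sector,
                                            remaining > INT_MAX ? INT_MAX
                                                                : remaining,
                                            &pnum);
        if (ret < 0 || pnum == 0) {
            break;  /* error, or past the end of the image */
        }
        printf("[%" PRId64 ", +%d)%s%s\n", sector, pnum,
               ret & BDRV_BLOCK_ALLOCATED ? " allocated" : " unallocated",
               ret & BDRV_BLOCK_ZERO ? " zero" : "");
        sector += pnum;  /* pnum sectors share this status */
    }
}
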
-
-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                                   int nb_sectors, int *pnum)
-{
-    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
-    if (ret < 0) {
-        return ret;
-    }
-    return !!(ret & BDRV_BLOCK_ALLOCATED);
-}
-
-/*
- * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
- *
- * Return true if the given sector is allocated in any image between
- * BASE and TOP (inclusive).  BASE can be NULL to check if the given
- * sector is allocated in any image of the chain.  Return false otherwise.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- *  the specified sector) that are known to be in the same
- *  allocated/unallocated state.
- *
- */
-int bdrv_is_allocated_above(BlockDriverState *top,
-                            BlockDriverState *base,
-                            int64_t sector_num,
-                            int nb_sectors, int *pnum)
-{
-    BlockDriverState *intermediate;
-    int ret, n = nb_sectors;
-
-    intermediate = top;
-    while (intermediate && intermediate != base) {
-        int pnum_inter;
-        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
-                                &pnum_inter);
-        if (ret < 0) {
-            return ret;
-        } else if (ret) {
-            *pnum = pnum_inter;
-            return 1;
-        }
-
-        /*
-         * [sector_num, nb_sectors] is unallocated on top but an
-         * intermediate image might have [sector_num+x, nb_sectors]
-         * allocated.
-         */
-        if (n > pnum_inter &&
-            (intermediate == top ||
-             sector_num + pnum_inter < intermediate->total_sectors)) {
-            n = pnum_inter;
-        }
-
-        intermediate = intermediate->backing_hd;
-    }
-
-    *pnum = n;
-    return 0;
-}
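
A sketch of how a commit/stream style caller might consume this query;
copy_allocated_runs() and the copy step are placeholders for illustration:

static int copy_allocated_runs(BlockDriverState *top, BlockDriverState *base,
                               int64_t end)
{
    int64_t sector = 0;

    while (sector < end) {
        int n;
        int64_t remaining = end - sector;
        int ret = bdrv_is_allocated_above(top, base, sector,
                                          remaining > INT_MAX ? INT_MAX
                                                              : remaining,
                                          &n);
        if (ret < 0) {
            return ret;
        }
        if (ret) {
            /* placeholder: copy n sectors starting at 'sector' into base */
        }
        sector += n;  /* n sectors share the same answer either way */
    }
    return 0;
}
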
-
 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
 {
     if (bs->backing_hd && bs->backing_hd->encrypted)
@@ -4256,32 +2740,10 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
         return NULL;
 }
 
-void bdrv_get_backing_filename(BlockDriverState *bs,
-                               char *filename, int filename_size)
-{
-    pstrcpy(filename, filename_size, bs->backing_file);
-}
-
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                          const uint8_t *buf, int nb_sectors)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (!drv->bdrv_write_compressed) {
-        return -ENOTSUP;
-    }
-    ret = bdrv_check_request(bs, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
-    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+void bdrv_get_backing_filename(BlockDriverState *bs,
+                               char *filename, int filename_size)
+{
+    pstrcpy(filename, filename_size, bs->backing_file);
 }
 
 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -4304,47 +2766,6 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
     return NULL;
 }
 
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
-                      int64_t pos, int size)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base   = (void *) buf,
-        .iov_len    = size,
-    };
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_writev_vmstate(bs, &qiov, pos);
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    } else if (drv->bdrv_save_vmstate) {
-        return drv->bdrv_save_vmstate(bs, qiov, pos);
-    } else if (bs->file) {
-        return bdrv_writev_vmstate(bs->file, qiov, pos);
-    }
-
-    return -ENOTSUP;
-}
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                      int64_t pos, int size)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv)
-        return -ENOMEDIUM;
-    if (drv->bdrv_load_vmstate)
-        return drv->bdrv_load_vmstate(bs, buf, pos, size);
-    if (bs->file)
-        return bdrv_load_vmstate(bs->file, buf, pos, size);
-    return -ENOTSUP;
-}
-
 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
     if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
@@ -4461,646 +2882,45 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
             path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                          curr_bs->backing_file);
 
-            if (!realpath(filename_tmp, backing_file_full)) {
-                continue;
-            }
-
-            if (strcmp(backing_file_full, filename_full) == 0) {
-                retval = curr_bs->backing_hd;
-                break;
-            }
-        }
-    }
-
-    g_free(filename_full);
-    g_free(backing_file_full);
-    g_free(filename_tmp);
-    return retval;
-}
-
-int bdrv_get_backing_file_depth(BlockDriverState *bs)
-{
-    if (!bs->drv) {
-        return 0;
-    }
-
-    if (!bs->backing_hd) {
-        return 0;
-    }
-
-    return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
-}
-
-/**************************************************************/
-/* async I/Os */
-
-BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
-                           QEMUIOVector *qiov, int nb_sectors,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
-    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, false);
-}
-
-BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
-                            QEMUIOVector *qiov, int nb_sectors,
-                            BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
-    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, true);
-}
-
-BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
-
-    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
-                                 BDRV_REQ_ZERO_WRITE | flags,
-                                 cb, opaque, true);
-}
-
-
-typedef struct MultiwriteCB {
-    int error;
-    int num_requests;
-    int num_callbacks;
-    struct {
-        BlockCompletionFunc *cb;
-        void *opaque;
-        QEMUIOVector *free_qiov;
-    } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
-    int i;
-
-    for (i = 0; i < mcb->num_callbacks; i++) {
-        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
-        if (mcb->callbacks[i].free_qiov) {
-            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
-        }
-        g_free(mcb->callbacks[i].free_qiov);
-    }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
-    MultiwriteCB *mcb = opaque;
-
-    trace_multiwrite_cb(mcb, ret);
-
-    if (ret < 0 && !mcb->error) {
-        mcb->error = ret;
-    }
-
-    mcb->num_requests--;
-    if (mcb->num_requests == 0) {
-        multiwrite_user_cb(mcb);
-        g_free(mcb);
-    }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
-    const BlockRequest *req1 = a, *req2 = b;
-
-    /*
-     * Note that we can't simply subtract req2->sector from req1->sector
-     * here as that could overflow the return value.
-     */
-    if (req1->sector > req2->sector) {
-        return 1;
-    } else if (req1->sector < req2->sector) {
-        return -1;
-    } else {
-        return 0;
-    }
-}
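
The overflow the comment warns about is easy to reproduce. A self-contained
illustration (assumes a typical platform with 32-bit int and two's-complement
truncation on conversion):

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    int64_t a = 0;
    int64_t b = INT64_C(1) << 33;  /* b > a, so a comparator must go negative */

    /* Naive version: the 64-bit difference -2^33 has all-zero low 32 bits,
     * so truncating it to int yields 0 and falsely reports equality. */
    int naive = (int)(a - b);

    /* The explicit three-way comparison used above cannot overflow */
    int safe = (a > b) - (a < b);

    printf("naive: %d  safe: %d\n", naive, safe);  /* naive: 0  safe: -1 */
    return 0;
}
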
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
-    int num_reqs, MultiwriteCB *mcb)
-{
-    int i, outidx;
-
-    // Sort requests by start sector
-    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
-    // Check if adjacent requests touch the same clusters. If so, combine them,
-    // filling up gaps with zero sectors.
-    outidx = 0;
-    for (i = 1; i < num_reqs; i++) {
-        int merge = 0;
-        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
-        // Handle exactly sequential writes and overlapping writes.
-        if (reqs[i].sector <= oldreq_last) {
-            merge = 1;
-        }
-
-        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
-            merge = 0;
-        }
-
-        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
-            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
-            merge = 0;
-        }
-
-        if (merge) {
-            size_t size;
-            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
-            qemu_iovec_init(qiov,
-                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
-            // Add the first request to the merged one. If the requests are
-            // overlapping, drop the last sectors of the first request.
-            size = (reqs[i].sector - reqs[outidx].sector) << 9;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
-            // We shouldn't need to add any zeros between the two requests
-            assert (reqs[i].sector <= oldreq_last);
-
-            // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
-            // Add tail of first request, if necessary
-            if (qiov->size < reqs[outidx].qiov->size) {
-                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
-                                  reqs[outidx].qiov->size - qiov->size);
-            }
-
-            reqs[outidx].nb_sectors = qiov->size >> 9;
-            reqs[outidx].qiov = qiov;
-
-            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
-        } else {
-            outidx++;
-            reqs[outidx].sector     = reqs[i].sector;
-            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
-            reqs[outidx].qiov       = reqs[i].qiov;
-        }
-    }
-
-    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
-
-    return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In the error case this function returns -1, and any of the
- * requests may or may not have been submitted. In particular, this means that
- * the callback will be called for some of the requests and not for others. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
-    MultiwriteCB *mcb;
-    int i;
-
-    /* don't submit writes if we don't have a medium */
-    if (bs->drv == NULL) {
-        for (i = 0; i < num_reqs; i++) {
-            reqs[i].error = -ENOMEDIUM;
-        }
-        return -1;
-    }
-
-    if (num_reqs == 0) {
-        return 0;
-    }
-
-    // Create MultiwriteCB structure
-    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
-    mcb->num_requests = 0;
-    mcb->num_callbacks = num_reqs;
-
-    for (i = 0; i < num_reqs; i++) {
-        mcb->callbacks[i].cb = reqs[i].cb;
-        mcb->callbacks[i].opaque = reqs[i].opaque;
-    }
-
-    // Check for mergable requests
-    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
-    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
-    /* Run the aio requests. */
-    mcb->num_requests = num_reqs;
-    for (i = 0; i < num_reqs; i++) {
-        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
-                              reqs[i].nb_sectors, reqs[i].flags,
-                              multiwrite_cb, mcb,
-                              true);
-    }
-
-    return 0;
-}
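
A hypothetical caller honouring the contract above; submit_two_writes(),
done_cb and the two qiovs are assumptions for illustration (the usual QEMU
headers are assumed), not part of this patch:

static void submit_two_writes(BlockDriverState *bs, QEMUIOVector *qiov1,
                              QEMUIOVector *qiov2,
                              BlockCompletionFunc *done_cb)
{
    BlockRequest reqs[2] = {
        { .sector = 0,   .nb_sectors = 8, .qiov = qiov1,
          .cb = done_cb, .opaque = (void *)0L },
        { .sector = 128, .nb_sectors = 8, .qiov = qiov2,
          .cb = done_cb, .opaque = (void *)1L },
    };

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        int i;
        for (i = 0; i < 2; i++) {
            /* Per the contract: error != 0 means no callback will come
             * for this request; error == 0 means one still will. */
            if (reqs[i].error) {
                fprintf(stderr, "request %d failed: %d\n", i, reqs[i].error);
            }
        }
    }
}
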
-
-void bdrv_aio_cancel(BlockAIOCB *acb)
-{
-    qemu_aio_ref(acb);
-    bdrv_aio_cancel_async(acb);
-    while (acb->refcnt > 1) {
-        if (acb->aiocb_info->get_aio_context) {
-            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
-        } else if (acb->bs) {
-            aio_poll(bdrv_get_aio_context(acb->bs), true);
-        } else {
-            abort();
-        }
-    }
-    qemu_aio_unref(acb);
-}
-
-/* Async version of aio cancel. The caller is not blocked if the acb implements
- * cancel_async; otherwise we do nothing and let the request complete normally.
- * In either case the completion callback must be called. */
-void bdrv_aio_cancel_async(BlockAIOCB *acb)
-{
-    if (acb->aiocb_info->cancel_async) {
-        acb->aiocb_info->cancel_async(acb);
-    }
-}
-
-/**************************************************************/
-/* async block device emulation */
-
-typedef struct BlockAIOCBSync {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    int ret;
-    /* vector translation state */
-    QEMUIOVector *qiov;
-    uint8_t *bounce;
-    int is_write;
-} BlockAIOCBSync;
-
-static const AIOCBInfo bdrv_em_aiocb_info = {
-    .aiocb_size         = sizeof(BlockAIOCBSync),
-};
-
-static void bdrv_aio_bh_cb(void *opaque)
-{
-    BlockAIOCBSync *acb = opaque;
-
-    if (!acb->is_write && acb->ret >= 0) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-    }
-    qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-    qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque,
-                                      int is_write)
-
-{
-    BlockAIOCBSync *acb;
-
-    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
-    acb->is_write = is_write;
-    acb->qiov = qiov;
-    acb->bounce = qemu_try_blockalign(bs, qiov->size);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
-
-    if (acb->bounce == NULL) {
-        acb->ret = -ENOMEM;
-    } else if (is_write) {
-        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
-    } else {
-        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
-    }
-
-    qemu_bh_schedule(acb->bh);
-
-    return &acb->common;
-}
-
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-}
-
-
-typedef struct BlockAIOCBCoroutine {
-    BlockAIOCB common;
-    BlockRequest req;
-    bool is_write;
-    bool *done;
-    QEMUBH* bh;
-} BlockAIOCBCoroutine;
-
-static const AIOCBInfo bdrv_em_co_aiocb_info = {
-    .aiocb_size         = sizeof(BlockAIOCBCoroutine),
-};
-
-static void bdrv_co_em_bh(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-
-    acb->common.cb(acb->common.opaque, acb->req.error);
-
-    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
-}
-
-/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
-static void coroutine_fn bdrv_co_do_rw(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-
-    if (!acb->is_write) {
-        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
-            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
-    } else {
-        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
-            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
-    }
-
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
-    qemu_bh_schedule(acb->bh);
-}
-
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BdrvRequestFlags flags,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque,
-                                         bool is_write)
-{
-    Coroutine *co;
-    BlockAIOCBCoroutine *acb;
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->req.sector = sector_num;
-    acb->req.nb_sectors = nb_sectors;
-    acb->req.qiov = qiov;
-    acb->req.flags = flags;
-    acb->is_write = is_write;
-
-    co = qemu_coroutine_create(bdrv_co_do_rw);
-    qemu_coroutine_enter(co, acb);
-
-    return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-
-    acb->req.error = bdrv_co_flush(bs);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
-    qemu_bh_schedule(acb->bh);
-}
-
-BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    trace_bdrv_aio_flush(bs, opaque);
-
-    Coroutine *co;
-    BlockAIOCBCoroutine *acb;
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-
-    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
-    qemu_coroutine_enter(co, acb);
-
-    return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    BlockDriverState *bs = acb->common.bs;
-
-    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
-    qemu_bh_schedule(acb->bh);
-}
-
-BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    Coroutine *co;
-    BlockAIOCBCoroutine *acb;
-
-    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->req.sector = sector_num;
-    acb->req.nb_sectors = nb_sectors;
-    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
-    qemu_coroutine_enter(co, acb);
-
-    return &acb->common;
-}
-
-void bdrv_init(void)
-{
-    module_call_init(MODULE_INIT_BLOCK);
-}
-
-void bdrv_init_with_whitelist(void)
-{
-    use_bdrv_whitelist = 1;
-    bdrv_init();
-}
-
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
-                   BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCB *acb;
-
-    acb = g_slice_alloc(aiocb_info->aiocb_size);
-    acb->aiocb_info = aiocb_info;
-    acb->bs = bs;
-    acb->cb = cb;
-    acb->opaque = opaque;
-    acb->refcnt = 1;
-    return acb;
-}
-
-void qemu_aio_ref(void *p)
-{
-    BlockAIOCB *acb = p;
-    acb->refcnt++;
-}
-
-void qemu_aio_unref(void *p)
-{
-    BlockAIOCB *acb = p;
-    assert(acb->refcnt > 0);
-    if (--acb->refcnt == 0) {
-        g_slice_free1(acb->aiocb_info->aiocb_size, acb);
-    }
-}
-
-/**************************************************************/
-/* Coroutine block device emulation */
-
-typedef struct CoroutineIOCompletion {
-    Coroutine *coroutine;
-    int ret;
-} CoroutineIOCompletion;
+            if (!realpath(filename_tmp, backing_file_full)) {
+                continue;
+            }
 
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
-    CoroutineIOCompletion *co = opaque;
+            if (strcmp(backing_file_full, filename_full) == 0) {
+                retval = curr_bs->backing_hd;
+                break;
+            }
+        }
+    }
 
-    co->ret = ret;
-    qemu_coroutine_enter(co->coroutine, NULL);
+    g_free(filename_full);
+    g_free(backing_file_full);
+    g_free(filename_tmp);
+    return retval;
 }
 
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *iov,
-                                      bool is_write)
+int bdrv_get_backing_file_depth(BlockDriverState *bs)
 {
-    CoroutineIOCompletion co = {
-        .coroutine = qemu_coroutine_self(),
-    };
-    BlockAIOCB *acb;
-
-    if (is_write) {
-        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
-                                       bdrv_co_io_em_complete, &co);
-    } else {
-        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
-                                      bdrv_co_io_em_complete, &co);
+    if (!bs->drv) {
+        return 0;
     }
 
-    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
-    if (!acb) {
-        return -EIO;
+    if (!bs->backing_hd) {
+        return 0;
     }
-    qemu_coroutine_yield();
-
-    return co.ret;
-}
-
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov)
-{
-    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
-}
 
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov)
-{
-    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+    return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
 }
 
-static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+void bdrv_init(void)
 {
-    RwCo *rwco = opaque;
-
-    rwco->ret = bdrv_co_flush(rwco->bs);
+    module_call_init(MODULE_INIT_BLOCK);
 }
 
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+void bdrv_init_with_whitelist(void)
 {
-    int ret;
-
-    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
-        return 0;
-    }
-
-    /* Write back cached data to the OS even with cache=unsafe */
-    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
-    if (bs->drv->bdrv_co_flush_to_os) {
-        ret = bs->drv->bdrv_co_flush_to_os(bs);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    /* But don't actually force it to the disk with cache=unsafe */
-    if (bs->open_flags & BDRV_O_NO_FLUSH) {
-        goto flush_parent;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
-    if (bs->drv->bdrv_co_flush_to_disk) {
-        ret = bs->drv->bdrv_co_flush_to_disk(bs);
-    } else if (bs->drv->bdrv_aio_flush) {
-        BlockAIOCB *acb;
-        CoroutineIOCompletion co = {
-            .coroutine = qemu_coroutine_self(),
-        };
-
-        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
-        if (acb == NULL) {
-            ret = -EIO;
-        } else {
-            qemu_coroutine_yield();
-            ret = co.ret;
-        }
-    } else {
-        /*
-         * Some block drivers always operate in either writethrough or unsafe
-         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
-         * know how the server works (because the behaviour is hardcoded or
-         * depends on server-side configuration), so we can't ensure that
-         * everything is safe on disk. Returning an error doesn't work because
-         * that would break guests even if the server operates in writethrough
-         * mode.
-         *
-         * Let's hope the user knows what he's doing.
-         */
-        ret = 0;
-    }
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
-     * in the case of cache=unsafe, so there are no useless flushes.
-     */
-flush_parent:
-    return bdrv_co_flush(bs->file);
+    use_bdrv_whitelist = 1;
+    bdrv_init();
 }
 
 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
@@ -5152,143 +2972,6 @@ void bdrv_invalidate_cache_all(Error **errp)
     }
 }
 
-int bdrv_flush(BlockDriverState *bs)
-{
-    Coroutine *co;
-    RwCo rwco = {
-        .bs = bs,
-        .ret = NOT_DONE,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_flush_co_entry(&rwco);
-    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        co = qemu_coroutine_create(bdrv_flush_co_entry);
-        qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
-    }
-
-    return rwco.ret;
-}
-
-typedef struct DiscardCo {
-    BlockDriverState *bs;
-    int64_t sector_num;
-    int nb_sectors;
-    int ret;
-} DiscardCo;
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
-{
-    DiscardCo *rwco = opaque;
-
-    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
-}
-
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors)
-{
-    int max_discard, ret;
-
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-
-    ret = bdrv_check_request(bs, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    } else if (bs->read_only) {
-        return -EROFS;
-    }
-
-    bdrv_reset_dirty(bs, sector_num, nb_sectors);
-
-    /* Do nothing if disabled.  */
-    if (!(bs->open_flags & BDRV_O_UNMAP)) {
-        return 0;
-    }
-
-    if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
-        return 0;
-    }
-
-    max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
-    while (nb_sectors > 0) {
-        int ret;
-        int num = nb_sectors;
-
-        /* align request */
-        if (bs->bl.discard_alignment &&
-            num >= bs->bl.discard_alignment &&
-            sector_num % bs->bl.discard_alignment) {
-            if (num > bs->bl.discard_alignment) {
-                num = bs->bl.discard_alignment;
-            }
-            num -= sector_num % bs->bl.discard_alignment;
-        }
-
-        /* limit request size */
-        if (num > max_discard) {
-            num = max_discard;
-        }
-
-        if (bs->drv->bdrv_co_discard) {
-            ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
-        } else {
-            BlockAIOCB *acb;
-            CoroutineIOCompletion co = {
-                .coroutine = qemu_coroutine_self(),
-            };
-
-            acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
-                                            bdrv_co_io_em_complete, &co);
-            if (acb == NULL) {
-                return -EIO;
-            } else {
-                qemu_coroutine_yield();
-                ret = co.ret;
-            }
-        }
-        if (ret && ret != -ENOTSUP) {
-            return ret;
-        }
-
-        sector_num += num;
-        nb_sectors -= num;
-    }
-    return 0;
-}
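
The chunking above is self-contained arithmetic. A standalone trace with
made-up limits (plain integers standing in for the BlockLimits fields):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int64_t sector_num = 5;      /* assumed request start */
    int nb_sectors = 300;        /* assumed request length */
    int discard_alignment = 64;  /* stands in for bs->bl.discard_alignment */
    int max_discard = 128;       /* stands in for the clamped max_discard */

    while (nb_sectors > 0) {
        int num = nb_sectors;

        /* First chunk stops at the next alignment boundary */
        if (discard_alignment && num >= discard_alignment &&
            sector_num % discard_alignment) {
            if (num > discard_alignment) {
                num = discard_alignment;
            }
            num -= sector_num % discard_alignment;
        }
        if (num > max_discard) {
            num = max_discard;
        }
        /* Prints [5, +59), [64, +128), [192, +113) for these inputs */
        printf("discard [%" PRId64 ", +%d)\n", sector_num, num);
        sector_num += num;
        nb_sectors -= num;
    }
    return 0;
}
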
-
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-{
-    Coroutine *co;
-    DiscardCo rwco = {
-        .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .ret = NOT_DONE,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_discard_co_entry(&rwco);
-    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        co = qemu_coroutine_create(bdrv_discard_co_entry);
-        qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
-    }
-
-    return rwco.ret;
-}
-
 /**************************************************************/
 /* removable device support */
 
@@ -5354,107 +3037,171 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
     }
 }
 
-/* needed for generic scsi interface */
-
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
 {
-    BlockDriver *drv = bs->drv;
-
-    if (drv && drv->bdrv_ioctl)
-        return drv->bdrv_ioctl(bs, req, buf);
-    return -ENOTSUP;
+    bs->guest_block_size = align;
 }
 
-BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
-    BlockDriver *drv = bs->drv;
+    BdrvDirtyBitmap *bm;
 
-    if (drv && drv->bdrv_aio_ioctl)
-        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
+    assert(name);
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        if (bm->name && !strcmp(name, bm->name)) {
+            return bm;
+        }
+    }
     return NULL;
 }
 
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 {
-    bs->guest_block_size = align;
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    g_free(bitmap->name);
+    bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
+                                          Error **errp)
+{
+    int64_t bitmap_size;
+    BdrvDirtyBitmap *bitmap;
+    uint32_t sector_granularity;
+
+    assert((granularity & (granularity - 1)) == 0);
+
+    if (name && bdrv_find_dirty_bitmap(bs, name)) {
+        error_setg(errp, "Bitmap already exists: %s", name);
+        return NULL;
+    }
+    sector_granularity = granularity >> BDRV_SECTOR_BITS;
+    assert(sector_granularity);
+    bitmap_size = bdrv_nb_sectors(bs);
+    if (bitmap_size < 0) {
+        error_setg_errno(errp, -bitmap_size, "could not get length of device");
+        errno = -bitmap_size;
+        return NULL;
+    }
+    bitmap = g_new0(BdrvDirtyBitmap, 1);
+    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+    bitmap->size = bitmap_size;
+    bitmap->name = g_strdup(name);
+    bitmap->disabled = false;
+    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+    return bitmap;
 }
 
-void *qemu_blockalign(BlockDriverState *bs, size_t size)
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
-    return qemu_memalign(bdrv_opt_mem_align(bs), size);
+    return bitmap->successor;
 }
 
-void *qemu_blockalign0(BlockDriverState *bs, size_t size)
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
 {
-    return memset(qemu_blockalign(bs, size), 0, size);
+    return !(bitmap->disabled || bitmap->successor);
 }
 
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap, Error **errp)
 {
-    size_t align = bdrv_opt_mem_align(bs);
+    uint64_t granularity;
+    BdrvDirtyBitmap *child;
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp, "Cannot create a successor for a bitmap that is "
+                   "currently frozen");
+        return -1;
+    }
+    assert(!bitmap->successor);
 
-    /* Ensure that NULL is never returned on success */
-    assert(align > 0);
-    if (size == 0) {
-        size = align;
+    /* Create an anonymous successor */
+    granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    if (!child) {
+        return -1;
     }
 
-    return qemu_try_memalign(align, size);
+    /* Successor will be on or off based on our current state. */
+    child->disabled = bitmap->disabled;
+
+    /* Install the successor and freeze the parent */
+    bitmap->successor = child;
+    return 0;
 }
 
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
+/**
+ * For a bitmap with a successor, yield our name to the successor,
+ * delete the old bitmap, and return a handle to the new bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp)
 {
-    void *mem = qemu_try_blockalign(bs, size);
+    char *name;
+    BdrvDirtyBitmap *successor = bitmap->successor;
 
-    if (mem) {
-        memset(mem, 0, size);
+    if (successor == NULL) {
+        error_setg(errp, "Cannot relinquish control if "
+                   "there's no successor present");
+        return NULL;
     }
 
-    return mem;
+    name = bitmap->name;
+    bitmap->name = NULL;
+    successor->name = name;
+    bitmap->successor = NULL;
+    bdrv_release_dirty_bitmap(bs, bitmap);
+
+    return successor;
 }
 
-/*
- * Check if all memory in this vector is sector aligned.
+/**
+ * In cases of failure where we can no longer safely delete the parent,
+ * we may wish to re-join the parent and child/successor.
+ * The merged parent will be un-frozen, but not explicitly re-enabled.
  */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *parent,
+                                           Error **errp)
 {
-    int i;
-    size_t alignment = bdrv_opt_mem_align(bs);
+    BdrvDirtyBitmap *successor = parent->successor;
 
-    for (i = 0; i < qiov->niov; i++) {
-        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
-            return false;
-        }
-        if (qiov->iov[i].iov_len % alignment) {
-            return false;
-        }
+    if (!successor) {
+        error_setg(errp, "Cannot reclaim a successor when none is present");
+        return NULL;
     }
 
-    return true;
+    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+        error_setg(errp, "Merging of parent and successor bitmap failed");
+        return NULL;
+    }
+    bdrv_release_dirty_bitmap(bs, successor);
+    parent->successor = NULL;
+
+    return parent;
 }
 
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
-                                          Error **errp)
+/**
+ * Truncates _all_ bitmaps attached to a BDS.
+ */
+static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 {
-    int64_t bitmap_size;
     BdrvDirtyBitmap *bitmap;
+    uint64_t size = bdrv_nb_sectors(bs);
 
-    assert((granularity & (granularity - 1)) == 0);
-
-    granularity >>= BDRV_SECTOR_BITS;
-    assert(granularity);
-    bitmap_size = bdrv_nb_sectors(bs);
-    if (bitmap_size < 0) {
-        error_setg_errno(errp, -bitmap_size, "could not get length of device");
-        errno = -bitmap_size;
-        return NULL;
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        if (bdrv_dirty_bitmap_frozen(bitmap)) {
+            continue;
+        }
+        hbitmap_truncate(bitmap->bitmap, size);
     }
-    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
-    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    return bitmap;
 }
 
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
@@ -5462,14 +3209,28 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
     BdrvDirtyBitmap *bm, *next;
     QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
         if (bm == bitmap) {
+            assert(!bdrv_dirty_bitmap_frozen(bm));
             QLIST_REMOVE(bitmap, list);
             hbitmap_free(bitmap->bitmap);
+            g_free(bitmap->name);
             g_free(bitmap);
             return;
         }
     }
 }
 
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    bitmap->disabled = true;
+}
+
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+    bitmap->disabled = false;
+}
+
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 {
     BdrvDirtyBitmap *bm;
@@ -5479,9 +3240,11 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
     QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
         BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
         BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bs, bm);
-        info->granularity =
-            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
+        info->count = bdrv_get_dirty_count(bm);
+        info->granularity = bdrv_dirty_bitmap_granularity(bm);
+        info->has_name = !!bm->name;
+        info->name = g_strdup(bm->name);
+        info->frozen = bdrv_dirty_bitmap_frozen(bm);
         entry->value = info;
         *plist = entry;
         plist = &entry->next;
@@ -5499,43 +3262,90 @@ int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector
     }
 }
 
-void bdrv_dirty_iter_init(BlockDriverState *bs,
-                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+/**
+ * Chooses a default granularity based on the existing cluster size,
+ * clamped to the range [4K, 64K]. Defaults to 64K when no cluster size
+ * information is available.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+    BlockDriverInfo bdi;
+    uint32_t granularity;
+
+    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
+        granularity = MAX(4096, bdi.cluster_size);
+        granularity = MIN(65536, granularity);
+    } else {
+        granularity = 65536;
+    }
+
+    return granularity;
+}
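
A quick standalone check of the clamp above (cluster sizes chosen for
illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    int sizes[] = { 512, 4096, 32768, 65536, 2 * 1024 * 1024 };
    unsigned i;

    for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        uint32_t granularity = MAX(4096, sizes[i]);
        granularity = MIN(65536, granularity);
        /* 512 -> 4096, 4096 -> 4096, 32768 -> 32768,
         * 65536 -> 65536, 2M -> 65536 */
        printf("cluster %7d -> granularity %5" PRIu32 "\n",
               sizes[i], granularity);
    }
    return 0;
}
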
+
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+}
+
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
 {
     hbitmap_iter_init(hbi, bitmap->bitmap, 0);
 }
 
-void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                            int64_t cur_sector, int nr_sectors)
 {
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
-void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                              int64_t cur_sector, int nr_sectors)
 {
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
-static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                           int nr_sectors)
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+    hbitmap_reset(bitmap->bitmap, 0, bitmap->size);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                    int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+            continue;
+        }
         hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
     }
 }
 
-static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                             int nr_sectors)
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                      int nr_sectors)
 {
     BdrvDirtyBitmap *bitmap;
     QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+            continue;
+        }
         hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
     }
 }
 
-int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    assert(hbi->hb);
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
 {
     return hbitmap_count(bitmap->bitmap);
 }
@@ -5572,8 +3382,8 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
     if (!QLIST_EMPTY(&bs->op_blockers[op])) {
         blocker = QLIST_FIRST(&bs->op_blockers[op]);
         if (errp) {
-            error_setg(errp, "Device '%s' is busy: %s",
-                       bdrv_get_device_name(bs),
+            error_setg(errp, "Node '%s' is busy: %s",
+                       bdrv_get_device_or_node_name(bs),
                        error_get_pretty(blocker->reason));
         }
         return true;
@@ -5953,12 +3763,6 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
     abort();
 }
 
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
-                                    NotifierWithReturn *notifier)
-{
-    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
-
 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
                        BlockDriverAmendStatusCB *status_cb)
 {
@@ -6059,36 +3863,6 @@ out:
     return to_replace_bs;
 }
 
-void bdrv_io_plug(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (drv && drv->bdrv_io_plug) {
-        drv->bdrv_io_plug(bs);
-    } else if (bs->file) {
-        bdrv_io_plug(bs->file);
-    }
-}
-
-void bdrv_io_unplug(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (drv && drv->bdrv_io_unplug) {
-        drv->bdrv_io_unplug(bs);
-    } else if (bs->file) {
-        bdrv_io_unplug(bs->file);
-    }
-}
-
-void bdrv_flush_io_queue(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (drv && drv->bdrv_flush_io_queue) {
-        drv->bdrv_flush_io_queue(bs);
-    } else if (bs->file) {
-        bdrv_flush_io_queue(bs->file);
-    }
-}
-
 static bool append_open_options(QDict *d, BlockDriverState *bs)
 {
     const QDictEntry *entry;
index db2933e4699fbcbae3b00e40af8d27b72eb299ab..0d8c2a4ab62961c02f8f09e8bcab324ea9d2091c 100644 (file)
@@ -1,4 +1,4 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
@@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o
+block-obj-y += null.o mirror.o io.o
 
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
@@ -37,6 +37,7 @@ gluster.o-libs     := $(GLUSTERFS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 archipelago.o-libs := $(ARCHIPELAGO_LIBS)
+block-obj-m        += dmg.o
 dmg.o-libs         := $(BZIP2_LIBS)
 qcow.o-libs        := -lz
 linux-aio.o-libs   := -laio
index 1c535b1ab986fc753d545a52afba76167162ae5f..d3f648ddd72a2bd33d4c6ab967eb8b947f39ef7d 100644 (file)
@@ -37,6 +37,8 @@ typedef struct CowRequest {
 typedef struct BackupBlockJob {
     BlockJob common;
     BlockDriverState *target;
+    /* bitmap for sync=dirty-bitmap */
+    BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
     RateLimit limit;
     BlockdevOnError on_source_error;
@@ -242,6 +244,91 @@ static void backup_complete(BlockJob *job, void *opaque)
     g_free(data);
 }
 
+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+{
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    /* We need to yield so that bdrv_drain_all() returns.
+     * (Without the yield, the VM does not reboot.)
+     */
+    if (job->common.speed) {
+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
+                                                      job->sectors_read);
+        job->sectors_read = 0;
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+    } else {
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+    }
+
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    return false;
+}
+
+static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
+{
+    bool error_is_read;
+    int ret = 0;
+    int clusters_per_iter;
+    uint32_t granularity;
+    int64_t sector;
+    int64_t cluster;
+    int64_t end;
+    int64_t last_cluster = -1;
+    BlockDriverState *bs = job->common.bs;
+    HBitmapIter hbi;
+
+    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
+    clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
+    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
+
+    /* Find the next dirty sector(s) */
+    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+        cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+
+        /* Fake progress updates for any clusters we skipped */
+        if (cluster != last_cluster + 1) {
+            job->common.offset += ((cluster - last_cluster - 1) *
+                                   BACKUP_CLUSTER_SIZE);
+        }
+
+        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
+            do {
+                if (yield_and_check(job)) {
+                    return ret;
+                }
+                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+                if ((ret < 0) &&
+                    backup_error_action(job, error_is_read, -ret) ==
+                    BLOCK_ERROR_ACTION_REPORT) {
+                    return ret;
+                }
+            } while (ret < 0);
+        }
+
+        /* If the bitmap granularity is smaller than the backup granularity,
+         * we need to advance the iterator pointer to the next cluster. */
+        if (granularity < BACKUP_CLUSTER_SIZE) {
+            bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
+        }
+
+        last_cluster = cluster - 1;
+    }
+
+    /* Play some final catchup with the progress meter */
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+    if (last_cluster + 1 < end) {
+        job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
+    }
+
+    return ret;
+}
+
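The cluster arithmetic above is easier to check with concrete numbers. A minimal standalone sketch (the 64 KiB BACKUP_CLUSTER_SIZE mirrors the definition elsewhere in backup.c and is an assumption here):

    #include <stdio.h>

    #define BACKUP_CLUSTER_SIZE (64 * 1024)   /* assumed to match backup.c */
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        /* One hbitmap_iter_next() hit must copy every backup cluster that
         * a (possibly coarser) bitmap granule covers. */
        unsigned granularity[] = { 32768, 65536, 131072 };
        for (int i = 0; i < 3; i++) {
            printf("granularity %6u -> %u cluster(s) per iteration\n",
                   granularity[i],
                   MAX(granularity[i] / BACKUP_CLUSTER_SIZE, 1u));
        }
        return 0;   /* prints 1, 1, 2 */
    }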
 static void coroutine_fn backup_run(void *opaque)
 {
     BackupBlockJob *job = opaque;
@@ -259,8 +346,7 @@ static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_init(&job->flush_rwlock);
 
     start = 0;
-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
-                       BACKUP_SECTORS_PER_CLUSTER);
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
 
     job->bitmap = hbitmap_alloc(end, 0);
 
@@ -278,28 +364,13 @@ static void coroutine_fn backup_run(void *opaque)
             qemu_coroutine_yield();
             job->common.busy = true;
         }
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        ret = backup_run_incremental(job);
     } else {
         /* Both FULL and TOP sync modes require copying. */
         for (; start < end; start++) {
             bool error_is_read;
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            /* we need to yield so that qemu_aio_flush() returns.
-             * (without, VM does not reboot)
-             */
-            if (job->common.speed) {
-                uint64_t delay_ns = ratelimit_calculate_delay(
-                        &job->limit, job->sectors_read);
-                job->sectors_read = 0;
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-            } else {
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-            }
-
-            if (block_job_is_cancelled(&job->common)) {
+            if (yield_and_check(job)) {
                 break;
             }
 
@@ -357,6 +428,18 @@ static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
 
+    if (job->sync_bitmap) {
+        BdrvDirtyBitmap *bm;
+        if (ret < 0) {
+            /* Merge the successor back into the parent, delete nothing. */
+            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+            assert(bm);
+        } else {
+            /* Everything is fine, delete this bitmap and install the backup. */
+            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+            assert(bm);
+        }
+    }
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
@@ -369,6 +452,7 @@ static void coroutine_fn backup_run(void *opaque)
 
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
@@ -412,17 +496,36 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        if (!sync_bitmap) {
+            error_setg(errp, "must provide a valid bitmap name for "
+                             "\"dirty-bitmap\" sync mode");
+            return;
+        }
+
+        /* Create a new bitmap, and freeze/disable this one. */
+        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
+            return;
+        }
+    } else if (sync_bitmap) {
+        error_setg(errp,
+                   "a sync_bitmap was provided to backup_run, "
+                   "but received an incompatible sync_mode (%s)",
+                   MirrorSyncMode_lookup[sync_mode]);
+        return;
+    }
+
     len = bdrv_getlength(bs);
     if (len < 0) {
         error_setg_errno(errp, -len, "unable to get length for '%s'",
                          bdrv_get_device_name(bs));
-        return;
+        goto error;
     }
 
     BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
                                            cb, opaque, errp);
     if (!job) {
-        return;
+        goto error;
     }
 
     bdrv_op_block_all(target, job->common.blocker);
@@ -431,7 +534,15 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->on_target_error = on_target_error;
     job->target = target;
     job->sync_mode = sync_mode;
+    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
+                       sync_bitmap : NULL;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
     qemu_coroutine_enter(job->common.co, job);
+    return;
+
+ error:
+    if (sync_bitmap) {
+        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
+    }
 }
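Taken together, the new calls give the sync bitmap a transactional lifecycle around the job. A condensed sketch of the protocol as used above (error handling elided; not code from the patch):

    /* Start: freeze the user's bitmap behind a successor that keeps
     * recording new writes while the frozen parent is backed up. */
    if (bdrv_dirty_bitmap_create_successor(bs, bitmap, errp) < 0) {
        return;
    }

    /* ... run the backup against the frozen parent ... */

    if (ret < 0) {
        /* Failure: fold the successor's bits back into the parent. */
        bdrv_reclaim_dirty_bitmap(bs, bitmap, NULL);
    } else {
        /* Success: drop the frozen parent and promote the successor. */
        bdrv_dirty_bitmap_abdicate(bs, bitmap, NULL);
    }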
index 63611e0a33e9ffeba7d5fc7d528711c0ef40d2eb..3c30edba732790417e8b95a608963b7d26ae7d32 100644 (file)
@@ -721,6 +721,11 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
     return bdrv_getlength(bs->file);
 }
 
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
+{
+    return bdrv_truncate(bs->file, offset);
+}
+
 static void blkdebug_refresh_filename(BlockDriverState *bs)
 {
     QDict *opts;
@@ -779,6 +784,7 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_file_open         = blkdebug_open,
     .bdrv_close             = blkdebug_close,
     .bdrv_getlength         = blkdebug_getlength,
+    .bdrv_truncate          = blkdebug_truncate,
     .bdrv_refresh_filename  = blkdebug_refresh_filename,
 
     .bdrv_aio_readv         = blkdebug_aio_readv,
index 48b6e4c05c1ce98e560af5a45774481b3ef5231c..93e46f376aa9b2341f59e694df9325b30a397358 100644 (file)
@@ -515,6 +515,17 @@ int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
     return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
 }
 
+int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
+                     int nb_sectors, BdrvRequestFlags flags)
+{
+    int ret = blk_check_request(blk, sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+}
+
 static void error_callback_bh(void *opaque)
 {
     struct BlockBackendAIOCB *acb = opaque;
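A caller-side sketch of the new wrapper (blk is assumed to be an open BlockBackend; the sector range is illustrative):

    /* Zero the first 16 sectors, letting the driver unmap them if it can. */
    int ret = blk_write_zeroes(blk, 0, 16, BDRV_REQ_MAY_UNMAP);
    if (ret < 0) {
        error_report("write_zeroes failed: %s", strerror(-ret));
    }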
diff --git a/block/io.c b/block/io.c
new file mode 100644 (file)
index 0000000..1ce62c4
--- /dev/null
@@ -0,0 +1,2540 @@
+/*
+ * Block layer I/O functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "trace.h"
+#include "sysemu/qtest.h"
+#include "block/blockjob.h"
+#include "block/block_int.h"
+
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque);
+static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags);
+static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BdrvRequestFlags flags,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque,
+                                         bool is_write);
+static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+
+/* throttling disk I/O limits */
+void bdrv_set_io_limits(BlockDriverState *bs,
+                        ThrottleConfig *cfg)
+{
+    int i;
+
+    throttle_config(&bs->throttle_state, cfg);
+
+    for (i = 0; i < 2; i++) {
+        qemu_co_enter_next(&bs->throttled_reqs[i]);
+    }
+}
+
+/* this function drains all the throttled I/Os */
+static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
+{
+    bool drained = false;
+    bool enabled = bs->io_limits_enabled;
+    int i;
+
+    bs->io_limits_enabled = false;
+
+    for (i = 0; i < 2; i++) {
+        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
+            drained = true;
+        }
+    }
+
+    bs->io_limits_enabled = enabled;
+
+    return drained;
+}
+
+void bdrv_io_limits_disable(BlockDriverState *bs)
+{
+    bs->io_limits_enabled = false;
+
+    bdrv_start_throttled_reqs(bs);
+
+    throttle_destroy(&bs->throttle_state);
+}
+
+static void bdrv_throttle_read_timer_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    qemu_co_enter_next(&bs->throttled_reqs[0]);
+}
+
+static void bdrv_throttle_write_timer_cb(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    qemu_co_enter_next(&bs->throttled_reqs[1]);
+}
+
+/* should be called before bdrv_set_io_limits if a limit is set */
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+    int clock_type = QEMU_CLOCK_REALTIME;
+
+    if (qtest_enabled()) {
+        /* For testing block IO throttling only */
+        clock_type = QEMU_CLOCK_VIRTUAL;
+    }
+    assert(!bs->io_limits_enabled);
+    throttle_init(&bs->throttle_state,
+                  bdrv_get_aio_context(bs),
+                  clock_type,
+                  bdrv_throttle_read_timer_cb,
+                  bdrv_throttle_write_timer_cb,
+                  bs);
+    bs->io_limits_enabled = true;
+}
+
+/* This function makes an IO wait if needed
+ *
+ * @nb_sectors: the number of sectors of the IO
+ * @is_write:   is the IO a write
+ */
+static void bdrv_io_limits_intercept(BlockDriverState *bs,
+                                     unsigned int bytes,
+                                     bool is_write)
+{
+    /* must this I/O wait? */
+    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
+
+    /* if it must wait, or any request of this type is throttled, queue it */
+    if (must_wait ||
+        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
+        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+    }
+
+    /* the I/O will be executed; do the accounting */
+    throttle_account(&bs->throttle_state, is_write, bytes);
+
+    /* if the next request must wait -> do nothing */
+    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
+        return;
+    }
+
+    /* else queue next request for execution */
+    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
+}
+
+void bdrv_setup_io_funcs(BlockDriver *bdrv)
+{
+    /* Block drivers without coroutine functions need emulation */
+    if (!bdrv->bdrv_co_readv) {
+        bdrv->bdrv_co_readv = bdrv_co_readv_em;
+        bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
+         * the block driver lacks aio we need to emulate that too.
+         */
+        if (!bdrv->bdrv_aio_readv) {
+            /* add AIO emulation layer */
+            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+        }
+    }
+}
+
+void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BlockDriver *drv = bs->drv;
+    Error *local_err = NULL;
+
+    memset(&bs->bl, 0, sizeof(bs->bl));
+
+    if (!drv) {
+        return;
+    }
+
+    /* Take some limits from the children as a default */
+    if (bs->file) {
+        bdrv_refresh_limits(bs->file, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
+        bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
+        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+    } else {
+        bs->bl.opt_mem_alignment = 512;
+    }
+
+    if (bs->backing_hd) {
+        bdrv_refresh_limits(bs->backing_hd, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+        bs->bl.opt_transfer_length =
+            MAX(bs->bl.opt_transfer_length,
+                bs->backing_hd->bl.opt_transfer_length);
+        bs->bl.max_transfer_length =
+            MIN_NON_ZERO(bs->bl.max_transfer_length,
+                         bs->backing_hd->bl.max_transfer_length);
+        bs->bl.opt_mem_alignment =
+            MAX(bs->bl.opt_mem_alignment,
+                bs->backing_hd->bl.opt_mem_alignment);
+    }
+
+    /* Then let the driver override it */
+    if (drv->bdrv_refresh_limits) {
+        drv->bdrv_refresh_limits(bs, errp);
+    }
+}
+
+/**
+ * The copy-on-read flag is actually a reference count so multiple users may
+ * use the feature without worrying about clobbering its previous state.
+ * Copy-on-read stays enabled until all users have called to disable it.
+ */
+void bdrv_enable_copy_on_read(BlockDriverState *bs)
+{
+    bs->copy_on_read++;
+}
+
+void bdrv_disable_copy_on_read(BlockDriverState *bs)
+{
+    assert(bs->copy_on_read > 0);
+    bs->copy_on_read--;
+}
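Because the flag is a counter, each user must pair its own enable with exactly one disable; a sketch:

    bdrv_enable_copy_on_read(bs);    /* e.g. while a stream job runs */
    /* ... reads now take the BDRV_REQ_COPY_ON_READ path ... */
    bdrv_disable_copy_on_read(bs);   /* other users' enables stay in effect */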
+
+/* Check if any requests are in-flight (including throttled requests) */
+static bool bdrv_requests_pending(BlockDriverState *bs)
+{
+    if (!QLIST_EMPTY(&bs->tracked_requests)) {
+        return true;
+    }
+    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
+        return true;
+    }
+    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
+        return true;
+    }
+    if (bs->file && bdrv_requests_pending(bs->file)) {
+        return true;
+    }
+    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
+        return true;
+    }
+    return false;
+}
+
+static bool bdrv_drain_one(BlockDriverState *bs)
+{
+    bool bs_busy;
+
+    bdrv_flush_io_queue(bs);
+    bdrv_start_throttled_reqs(bs);
+    bs_busy = bdrv_requests_pending(bs);
+    bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
+    return bs_busy;
+}
+
+/*
+ * Wait for pending requests to complete on a single BlockDriverState subtree
+ *
+ * See the warning in bdrv_drain_all().  This function can only be called if
+ * you are sure nothing can generate I/O because you have op blockers
+ * installed.
+ *
+ * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
+ * AioContext.
+ */
+void bdrv_drain(BlockDriverState *bs)
+{
+    while (bdrv_drain_one(bs)) {
+        /* Keep iterating */
+    }
+}
+
+/*
+ * Wait for pending requests to complete across all BlockDriverStates
+ *
+ * This function does not flush data to disk, use bdrv_flush_all() for that
+ * after calling this function.
+ *
+ * Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a coroutine
+ * can be arbitrarily complex and a constant flow of I/O can come until the
+ * coroutine is complete.  Because of this, it is not possible to have a
+ * function to drain a single device's I/O queue.
+ */
+void bdrv_drain_all(void)
+{
+    /* Always run first iteration so any pending completion BHs run */
+    bool busy = true;
+    BlockDriverState *bs = NULL;
+
+    while ((bs = bdrv_next(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
+        if (bs->job) {
+            block_job_pause(bs->job);
+        }
+        aio_context_release(aio_context);
+    }
+
+    while (busy) {
+        busy = false;
+        bs = NULL;
+
+        while ((bs = bdrv_next(bs))) {
+            AioContext *aio_context = bdrv_get_aio_context(bs);
+
+            aio_context_acquire(aio_context);
+            busy |= bdrv_drain_one(bs);
+            aio_context_release(aio_context);
+        }
+    }
+
+    bs = NULL;
+    while ((bs = bdrv_next(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
+        if (bs->job) {
+            block_job_resume(bs->job);
+        }
+        aio_context_release(aio_context);
+    }
+}
+
+/**
+ * Remove an active request from the tracked requests list
+ *
+ * This function should be called when a tracked request is completing.
+ */
+static void tracked_request_end(BdrvTrackedRequest *req)
+{
+    if (req->serialising) {
+        req->bs->serialising_in_flight--;
+    }
+
+    QLIST_REMOVE(req, list);
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+/**
+ * Add an active request to the tracked requests list
+ */
+static void tracked_request_begin(BdrvTrackedRequest *req,
+                                  BlockDriverState *bs,
+                                  int64_t offset,
+                                  unsigned int bytes, bool is_write)
+{
+    *req = (BdrvTrackedRequest){
+        .bs = bs,
+        .offset         = offset,
+        .bytes          = bytes,
+        .is_write       = is_write,
+        .co             = qemu_coroutine_self(),
+        .serialising    = false,
+        .overlap_offset = offset,
+        .overlap_bytes  = bytes,
+    };
+
+    qemu_co_queue_init(&req->wait_queue);
+
+    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+}
+
+static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+{
+    int64_t overlap_offset = req->offset & ~(align - 1);
+    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
+                               - overlap_offset;
+
+    if (!req->serialising) {
+        req->bs->serialising_in_flight++;
+        req->serialising = true;
+    }
+
+    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
+    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
+}
+
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors)
+{
+    BlockDriverInfo bdi;
+
+    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+        *cluster_sector_num = sector_num;
+        *cluster_nb_sectors = nb_sectors;
+    } else {
+        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
+        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
+        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
+                                            nb_sectors, c);
+    }
+}
+
+static int bdrv_get_cluster_size(BlockDriverState *bs)
+{
+    BlockDriverInfo bdi;
+    int ret;
+
+    ret = bdrv_get_info(bs, &bdi);
+    if (ret < 0 || bdi.cluster_size == 0) {
+        return bs->request_alignment;
+    } else {
+        return bdi.cluster_size;
+    }
+}
+
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
+                                     int64_t offset, unsigned int bytes)
+{
+    /*        aaaa   bbbb */
+    if (offset >= req->overlap_offset + req->overlap_bytes) {
+        return false;
+    }
+    /* bbbb   aaaa        */
+    if (req->overlap_offset >= offset + bytes) {
+        return false;
+    }
+    return true;
+}
+
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+{
+    BlockDriverState *bs = self->bs;
+    BdrvTrackedRequest *req;
+    bool retry;
+    bool waited = false;
+
+    if (!bs->serialising_in_flight) {
+        return false;
+    }
+
+    do {
+        retry = false;
+        QLIST_FOREACH(req, &bs->tracked_requests, list) {
+            if (req == self || (!req->serialising && !self->serialising)) {
+                continue;
+            }
+            if (tracked_request_overlaps(req, self->overlap_offset,
+                                         self->overlap_bytes))
+            {
+                /* Hitting this means there was a reentrant request, for
+                 * example, a block driver issuing nested requests.  This must
+                 * never happen since it means deadlock.
+                 */
+                assert(qemu_coroutine_self() != req->co);
+
+                /* If the request is already (indirectly) waiting for us, or
+                 * will wait for us as soon as it wakes up, then just go on
+                 * (instead of producing a deadlock in the former case). */
+                if (!req->waiting_for) {
+                    self->waiting_for = req;
+                    qemu_co_queue_wait(&req->wait_queue);
+                    self->waiting_for = NULL;
+                    retry = true;
+                    waited = true;
+                    break;
+                }
+            }
+        }
+    } while (retry);
+
+    return waited;
+}
+
+static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
+                                   size_t size)
+{
+    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
+        return -EIO;
+    }
+
+    if (!bdrv_is_inserted(bs)) {
+        return -ENOMEDIUM;
+    }
+
+    if (offset < 0) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors)
+{
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EIO;
+    }
+
+    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
+                                   nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+typedef struct RwCo {
+    BlockDriverState *bs;
+    int64_t offset;
+    QEMUIOVector *qiov;
+    bool is_write;
+    int ret;
+    BdrvRequestFlags flags;
+} RwCo;
+
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
+{
+    RwCo *rwco = opaque;
+
+    if (!rwco->is_write) {
+        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
+                                      rwco->qiov->size, rwco->qiov,
+                                      rwco->flags);
+    } else {
+        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
+                                       rwco->qiov->size, rwco->qiov,
+                                       rwco->flags);
+    }
+}
+
+/*
+ * Process a vectored synchronous request using coroutines
+ */
+static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+                        QEMUIOVector *qiov, bool is_write,
+                        BdrvRequestFlags flags)
+{
+    Coroutine *co;
+    RwCo rwco = {
+        .bs = bs,
+        .offset = offset,
+        .qiov = qiov,
+        .is_write = is_write,
+        .ret = NOT_DONE,
+        .flags = flags,
+    };
+
+    /**
+     * In sync call context, when the vcpu is blocked, this throttling timer
+     * will not fire; so the I/O throttling function has to be disabled here
+     * if it has been enabled.
+     */
+    if (bs->io_limits_enabled) {
+        fprintf(stderr, "Disabling I/O throttling on '%s' due "
+                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
+        bdrv_io_limits_disable(bs);
+    }
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_rw_co_entry(&rwco);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_rw_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    }
+    return rwco.ret;
+}
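bdrv_prwv_co() is one instance of a pattern this file reuses (see bdrv_get_block_status() below): call the coroutine entry directly when already in coroutine context, otherwise spawn a coroutine and poll the AioContext until a completion flag flips. Stripped to a skeleton with hypothetical names:

    MyCoData data = { .done = false };          /* hypothetical type */

    if (qemu_in_coroutine()) {
        my_co_entry(&data);                     /* fast path: direct call */
    } else {
        Coroutine *co = qemu_coroutine_create(my_co_entry);
        qemu_coroutine_enter(co, &data);
        while (!data.done) {
            /* drive the event loop until the coroutine completes */
            aio_poll(bdrv_get_aio_context(bs), true);
        }
    }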
+
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+    };
+
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EINVAL;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+                        &qiov, is_write, flags);
+}
+
+/* return < 0 if error. See bdrv_write() for the return codes */
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+              uint8_t *buf, int nb_sectors)
+{
+    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
+}
+
+/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+                          uint8_t *buf, int nb_sectors)
+{
+    bool enabled;
+    int ret;
+
+    enabled = bs->io_limits_enabled;
+    bs->io_limits_enabled = false;
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    bs->io_limits_enabled = enabled;
+    return ret;
+}
+
+/* Return < 0 if error. Important errors are:
+  -EIO         generic I/O error (may happen for all errors)
+  -ENOMEDIUM   No media inserted.
+  -EINVAL      Invalid sector number or nb_sectors
+  -EACCES      Trying to write a read-only device
+*/
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+               const uint8_t *buf, int nb_sectors)
+{
+    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+}
+
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+                      int nb_sectors, BdrvRequestFlags flags)
+{
+    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
+                      BDRV_REQ_ZERO_WRITE | flags);
+}
+
+/*
+ * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+{
+    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+    int n;
+
+    target_sectors = bdrv_nb_sectors(bs);
+    if (target_sectors < 0) {
+        return target_sectors;
+    }
+
+    for (;;) {
+        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+        if (nb_sectors <= 0) {
+            return 0;
+        }
+        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
+        if (ret < 0) {
+            error_report("error getting block status at sector %" PRId64 ": %s",
+                         sector_num, strerror(-ret));
+            return ret;
+        }
+        if (ret & BDRV_BLOCK_ZERO) {
+            sector_num += n;
+            continue;
+        }
+        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+        if (ret < 0) {
+            error_report("error writing zeroes at sector %" PRId64 ": %s",
+                         sector_num, strerror(-ret));
+            return ret;
+        }
+        sector_num += n;
+    }
+}
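Usage is a single call; a sketch assuming bs is already open:

    /* Zero the whole device, punching holes where the format allows. */
    int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
    if (ret < 0) {
        error_report("bdrv_make_zero failed: %s", strerror(-ret));
    }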
+
+int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base = (void *)buf,
+        .iov_len = bytes,
+    };
+    int ret;
+
+    if (bytes < 0) {
+        return -EINVAL;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bytes;
+}
+
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+    int ret;
+
+    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return qiov->size;
+}
+
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+                const void *buf, int bytes)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base   = (void *) buf,
+        .iov_len    = bytes,
+    };
+
+    if (bytes < 0) {
+        return -EINVAL;
+    }
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    return bdrv_pwritev(bs, offset, &qiov);
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+    const void *buf, int count)
+{
+    int ret;
+
+    ret = bdrv_pwrite(bs, offset, buf, count);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* No flush needed for cache modes that already do it */
+    if (bs->enable_write_cache) {
+        bdrv_flush(bs);
+    }
+
+    return 0;
+}
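A typical caller is format metadata code that must not let later writes overtake an on-disk header update; a sketch with a hypothetical header struct:

    /* Persist the (hypothetical) header before any update that depends
     * on it being stable on disk. */
    ret = bdrv_pwrite_sync(bs, 0, &header, sizeof(header));
    if (ret < 0) {
        return ret;
    }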
+
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    /* Perform I/O through a temporary buffer so that users who scribble over
+     * their read buffer while the operation is in progress do not end up
+     * modifying the image file.  This is critical for zero-copy guest I/O
+     * where anything might happen inside guest memory.
+     */
+    void *bounce_buffer;
+
+    BlockDriver *drv = bs->drv;
+    struct iovec iov;
+    QEMUIOVector bounce_qiov;
+    int64_t cluster_sector_num;
+    int cluster_nb_sectors;
+    size_t skip_bytes;
+    int ret;
+
+    /* Cover entire cluster so no additional backing file I/O is required when
+     * allocating cluster in the image file.
+     */
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
+
+    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
+                                   cluster_sector_num, cluster_nb_sectors);
+
+    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+    if (bounce_buffer == NULL) {
+        ret = -ENOMEM;
+        goto err;
+    }
+
+    qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
+                             &bounce_qiov);
+    if (ret < 0) {
+        goto err;
+    }
+
+    if (drv->bdrv_co_write_zeroes &&
+        buffer_is_zero(bounce_buffer, iov.iov_len)) {
+        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
+                                      cluster_nb_sectors, 0);
+    } else {
+        /* This does not change the data on the disk, so it is not
+         * necessary to flush even in cache=writethrough mode.
+         */
+        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
+                                  &bounce_qiov);
+    }
+
+    if (ret < 0) {
+        /* It might be okay to ignore write errors for guest requests.  If this
+         * is a deliberate copy-on-read then we don't want to ignore the error.
+         * Simply report it in all cases.
+         */
+        goto err;
+    }
+
+    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
+    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
+                        nb_sectors * BDRV_SECTOR_SIZE);
+
+err:
+    qemu_vfree(bounce_buffer);
+    return ret;
+}
+
+/*
+ * Forwards an already correctly aligned request to the BlockDriver. This
+ * handles copy on read and zeroing after EOF; any other features must be
+ * implemented by the caller.
+ */
+static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    int64_t align, QEMUIOVector *qiov, int flags)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(!qiov || bytes == qiov->size);
+
+    /* Handle Copy on Read and associated serialisation */
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        /* If we touch the same cluster it counts as an overlap.  This
+         * guarantees that allocating writes will be serialized and not race
+         * with each other for the same cluster.  For example, in copy-on-read
+         * it ensures that the CoR read and write operations are atomic and
+         * guest writes cannot interleave between them. */
+        mark_request_serialising(req, bdrv_get_cluster_size(bs));
+    }
+
+    wait_serialising_requests(req);
+
+    if (flags & BDRV_REQ_COPY_ON_READ) {
+        int pnum;
+
+        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+        if (ret < 0) {
+            goto out;
+        }
+
+        if (!ret || pnum != nb_sectors) {
+            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+            goto out;
+        }
+    }
+
+    /* Forward the request to the BlockDriver */
+    if (!bs->zero_beyond_eof) {
+        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+    } else {
+        /* Read zeros after EOF */
+        int64_t total_sectors, max_nb_sectors;
+
+        total_sectors = bdrv_nb_sectors(bs);
+        if (total_sectors < 0) {
+            ret = total_sectors;
+            goto out;
+        }
+
+        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
+                                  align >> BDRV_SECTOR_BITS);
+        if (nb_sectors < max_nb_sectors) {
+            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+        } else if (max_nb_sectors > 0) {
+            QEMUIOVector local_qiov;
+
+            qemu_iovec_init(&local_qiov, qiov->niov);
+            qemu_iovec_concat(&local_qiov, qiov, 0,
+                              max_nb_sectors * BDRV_SECTOR_SIZE);
+
+            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
+                                     &local_qiov);
+
+            qemu_iovec_destroy(&local_qiov);
+        } else {
+            ret = 0;
+        }
+
+        /* Reading beyond end of file is supposed to produce zeroes */
+        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
+            uint64_t offset = MAX(0, total_sectors - sector_num);
+            uint64_t bytes = (sector_num + nb_sectors - offset) *
+                              BDRV_SECTOR_SIZE;
+            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+        }
+    }
+
+out:
+    return ret;
+}
+
+static inline uint64_t bdrv_get_align(BlockDriverState *bs)
+{
+    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+    return MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+}
+
+static inline bool bdrv_req_is_aligned(BlockDriverState *bs,
+                                       int64_t offset, size_t bytes)
+{
+    int64_t align = bdrv_get_align(bs);
+    return !(offset & (align - 1) || (bytes & (align - 1)));
+}
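The mask test relies on align being a power of two; a standalone check with sample values:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    static bool req_is_aligned(int64_t offset, size_t bytes, int64_t align)
    {
        return !((offset & (align - 1)) || (bytes & (align - 1)));
    }

    int main(void)
    {
        assert(req_is_aligned(8192, 4096, 4096));    /* fully aligned */
        assert(!req_is_aligned(8192, 512, 4096));    /* tail misaligned */
        assert(!req_is_aligned(1536, 4096, 4096));   /* head misaligned */
        return 0;
    }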
+
+/*
+ * Handle a read request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvTrackedRequest req;
+
+    uint64_t align = bdrv_get_align(bs);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
+    int ret;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_check_byte_request(bs, offset, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (bs->copy_on_read) {
+        flags |= BDRV_REQ_COPY_ON_READ;
+    }
+
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, bytes, false);
+    }
+
+    /* Align read if necessary by padding qiov */
+    if (offset & (align - 1)) {
+        head_buf = qemu_blockalign(bs, align);
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+        tail_buf = qemu_blockalign(bs, align);
+        qemu_iovec_add(&local_qiov, tail_buf,
+                       align - ((offset + bytes) & (align - 1)));
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    tracked_request_begin(&req, bs, offset, bytes, false);
+    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+                              use_local_qiov ? &local_qiov : qiov,
+                              flags);
+    tracked_request_end(&req);
+
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+        qemu_vfree(head_buf);
+        qemu_vfree(tail_buf);
+    }
+
+    return ret;
+}
+
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
+                             nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+                            BDRV_REQ_COPY_ON_READ);
+}
+
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
+
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    BlockDriver *drv = bs->drv;
+    QEMUIOVector qiov;
+    struct iovec iov = {0};
+    int ret = 0;
+
+    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
+                                        BDRV_REQUEST_MAX_SECTORS);
+
+    while (nb_sectors > 0 && !ret) {
+        int num = nb_sectors;
+
+        /* Align request.  Block drivers can expect the "bulk" of the request
+         * to be aligned.
+         */
+        if (bs->bl.write_zeroes_alignment
+            && num > bs->bl.write_zeroes_alignment) {
+            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
+                /* Make a small request up to the first aligned sector.  */
+                num = bs->bl.write_zeroes_alignment;
+                num -= sector_num % bs->bl.write_zeroes_alignment;
+            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
+                /* Shorten the request to the last aligned sector.  num cannot
+                 * underflow because num > bs->bl.write_zeroes_alignment.
+                 */
+                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
+            }
+        }
+
+        /* limit request size */
+        if (num > max_write_zeroes) {
+            num = max_write_zeroes;
+        }
+
+        ret = -ENOTSUP;
+        /* First try the efficient write zeroes operation */
+        if (drv->bdrv_co_write_zeroes) {
+            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+        }
+
+        if (ret == -ENOTSUP) {
+            /* Fall back to bounce buffer if write zeroes is unsupported */
+            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+            num = MIN(num, max_xfer_len);
+            iov.iov_len = num * BDRV_SECTOR_SIZE;
+            if (iov.iov_base == NULL) {
+                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+                if (iov.iov_base == NULL) {
+                    ret = -ENOMEM;
+                    goto fail;
+                }
+                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
+            }
+            qemu_iovec_init_external(&qiov, &iov, 1);
+
+            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+
+            /* Keep the bounce buffer around if it is big enough for
+             * all future requests.
+             */
+            if (num < max_xfer_len) {
+                qemu_vfree(iov.iov_base);
+                iov.iov_base = NULL;
+            }
+        }
+
+        sector_num += num;
+        nb_sectors -= num;
+    }
+
+fail:
+    qemu_vfree(iov.iov_base);
+    return ret;
+}
+
+/*
+ * Forwards an already correctly aligned write request to the BlockDriver.
+ */
+static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+    QEMUIOVector *qiov, int flags)
+{
+    BlockDriver *drv = bs->drv;
+    bool waited;
+    int ret;
+
+    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert(!qiov || bytes == qiov->size);
+
+    waited = wait_serialising_requests(req);
+    assert(!waited || !req->serialising);
+    assert(req->overlap_offset <= offset);
+    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+
+    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
+
+    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
+        qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
+    if (ret < 0) {
+        /* Do nothing, write notifier decided to fail this request */
+    } else if (flags & BDRV_REQ_ZERO_WRITE) {
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
+        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
+    } else {
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
+        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+    }
+    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
+
+    if (ret == 0 && !bs->enable_write_cache) {
+        ret = bdrv_co_flush(bs);
+    }
+
+    bdrv_set_dirty(bs, sector_num, nb_sectors);
+
+    block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+
+    if (ret >= 0) {
+        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+    }
+
+    return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    BdrvTrackedRequest req;
+    uint64_t align = bdrv_get_align(bs);
+    uint8_t *head_buf = NULL;
+    uint8_t *tail_buf = NULL;
+    QEMUIOVector local_qiov;
+    bool use_local_qiov = false;
+    int ret;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+    if (bs->read_only) {
+        return -EACCES;
+    }
+
+    ret = bdrv_check_byte_request(bs, offset, bytes);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* throttling disk I/O */
+    if (bs->io_limits_enabled) {
+        bdrv_io_limits_intercept(bs, bytes, true);
+    }
+
+    /*
+     * Align write if necessary by performing a read-modify-write cycle.
+     * Pad qiov with the read parts and be sure to have a tracked request not
+     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+     */
+    tracked_request_begin(&req, bs, offset, bytes, true);
+
+    if (offset & (align - 1)) {
+        QEMUIOVector head_qiov;
+        struct iovec head_iov;
+
+        mark_request_serialising(&req, align);
+        wait_serialising_requests(&req);
+
+        head_buf = qemu_blockalign(bs, align);
+        head_iov = (struct iovec) {
+            .iov_base   = head_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+                                  align, &head_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+        qemu_iovec_init(&local_qiov, qiov->niov + 2);
+        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+        use_local_qiov = true;
+
+        bytes += offset & (align - 1);
+        offset = offset & ~(align - 1);
+    }
+
+    if ((offset + bytes) & (align - 1)) {
+        QEMUIOVector tail_qiov;
+        struct iovec tail_iov;
+        size_t tail_bytes;
+        bool waited;
+
+        mark_request_serialising(&req, align);
+        waited = wait_serialising_requests(&req);
+        assert(!waited || !use_local_qiov);
+
+        tail_buf = qemu_blockalign(bs, align);
+        tail_iov = (struct iovec) {
+            .iov_base   = tail_buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
+                                  align, &tail_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+        if (!use_local_qiov) {
+            qemu_iovec_init(&local_qiov, qiov->niov + 1);
+            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+            use_local_qiov = true;
+        }
+
+        tail_bytes = (offset + bytes) & (align - 1);
+        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
+
+        bytes = ROUND_UP(bytes, align);
+    }
+
+    if (use_local_qiov) {
+        /* Local buffer may have non-zero data. */
+        flags &= ~BDRV_REQ_ZERO_WRITE;
+    }
+    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+                               use_local_qiov ? &local_qiov : qiov,
+                               flags);
+
+fail:
+    tracked_request_end(&req);
+
+    if (use_local_qiov) {
+        qemu_iovec_destroy(&local_qiov);
+    }
+    qemu_vfree(head_buf);
+    qemu_vfree(tail_buf);
+
+    return ret;
+}
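Concretely, with align = 4096 a write at offset 1536 of 1024 bytes widens as follows (a worked trace of the code above, not part of the patch):

    /* head:  offset & 4095 = 1536, so read [0, 4096) into head_buf and
     *        prepend bytes [0, 1536); offset becomes 0, bytes 2560     */
    /* tail:  (0 + 2560) & 4095 = 2560, so read [0, 4096) again via the
     *        tail path and append bytes [2560, 4096)                   */
    /* final: bytes rounds up to 4096; one aligned pwritev of [0, 4096) */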
+
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
+                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+    int nb_sectors, QEMUIOVector *qiov)
+{
+    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+
+    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+                                      int64_t sector_num, int nb_sectors,
+                                      BdrvRequestFlags flags)
+{
+    int ret;
+
+    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
+
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
+        flags &= ~BDRV_REQ_MAY_UNMAP;
+    }
+    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
+                            nb_sectors << BDRV_SECTOR_BITS)) {
+        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+                                BDRV_REQ_ZERO_WRITE | flags);
+    } else {
+        uint8_t *buf;
+        QEMUIOVector local_qiov;
+        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
+
+        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
+        memset(buf, 0, bytes);
+        qemu_iovec_init(&local_qiov, 1);
+        qemu_iovec_add(&local_qiov, buf, bytes);
+
+        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
+                                BDRV_REQ_ZERO_WRITE | flags);
+        qemu_vfree(buf);
+    }
+    return ret;
+}
+
+int bdrv_flush_all(void)
+{
+    BlockDriverState *bs = NULL;
+    int result = 0;
+
+    while ((bs = bdrv_next(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        int ret;
+
+        aio_context_acquire(aio_context);
+        ret = bdrv_flush(bs);
+        if (ret < 0 && !result) {
+            result = ret;
+        }
+        aio_context_release(aio_context);
+    }
+
+    return result;
+}
+
+typedef struct BdrvCoGetBlockStatusData {
+    BlockDriverState *bs;
+    BlockDriverState *base;
+    int64_t sector_num;
+    int nb_sectors;
+    int *pnum;
+    int64_t ret;
+    bool done;
+} BdrvCoGetBlockStatusData;
+
+/*
+ * Returns the allocation status of the specified sectors.
+ * Drivers not implementing the functionality are assumed to not support
+ * backing files, hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+                                                     int64_t sector_num,
+                                                     int nb_sectors, int *pnum)
+{
+    int64_t total_sectors;
+    int64_t n;
+    int64_t ret, ret2;
+
+    total_sectors = bdrv_nb_sectors(bs);
+    if (total_sectors < 0) {
+        return total_sectors;
+    }
+
+    if (sector_num >= total_sectors) {
+        *pnum = 0;
+        return 0;
+    }
+
+    n = total_sectors - sector_num;
+    if (n < nb_sectors) {
+        nb_sectors = n;
+    }
+
+    if (!bs->drv->bdrv_co_get_block_status) {
+        *pnum = nb_sectors;
+        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
+        if (bs->drv->protocol_name) {
+            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+        }
+        return ret;
+    }
+
+    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+    if (ret < 0) {
+        *pnum = 0;
+        return ret;
+    }
+
+    if (ret & BDRV_BLOCK_RAW) {
+        assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+                                     *pnum, pnum);
+    }
+
+    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
+        ret |= BDRV_BLOCK_ALLOCATED;
+    }
+
+    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
+        if (bdrv_unallocated_blocks_are_zero(bs)) {
+            ret |= BDRV_BLOCK_ZERO;
+        } else if (bs->backing_hd) {
+            BlockDriverState *bs2 = bs->backing_hd;
+            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
+            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
+                ret |= BDRV_BLOCK_ZERO;
+            }
+        }
+    }
+
+    if (bs->file &&
+        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
+        (ret & BDRV_BLOCK_OFFSET_VALID)) {
+        int file_pnum;
+
+        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+                                        *pnum, &file_pnum);
+        if (ret2 >= 0) {
+            /* Ignore errors.  This is just providing extra information;
+             * it is useful but not necessary.
+             */
+            if (!file_pnum) {
+                /* !file_pnum indicates an offset at or beyond the EOF; it is
+                 * perfectly valid for the format block driver to point to such
+                 * offsets, so catch it and mark everything as zero */
+                ret |= BDRV_BLOCK_ZERO;
+            } else {
+                /* Limit request to the range reported by the protocol driver */
+                *pnum = file_pnum;
+                ret |= (ret2 & BDRV_BLOCK_ZERO);
+            }
+        }
+    }
+
+    return ret;
+}
+
+/* Coroutine wrapper for bdrv_get_block_status() */
+static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
+{
+    BdrvCoGetBlockStatusData *data = opaque;
+    BlockDriverState *bs = data->bs;
+
+    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
+                                         data->pnum);
+    data->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_get_block_status().
+ *
+ * See bdrv_co_get_block_status() for details.
+ */
+int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors, int *pnum)
+{
+    Coroutine *co;
+    BdrvCoGetBlockStatusData data = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .pnum = pnum,
+        .done = false,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_get_block_status_co_entry(&data);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
+        qemu_coroutine_enter(co, &data);
+        while (!data.done) {
+            aio_poll(aio_context, true);
+        }
+    }
+    return data.ret;
+}
+
+int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                                   int nb_sectors, int *pnum)
+{
+    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+    if (ret < 0) {
+        return ret;
+    }
+    return !!(ret & BDRV_BLOCK_ALLOCATED);
+}
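Callers of bdrv_is_allocated() normally walk an image in *pnum-sized runs, since pnum reports how many sectors share the returned state. A hedged sketch, assuming block/io.c's includes; count_allocated_sectors() is a hypothetical helper, and BDRV_REQUEST_MAX_SECTORS is the same clamp used by bdrv_co_discard() later in this file.

/* Sketch: count the allocated sectors of an image by walking it in
 * *pnum-sized runs. */
static int64_t count_allocated_sectors(BlockDriverState *bs)
{
    int64_t total = bdrv_nb_sectors(bs);
    int64_t sector = 0, allocated = 0;

    if (total < 0) {
        return total;                      /* propagate -errno */
    }
    while (sector < total) {
        int pnum;
        int n = MIN(total - sector, BDRV_REQUEST_MAX_SECTORS);
        int ret = bdrv_is_allocated(bs, sector, n, &pnum);

        if (ret < 0) {
            return ret;
        }
        if (ret) {
            allocated += pnum;             /* run of allocated sectors */
        }
        sector += pnum;                    /* pnum > 0 while sector < total */
    }
    return allocated;
}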
+
+/*
+ * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in any image between
+ * BASE and TOP (inclusive).  BASE can be NULL to check if the given
+ * sector is allocated in any image of the chain.  Return false otherwise.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ *  the specified sector) that are known to be in the same
+ *  allocated/unallocated state.
+ *
+ */
+int bdrv_is_allocated_above(BlockDriverState *top,
+                            BlockDriverState *base,
+                            int64_t sector_num,
+                            int nb_sectors, int *pnum)
+{
+    BlockDriverState *intermediate;
+    int ret, n = nb_sectors;
+
+    intermediate = top;
+    while (intermediate && intermediate != base) {
+        int pnum_inter;
+        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
+                                &pnum_inter);
+        if (ret < 0) {
+            return ret;
+        } else if (ret) {
+            *pnum = pnum_inter;
+            return 1;
+        }
+
+        /*
+         * [sector_num, nb_sectors] is unallocated on top but intermediate
+         * might have
+         *
+         * [sector_num+x, nb_sectors] allocated.
+         */
+        if (n > pnum_inter &&
+            (intermediate == top ||
+             sector_num + pnum_inter < intermediate->total_sectors)) {
+            n = pnum_inter;
+        }
+
+        intermediate = intermediate->backing_hd;
+    }
+
+    *pnum = n;
+    return 0;
+}
+
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                          const uint8_t *buf, int nb_sectors)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (!drv->bdrv_write_compressed) {
+        return -ENOTSUP;
+    }
+    ret = bdrv_check_request(bs, sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    }
+
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
+    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+}
+
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+                      int64_t pos, int size)
+{
+    QEMUIOVector qiov;
+    struct iovec iov = {
+        .iov_base   = (void *) buf,
+        .iov_len    = size,
+    };
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    return bdrv_writev_vmstate(bs, &qiov, pos);
+}
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    } else if (drv->bdrv_save_vmstate) {
+        return drv->bdrv_save_vmstate(bs, qiov, pos);
+    } else if (bs->file) {
+        return bdrv_writev_vmstate(bs->file, qiov, pos);
+    }
+
+    return -ENOTSUP;
+}
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                      int64_t pos, int size)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (drv->bdrv_load_vmstate) {
+        return drv->bdrv_load_vmstate(bs, buf, pos, size);
+    }
+    if (bs->file) {
+        return bdrv_load_vmstate(bs->file, buf, pos, size);
+    }
+    return -ENOTSUP;
+}
+
+/**************************************************************/
+/* async I/Os */
+
+BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                           QEMUIOVector *qiov, int nb_sectors,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, false);
+}
+
+BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                            QEMUIOVector *qiov, int nb_sectors,
+                            BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+                                 cb, opaque, true);
+}
+
+BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
+                                 BDRV_REQ_ZERO_WRITE | flags,
+                                 cb, opaque, true);
+}
+
+
+typedef struct MultiwriteCB {
+    int error;
+    int num_requests;
+    int num_callbacks;
+    struct {
+        BlockCompletionFunc *cb;
+        void *opaque;
+        QEMUIOVector *free_qiov;
+    } callbacks[];
+} MultiwriteCB;
+
+static void multiwrite_user_cb(MultiwriteCB *mcb)
+{
+    int i;
+
+    for (i = 0; i < mcb->num_callbacks; i++) {
+        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
+        if (mcb->callbacks[i].free_qiov) {
+            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
+        }
+        g_free(mcb->callbacks[i].free_qiov);
+    }
+}
+
+static void multiwrite_cb(void *opaque, int ret)
+{
+    MultiwriteCB *mcb = opaque;
+
+    trace_multiwrite_cb(mcb, ret);
+
+    if (ret < 0 && !mcb->error) {
+        mcb->error = ret;
+    }
+
+    mcb->num_requests--;
+    if (mcb->num_requests == 0) {
+        multiwrite_user_cb(mcb);
+        g_free(mcb);
+    }
+}
+
+static int multiwrite_req_compare(const void *a, const void *b)
+{
+    const BlockRequest *req1 = a, *req2 = b;
+
+    /*
+     * Note that we can't simply subtract req2->sector from req1->sector
+     * here as that could overflow the return value.
+     */
+    if (req1->sector > req2->sector) {
+        return 1;
+    } else if (req1->sector < req2->sector) {
+        return -1;
+    } else {
+        return 0;
+    }
+}
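To make the overflow concrete, here is a worked example (values assumed for illustration) of why the naive subtraction idiom breaks for 64-bit sector numbers:

/* Why "return req1->sector - req2->sector" would be wrong: the difference
 * of two int64_t sector numbers does not fit in the int that qsort()
 * expects.  For example:
 *
 *   int64_t a = 0, b = INT64_MAX;
 *   int cmp = (int)(a - b);   // a - b == INT64_MIN + 1, truncated to 1
 *
 * cmp ends up positive even though a < b, so the sort order breaks.
 * Hence the explicit three-way comparison above. */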
+
+/*
+ * Takes a bunch of requests and tries to merge them. Returns the number of
+ * requests that remain after merging.
+ */
+static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
+    int num_reqs, MultiwriteCB *mcb)
+{
+    int i, outidx;
+
+    // Sort requests by start sector
+    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
+
+    // Check if adjacent requests are sequential or overlapping. If so,
+    // combine them (gaps are never filled; see the assertion below).
+    outidx = 0;
+    for (i = 1; i < num_reqs; i++) {
+        int merge = 0;
+        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
+
+        // Handle exactly sequential writes and overlapping writes.
+        if (reqs[i].sector <= oldreq_last) {
+            merge = 1;
+        }
+
+        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+            merge = 0;
+        }
+
+        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
+            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
+            merge = 0;
+        }
+
+        if (merge) {
+            size_t size;
+            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
+            qemu_iovec_init(qiov,
+                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
+
+            // Add the first request to the merged one. If the requests are
+            // overlapping, drop the last sectors of the first request.
+            size = (reqs[i].sector - reqs[outidx].sector) << 9;
+            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
+
+            // We shouldn't need to add any zeros between the two requests
+            assert(reqs[i].sector <= oldreq_last);
+
+            // Add the second request
+            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
+
+            // Add tail of first request, if necessary
+            if (qiov->size < reqs[outidx].qiov->size) {
+                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
+                                  reqs[outidx].qiov->size - qiov->size);
+            }
+
+            reqs[outidx].nb_sectors = qiov->size >> 9;
+            reqs[outidx].qiov = qiov;
+
+            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
+        } else {
+            outidx++;
+            reqs[outidx].sector     = reqs[i].sector;
+            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
+            reqs[outidx].qiov       = reqs[i].qiov;
+        }
+    }
+
+    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+
+    return outidx + 1;
+}
+
+/*
+ * Submit multiple AIO write requests at once.
+ *
+ * On success, the function returns 0 and all requests in the reqs array have
+ * been submitted. On error it returns -1; some of the requests may already
+ * have been submitted while others have not, so the callback will be called
+ * for some requests and not for others. The caller must check the error
+ * field of each BlockRequest to know which callbacks to wait for (if
+ * error != 0, no callback will be called for that request).
+ *
+ * The implementation may modify the contents of the reqs array, e.g. to merge
+ * requests. However, the fields opaque and error are left unmodified as they
+ * are used to signal failure for a single request to the caller.
+ */
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+{
+    MultiwriteCB *mcb;
+    int i;
+
+    /* don't submit writes if we don't have a medium */
+    if (bs->drv == NULL) {
+        for (i = 0; i < num_reqs; i++) {
+            reqs[i].error = -ENOMEDIUM;
+        }
+        return -1;
+    }
+
+    if (num_reqs == 0) {
+        return 0;
+    }
+
+    // Create MultiwriteCB structure
+    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
+    mcb->num_requests = 0;
+    mcb->num_callbacks = num_reqs;
+
+    for (i = 0; i < num_reqs; i++) {
+        mcb->callbacks[i].cb = reqs[i].cb;
+        mcb->callbacks[i].opaque = reqs[i].opaque;
+    }
+
+    // Check for mergeable requests
+    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+
+    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+
+    /* Run the aio requests. */
+    mcb->num_requests = num_reqs;
+    for (i = 0; i < num_reqs; i++) {
+        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
+                              reqs[i].nb_sectors, reqs[i].flags,
+                              multiwrite_cb, mcb,
+                              true);
+    }
+
+    return 0;
+}
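A hedged usage sketch of bdrv_aio_multiwrite(): build a BlockRequest array, point each entry at its callback, and submit the batch. my_write_cb(), submit_pair() and the sector layout are illustrative; the BlockRequest fields are the ones consumed above.

/* Sketch: submit two writes as one multiwrite batch. */
static void my_write_cb(void *opaque, int ret)
{
    /* called once per original request, with ret < 0 on failure */
}

static int submit_pair(BlockDriverState *bs,
                       QEMUIOVector *qiov0, QEMUIOVector *qiov1)
{
    BlockRequest reqs[2] = {
        { .sector = 0, .nb_sectors = 8, .qiov = qiov0,
          .cb = my_write_cb, .opaque = NULL },
        { .sector = 8, .nb_sectors = 8, .qiov = qiov1,
          .cb = my_write_cb, .opaque = NULL },
    };

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        /* only requests with reqs[i].error == 0 will still get a callback */
        return -1;
    }
    return 0;
}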
+
+void bdrv_aio_cancel(BlockAIOCB *acb)
+{
+    qemu_aio_ref(acb);
+    bdrv_aio_cancel_async(acb);
+    while (acb->refcnt > 1) {
+        if (acb->aiocb_info->get_aio_context) {
+            aio_poll(acb->aiocb_info->get_aio_context(acb), true);
+        } else if (acb->bs) {
+            aio_poll(bdrv_get_aio_context(acb->bs), true);
+        } else {
+            abort();
+        }
+    }
+    qemu_aio_unref(acb);
+}
+
+/* Async version of aio cancel. The caller is not blocked if the acb implements
+ * cancel_async; otherwise we do nothing and let the request complete normally.
+ * In either case the completion callback must be called. */
+void bdrv_aio_cancel_async(BlockAIOCB *acb)
+{
+    if (acb->aiocb_info->cancel_async) {
+        acb->aiocb_info->cancel_async(acb);
+    }
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+typedef struct BlockAIOCBSync {
+    BlockAIOCB common;
+    QEMUBH *bh;
+    int ret;
+    /* vector translation state */
+    QEMUIOVector *qiov;
+    uint8_t *bounce;
+    int is_write;
+} BlockAIOCBSync;
+
+static const AIOCBInfo bdrv_em_aiocb_info = {
+    .aiocb_size         = sizeof(BlockAIOCBSync),
+};
+
+static void bdrv_aio_bh_cb(void *opaque)
+{
+    BlockAIOCBSync *acb = opaque;
+
+    if (!acb->is_write && acb->ret >= 0) {
+        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+    }
+    qemu_vfree(acb->bounce);
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    qemu_aio_unref(acb);
+}
+
+static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                      int64_t sector_num,
+                                      QEMUIOVector *qiov,
+                                      int nb_sectors,
+                                      BlockCompletionFunc *cb,
+                                      void *opaque,
+                                      int is_write)
+{
+    BlockAIOCBSync *acb;
+
+    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
+    acb->is_write = is_write;
+    acb->qiov = qiov;
+    acb->bounce = qemu_try_blockalign(bs, qiov->size);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
+
+    if (acb->bounce == NULL) {
+        acb->ret = -ENOMEM;
+    } else if (is_write) {
+        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
+    } else {
+        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
+    }
+
+    qemu_bh_schedule(acb->bh);
+
+    return &acb->common;
+}
+
+static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+
+typedef struct BlockAIOCBCoroutine {
+    BlockAIOCB common;
+    BlockRequest req;
+    bool is_write;
+    bool need_bh;
+    bool *done;
+    QEMUBH* bh;
+} BlockAIOCBCoroutine;
+
+static const AIOCBInfo bdrv_em_co_aiocb_info = {
+    .aiocb_size         = sizeof(BlockAIOCBCoroutine),
+};
+
+static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
+{
+    if (!acb->need_bh) {
+        acb->common.cb(acb->common.opaque, acb->req.error);
+        qemu_aio_unref(acb);
+    }
+}
+
+static void bdrv_co_em_bh(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+
+    assert(!acb->need_bh);
+    qemu_bh_delete(acb->bh);
+    bdrv_co_complete(acb);
+}
+
+static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
+{
+    acb->need_bh = false;
+    if (acb->req.error != -EINPROGRESS) {
+        BlockDriverState *bs = acb->common.bs;
+
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+        qemu_bh_schedule(acb->bh);
+    }
+}
+
+/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
+static void coroutine_fn bdrv_co_do_rw(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    if (!acb->is_write) {
+        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+    } else {
+        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
+            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+    }
+
+    bdrv_co_complete(acb);
+}
+
+static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                         int64_t sector_num,
+                                         QEMUIOVector *qiov,
+                                         int nb_sectors,
+                                         BdrvRequestFlags flags,
+                                         BlockCompletionFunc *cb,
+                                         void *opaque,
+                                         bool is_write)
+{
+    Coroutine *co;
+    BlockAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+    acb->req.sector = sector_num;
+    acb->req.nb_sectors = nb_sectors;
+    acb->req.qiov = qiov;
+    acb->req.flags = flags;
+    acb->is_write = is_write;
+
+    co = qemu_coroutine_create(bdrv_co_do_rw);
+    qemu_coroutine_enter(co, acb);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    acb->req.error = bdrv_co_flush(bs);
+    bdrv_co_complete(acb);
+}
+
+BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    Coroutine *co;
+    BlockAIOCBCoroutine *acb;
+
+    trace_bdrv_aio_flush(bs, opaque);
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+
+    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+    qemu_coroutine_enter(co, acb);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+    bdrv_co_complete(acb);
+}
+
+BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    Coroutine *co;
+    BlockAIOCBCoroutine *acb;
+
+    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+    acb->req.sector = sector_num;
+    acb->req.nb_sectors = nb_sectors;
+    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
+    qemu_coroutine_enter(co, acb);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *acb;
+
+    acb = g_slice_alloc(aiocb_info->aiocb_size);
+    acb->aiocb_info = aiocb_info;
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+    acb->refcnt = 1;
+    return acb;
+}
+
+void qemu_aio_ref(void *p)
+{
+    BlockAIOCB *acb = p;
+    acb->refcnt++;
+}
+
+void qemu_aio_unref(void *p)
+{
+    BlockAIOCB *acb = p;
+    assert(acb->refcnt > 0);
+    if (--acb->refcnt == 0) {
+        g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+    }
+}
+
+/**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion {
+    Coroutine *coroutine;
+    int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+    CoroutineIOCompletion *co = opaque;
+
+    co->ret = ret;
+    qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *iov,
+                                      bool is_write)
+{
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockAIOCB *acb;
+
+    if (is_write) {
+        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+                                       bdrv_co_io_em_complete, &co);
+    } else {
+        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+                                      bdrv_co_io_em_complete, &co);
+    }
+
+    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
+    if (!acb) {
+        return -EIO;
+    }
+    qemu_coroutine_yield();
+
+    return co.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+                                         int64_t sector_num, int nb_sectors,
+                                         QEMUIOVector *iov)
+{
+    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+{
+    RwCo *rwco = opaque;
+
+    rwco->ret = bdrv_co_flush(rwco->bs);
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+    int ret;
+
+    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+        return 0;
+    }
+
+    /* Write back cached data to the OS even with cache=unsafe */
+    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+    if (bs->drv->bdrv_co_flush_to_os) {
+        ret = bs->drv->bdrv_co_flush_to_os(bs);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    /* But don't actually force it to the disk with cache=unsafe */
+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        goto flush_parent;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+    if (bs->drv->bdrv_co_flush_to_disk) {
+        ret = bs->drv->bdrv_co_flush_to_disk(bs);
+    } else if (bs->drv->bdrv_aio_flush) {
+        BlockAIOCB *acb;
+        CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+        };
+
+        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+        if (acb == NULL) {
+            ret = -EIO;
+        } else {
+            qemu_coroutine_yield();
+            ret = co.ret;
+        }
+    } else {
+        /*
+         * Some block drivers always operate in either writethrough or unsafe
+         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
+         * know how the server works (because the behaviour is hardcoded or
+         * depends on server-side configuration), so we can't ensure that
+         * everything is safe on disk. Returning an error doesn't work because
+         * that would break guests even if the server operates in writethrough
+         * mode.
+         *
+         * Let's hope the user knows what they're doing.
+         */
+        ret = 0;
+    }
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
+     * in the case of cache=unsafe, so there are no useless flushes.
+     */
+flush_parent:
+    return bdrv_co_flush(bs->file);
+}
+
+int bdrv_flush(BlockDriverState *bs)
+{
+    Coroutine *co;
+    RwCo rwco = {
+        .bs = bs,
+        .ret = NOT_DONE,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_flush_co_entry(&rwco);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_flush_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    }
+
+    return rwco.ret;
+}
+
+typedef struct DiscardCo {
+    BlockDriverState *bs;
+    int64_t sector_num;
+    int nb_sectors;
+    int ret;
+} DiscardCo;
+
+static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+{
+    DiscardCo *rwco = opaque;
+
+    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+}
+
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+                                 int nb_sectors)
+{
+    int max_discard, ret;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_check_request(bs, sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    } else if (bs->read_only) {
+        return -EROFS;
+    }
+
+    bdrv_reset_dirty(bs, sector_num, nb_sectors);
+
+    /* Do nothing if disabled.  */
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
+        return 0;
+    }
+
+    if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
+        return 0;
+    }
+
+    max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
+    while (nb_sectors > 0) {
+        int ret;
+        int num = nb_sectors;
+
+        /* align request */
+        if (bs->bl.discard_alignment &&
+            num >= bs->bl.discard_alignment &&
+            sector_num % bs->bl.discard_alignment) {
+            if (num > bs->bl.discard_alignment) {
+                num = bs->bl.discard_alignment;
+            }
+            num -= sector_num % bs->bl.discard_alignment;
+        }
+
+        /* limit request size */
+        if (num > max_discard) {
+            num = max_discard;
+        }
+
+        if (bs->drv->bdrv_co_discard) {
+            ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+        } else {
+            BlockAIOCB *acb;
+            CoroutineIOCompletion co = {
+                .coroutine = qemu_coroutine_self(),
+            };
+
+            acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
+                                            bdrv_co_io_em_complete, &co);
+            if (acb == NULL) {
+                return -EIO;
+            } else {
+                qemu_coroutine_yield();
+                ret = co.ret;
+            }
+        }
+        if (ret && ret != -ENOTSUP) {
+            return ret;
+        }
+
+        sector_num += num;
+        nb_sectors -= num;
+    }
+    return 0;
+}
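The alignment clamp above is easy to misread, so here is a worked example with assumed limits:

/* Worked example of the alignment clamp, with assumed limits
 * bl.discard_alignment = 8 and a request sector_num = 10, nb_sectors = 32:
 *
 *   num = 32;          // whole remaining request
 *   10 % 8 != 0, so the request is misaligned:
 *   num = 8;           // num > alignment, clamp to one alignment unit
 *   num -= 10 % 8;     // num = 6, first chunk ends at sector 16
 *
 * The first iteration discards sectors [10, 16); every later chunk then
 * starts on an 8-sector boundary. */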
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    Coroutine *co;
+    DiscardCo rwco = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .ret = NOT_DONE,
+    };
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_discard_co_entry(&rwco);
+    } else {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        co = qemu_coroutine_create(bdrv_discard_co_entry);
+        qemu_coroutine_enter(co, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    }
+
+    return rwco.ret;
+}
+
+/* needed for generic scsi interface */
+
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (drv && drv->bdrv_ioctl) {
+        return drv->bdrv_ioctl(bs, req, buf);
+    }
+    return -ENOTSUP;
+}
+
+BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+        unsigned long int req, void *buf,
+        BlockCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (drv && drv->bdrv_aio_ioctl) {
+        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
+    }
+    return NULL;
+}
+
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+    return qemu_memalign(bdrv_opt_mem_align(bs), size);
+}
+
+void *qemu_blockalign0(BlockDriverState *bs, size_t size)
+{
+    return memset(qemu_blockalign(bs, size), 0, size);
+}
+
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+{
+    size_t align = bdrv_opt_mem_align(bs);
+
+    /* Ensure that NULL is never returned on success */
+    assert(align > 0);
+    if (size == 0) {
+        size = align;
+    }
+
+    return qemu_try_memalign(align, size);
+}
+
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
+{
+    void *mem = qemu_try_blockalign(bs, size);
+
+    if (mem) {
+        memset(mem, 0, size);
+    }
+
+    return mem;
+}
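The _try_ variants return NULL instead of aborting, and callers are expected to map that to -ENOMEM, as bdrv_aio_rw_vector() does with its bounce buffer earlier in this file. A minimal sketch, with a hypothetical helper name:

/* Sketch: allocate an aligned bounce buffer, failing gracefully. */
static int fill_with_pattern(BlockDriverState *bs, size_t size, uint8_t pat)
{
    uint8_t *buf = qemu_try_blockalign(bs, size);

    if (buf == NULL) {
        return -ENOMEM;      /* instead of aborting like qemu_blockalign() */
    }
    memset(buf, pat, size);
    /* ... issue I/O with buf ... */
    qemu_vfree(buf);
    return 0;
}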
+
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+    int i;
+    size_t alignment = bdrv_opt_mem_align(bs);
+
+    for (i = 0; i < qiov->niov; i++) {
+        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
+            return false;
+        }
+        if (qiov->iov[i].iov_len % alignment) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+                                    NotifierWithReturn *notifier)
+{
+    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
+}
+
+void bdrv_io_plug(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_io_plug) {
+        drv->bdrv_io_plug(bs);
+    } else if (bs->file) {
+        bdrv_io_plug(bs->file);
+    }
+}
+
+void bdrv_io_unplug(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_io_unplug) {
+        drv->bdrv_io_unplug(bs);
+    } else if (bs->file) {
+        bdrv_io_unplug(bs->file);
+    }
+}
+
+void bdrv_flush_io_queue(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (drv && drv->bdrv_flush_io_queue) {
+        drv->bdrv_flush_io_queue(bs);
+    } else if (bs->file) {
+        bdrv_flush_io_queue(bs->file);
+    }
+}
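bdrv_io_plug()/bdrv_io_unplug() exist so a caller can batch several submissions and let the driver issue them in one go. A hedged sketch of that pattern; submit_one() is a hypothetical per-request helper standing in for e.g. bdrv_aio_writev():

/* Sketch of the batching pattern enabled by plug/unplug: queue many
 * requests while plugged, then let the driver submit them together. */
static void submit_batch(BlockDriverState *bs, BlockRequest *reqs, int n)
{
    int i;

    bdrv_io_plug(bs);                 /* driver may start queueing */
    for (i = 0; i < n; i++) {
        submit_one(bs, &reqs[i]);     /* one AIO submission per request */
    }
    bdrv_io_unplug(bs);               /* flush the queued requests */
}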
index ba33290000f17fc427acbf2b4a3d29214518f56a..8fca1d32cb03fd98c0512fad8ad38f5e26105bed 100644 (file)
@@ -2,7 +2,7 @@
  * QEMU Block driver for iSCSI images
  *
  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -57,9 +57,6 @@ typedef struct IscsiLun {
     int events;
     QEMUTimer *nop_timer;
     QEMUTimer *event_timer;
-    uint8_t lbpme;
-    uint8_t lbprz;
-    uint8_t has_write_same;
     struct scsi_inquiry_logical_block_provisioning lbp;
     struct scsi_inquiry_block_limits bl;
     unsigned char *zeroblock;
@@ -67,6 +64,11 @@ typedef struct IscsiLun {
     int cluster_sectors;
     bool use_16_for_rw;
     bool write_protected;
+    bool lbpme;
+    bool lbprz;
+    bool dpofua;
+    bool has_write_same;
+    bool force_next_flush;
 } IscsiLun;
 
 typedef struct IscsiTask {
@@ -79,6 +81,7 @@ typedef struct IscsiTask {
     QEMUBH *bh;
     IscsiLun *iscsilun;
     QEMUTimer retry_timer;
+    bool force_next_flush;
 } IscsiTask;
 
 typedef struct IscsiAIOCB {
@@ -100,7 +103,7 @@ typedef struct IscsiAIOCB {
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
-static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
+static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
 
 /* this threshold is a trade-off knob to choose between
  * the potential additional overhead of an extra GET_LBA_STATUS request
@@ -183,10 +186,13 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
                 iTask->do_retry = 1;
                 goto out;
             }
-            if (status == SCSI_STATUS_BUSY) {
+            /* status 0x28 is SCSI_TASK_SET_FULL. It was first introduced
+             * in libiscsi 1.10.0. Hardcode this value here to avoid
+             * the need to bump the libiscsi requirement to 1.10.0 */
+            if (status == SCSI_STATUS_BUSY || status == 0x28) {
                 unsigned retry_time =
                     exp_random(iscsi_retry_times[iTask->retries - 1]);
-                error_report("iSCSI Busy (retry #%u in %u ms): %s",
+                error_report("iSCSI Busy/TaskSetFull (retry #%u in %u ms): %s",
                              iTask->retries, retry_time,
                              iscsi_get_error(iscsi));
                 aio_timer_init(iTask->iscsilun->aio_context,
@@ -199,6 +205,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
             }
         }
         error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
+    } else {
+        iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
     }
 
 out:
@@ -369,6 +377,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
     struct IscsiTask iTask;
     uint64_t lba;
     uint32_t num_sectors;
+    int fua;
 
     if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
         return -EINVAL;
@@ -384,15 +393,17 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
     iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
+    fua = iscsilun->dpofua && !bs->enable_write_cache;
+    iTask.force_next_flush = !fua;
     if (iscsilun->use_16_for_rw) {
         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
                                         NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, 0, 0, 0,
+                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                         iscsi_co_generic_cb, &iTask);
     } else {
         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
                                         NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, 0, 0, 0,
+                                        iscsilun->block_size, 0, 0, fua, 0, 0,
                                         iscsi_co_generic_cb, &iTask);
     }
     if (iTask.task == NULL) {
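To summarize the new write path: FUA is used only when the target reports DPOFUA and the guest runs with the write cache disabled; in every other case the task is flagged so that the next iscsi_co_flush() issues a real SYNCHRONIZE CACHE. A sketch of the resulting behaviour (not code from the patch):

/* How the fua/force_next_flush pair behaves after this hunk:
 *
 *   dpofua  enable_write_cache  ->  fua  force_next_flush
 *     1            0                 1         0   (write hits media, flush skipped)
 *     1            1                 0         1   (cached; next flush is real)
 *     0            0                 0         1
 *     0            1                 0         1
 */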
@@ -460,7 +471,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
     *pnum = nb_sectors;
 
     /* LUN does not support logical block provisioning */
-    if (iscsilun->lbpme == 0) {
+    if (!iscsilun->lbpme) {
         goto out;
     }
 
@@ -620,8 +631,12 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
         return 0;
     }
 
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
+    if (!iscsilun->force_next_flush) {
+        return 0;
+    }
+    iscsilun->force_next_flush = false;
 
+    iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -917,6 +932,7 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
     }
 
     iscsi_co_init_iscsitask(iscsilun, &iTask);
+    iTask.force_next_flush = true;
 retry:
     if (use_16_for_ws) {
         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
@@ -1121,8 +1137,8 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
                 } else {
                     iscsilun->block_size = rc16->block_length;
                     iscsilun->num_blocks = rc16->returned_lba + 1;
-                    iscsilun->lbpme = rc16->lbpme;
-                    iscsilun->lbprz = rc16->lbprz;
+                    iscsilun->lbpme = !!rc16->lbpme;
+                    iscsilun->lbprz = !!rc16->lbprz;
                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
                 }
             }
@@ -1253,11 +1269,12 @@ static void iscsi_attach_aio_context(BlockDriverState *bs,
                                           iscsi_timed_set_events, iscsilun);
 }
 
-static bool iscsi_is_write_protected(IscsiLun *iscsilun)
+static void iscsi_modesense_sync(IscsiLun *iscsilun)
 {
     struct scsi_task *task;
     struct scsi_mode_sense *ms = NULL;
-    bool wrprotected = false;
+    iscsilun->write_protected = false;
+    iscsilun->dpofua = false;
 
     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
                                  1, SCSI_MODESENSE_PC_CURRENT,
@@ -1278,13 +1295,13 @@ static bool iscsi_is_write_protected(IscsiLun *iscsilun)
                      iscsi_get_error(iscsilun->iscsi));
         goto out;
     }
-    wrprotected = ms->device_specific_parameter & 0x80;
+    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
+    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
 
 out:
     if (task) {
         scsi_free_scsi_task(task);
     }
-    return wrprotected;
 }
 
 /*
@@ -1403,7 +1420,8 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     scsi_free_scsi_task(task);
     task = NULL;
 
-    iscsilun->write_protected = iscsi_is_write_protected(iscsilun);
+    iscsi_modesense_sync(iscsilun);
+
     /* Check the write protect flag of the LUN if we want to write */
     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
         iscsilun->write_protected) {
@@ -1481,7 +1499,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
         iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
                                      iscsilun->block_size) >> BDRV_SECTOR_BITS;
-        if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
+        if (iscsilun->lbprz) {
             iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
             if (iscsilun->allocationmap == NULL) {
                 ret = -ENOMEM;
@@ -1501,6 +1519,9 @@ out:
 
     if (ret) {
         if (iscsi != NULL) {
+            if (iscsi_is_logged_in(iscsi)) {
+                iscsi_logout_sync(iscsi);
+            }
             iscsi_destroy_context(iscsi);
         }
         memset(iscsilun, 0, sizeof(IscsiLun));
@@ -1514,6 +1535,9 @@ static void iscsi_close(BlockDriverState *bs)
     struct iscsi_context *iscsi = iscsilun->iscsi;
 
     iscsi_detach_aio_context(bs);
+    if (iscsi_is_logged_in(iscsi)) {
+        iscsi_logout_sync(iscsi);
+    }
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
     g_free(iscsilun->allocationmap);
@@ -1649,7 +1673,7 @@ out:
 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     IscsiLun *iscsilun = bs->opaque;
-    bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
+    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
     bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
     return 0;
index 405616422b1dc864ab20c49c7b89bc8776c15041..58f391a6d626d4b4cca9567c3feedff2fd8482de 100644 (file)
@@ -125,11 +125,9 @@ static void mirror_write_complete(void *opaque, int ret)
     MirrorOp *op = opaque;
     MirrorBlockJob *s = op->s;
     if (ret < 0) {
-        BlockDriverState *source = s->common.bs;
         BlockErrorAction action;
 
-        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
-                              op->nb_sectors);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
         action = mirror_error_action(s, false, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -143,11 +141,9 @@ static void mirror_read_complete(void *opaque, int ret)
     MirrorOp *op = opaque;
     MirrorBlockJob *s = op->s;
     if (ret < 0) {
-        BlockDriverState *source = s->common.bs;
         BlockErrorAction action;
 
-        bdrv_set_dirty_bitmap(source, s->dirty_bitmap, op->sector_num,
-                              op->nb_sectors);
+        bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
         action = mirror_error_action(s, true, -ret);
         if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
             s->ret = ret;
@@ -170,10 +166,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
     if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
+        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
         s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s,
-                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
         assert(s->sector_num >= 0);
     }
 
@@ -288,8 +283,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
         next_sector += sectors_per_chunk;
     }
 
-    bdrv_reset_dirty_bitmap(source, s->dirty_bitmap, sector_num,
-                            nb_sectors);
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
 
     /* Copy the dirty cluster.  */
     s->in_flight++;
@@ -446,7 +440,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
             assert(n > 0);
             if (ret == 1) {
-                bdrv_set_dirty_bitmap(bs, s->dirty_bitmap, sector_num, n);
+                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
                 sector_num = next;
             } else {
                 sector_num += n;
@@ -454,7 +448,7 @@ static void coroutine_fn mirror_run(void *opaque)
         }
     }
 
-    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
+    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
     last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     for (;;) {
         uint64_t delay_ns = 0;
@@ -466,7 +460,7 @@ static void coroutine_fn mirror_run(void *opaque)
             goto immediate_exit;
         }
 
-        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         /* s->common.offset contains the number of bytes already processed so
          * far, cnt is the number of dirty sectors remaining and
          * s->sectors_in_flight is the number of sectors currently being
@@ -475,7 +469,7 @@ static void coroutine_fn mirror_run(void *opaque)
                         (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
 
         /* Note that even when no rate limit is applied we need to yield
-         * periodically with no pending I/O so that qemu_aio_flush() returns.
+         * periodically with no pending I/O so that bdrv_drain_all() returns.
          * We do so every SLICE_TIME nanoseconds, or when there is an error,
          * or when the source is clean, whichever comes first.
          */
@@ -488,9 +482,6 @@ static void coroutine_fn mirror_run(void *opaque)
                 continue;
             } else if (cnt != 0) {
                 delay_ns = mirror_iteration(s);
-                if (delay_ns == 0) {
-                    continue;
-                }
             }
         }
 
@@ -516,7 +507,7 @@ static void coroutine_fn mirror_run(void *opaque)
 
                 should_complete = s->should_complete ||
                     block_job_is_cancelled(&s->common);
-                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
             }
         }
 
@@ -531,7 +522,7 @@ static void coroutine_fn mirror_run(void *opaque)
              */
             trace_mirror_before_drain(s, cnt);
             bdrv_drain(bs);
-            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
         }
 
         ret = 0;
@@ -634,7 +625,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
     }
 
     s->should_complete = true;
-    block_job_resume(job);
+    block_job_enter(&s->common);
 }
 
 static const BlockJobDriver mirror_job_driver = {
@@ -656,7 +647,7 @@ static const BlockJobDriver commit_active_job_driver = {
 
 static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
                              const char *replaces,
-                             int64_t speed, int64_t granularity,
+                             int64_t speed, uint32_t granularity,
                              int64_t buf_size,
                              BlockdevOnError on_source_error,
                              BlockdevOnError on_target_error,
@@ -668,15 +659,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     MirrorBlockJob *s;
 
     if (granularity == 0) {
-        /* Choose the default granularity based on the target file's cluster
-         * size, clamped between 4k and 64k.  */
-        BlockDriverInfo bdi;
-        if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
-            granularity = MAX(4096, bdi.cluster_size);
-            granularity = MIN(65536, granularity);
-        } else {
-            granularity = 65536;
-        }
+        granularity = bdrv_get_default_bitmap_granularity(target);
     }
 
     assert ((granularity & (granularity - 1)) == 0);
@@ -703,7 +686,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
     s->granularity = granularity;
     s->buf_size = MAX(buf_size, granularity);
 
-    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
+    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
     if (!s->dirty_bitmap) {
         return;
     }
@@ -717,7 +700,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   const char *replaces,
-                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  int64_t speed, uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb,
@@ -726,6 +709,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     bool is_none_mode;
     BlockDriverState *base;
 
+    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
+        return;
+    }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
     mirror_start_job(bs, target, replaces,
index ec2bd27a4b9d1dd60fe62ebc0cce80e6552aa9c1..7d083233fb4da5a0fab2bf8bbe23cae502db1be4 100644 (file)
 
 #include "block/block_int.h"
 
+#define NULL_OPT_LATENCY "latency-ns"
+
 typedef struct {
     int64_t length;
+    int64_t latency_ns;
 } BDRVNullState;
 
 static QemuOptsList runtime_opts = {
@@ -30,6 +33,12 @@ static QemuOptsList runtime_opts = {
             .type = QEMU_OPT_SIZE,
             .help = "size of the null block",
         },
+        {
+            .name = NULL_OPT_LATENCY,
+            .type = QEMU_OPT_NUMBER,
+            .help = "nanoseconds (approximated) to wait "
+                    "before completing request",
+        },
         { /* end of list */ }
     },
 };
@@ -39,13 +48,20 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
 {
     QemuOpts *opts;
     BDRVNullState *s = bs->opaque;
+    int ret = 0;
 
     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
     qemu_opts_absorb_qdict(opts, options, &error_abort);
     s->length =
         qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
+    s->latency_ns =
+        qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0);
+    if (s->latency_ns < 0) {
+        error_setg(errp, "latency-ns is invalid");
+        ret = -EINVAL;
+    }
     qemu_opts_del(opts);
-    return 0;
+    return ret;
 }
 
 static void null_close(BlockDriverState *bs)
@@ -58,28 +74,40 @@ static int64_t null_getlength(BlockDriverState *bs)
     return s->length;
 }
 
+static coroutine_fn int null_co_common(BlockDriverState *bs)
+{
+    BDRVNullState *s = bs->opaque;
+
+    if (s->latency_ns) {
+        co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
+                        s->latency_ns);
+    }
+    return 0;
+}
+
 static coroutine_fn int null_co_readv(BlockDriverState *bs,
                                       int64_t sector_num, int nb_sectors,
                                       QEMUIOVector *qiov)
 {
-    return 0;
+    return null_co_common(bs);
 }
 
 static coroutine_fn int null_co_writev(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        QEMUIOVector *qiov)
 {
-    return 0;
+    return null_co_common(bs);
 }
 
 static coroutine_fn int null_co_flush(BlockDriverState *bs)
 {
-    return 0;
+    return null_co_common(bs);
 }
 
 typedef struct {
     BlockAIOCB common;
     QEMUBH *bh;
+    QEMUTimer timer;
 } NullAIOCB;
 
 static const AIOCBInfo null_aiocb_info = {
@@ -94,15 +122,33 @@ static void null_bh_cb(void *opaque)
     qemu_aio_unref(acb);
 }
 
+static void null_timer_cb(void *opaque)
+{
+    NullAIOCB *acb = opaque;
+    acb->common.cb(acb->common.opaque, 0);
+    timer_deinit(&acb->timer);
+    qemu_aio_unref(acb);
+}
+
 static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
                                           BlockCompletionFunc *cb,
                                           void *opaque)
 {
     NullAIOCB *acb;
+    BDRVNullState *s = bs->opaque;
 
     acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
+    /* Only emulate latency after vcpu is running. */
+    if (s->latency_ns) {
+        aio_timer_init(bdrv_get_aio_context(bs), &acb->timer,
+                       QEMU_CLOCK_REALTIME, SCALE_NS,
+                       null_timer_cb, acb);
+        timer_mod_ns(&acb->timer,
+                     qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
+    } else {
+        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
+        qemu_bh_schedule(acb->bh);
+    }
     return &acb->common;
 }
 
@@ -131,6 +177,12 @@ static BlockAIOCB *null_aio_flush(BlockDriverState *bs,
     return null_aio_common(bs, cb, opaque);
 }
 
+static int null_reopen_prepare(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static BlockDriver bdrv_null_co = {
     .format_name            = "null-co",
     .protocol_name          = "null-co",
@@ -143,6 +195,7 @@ static BlockDriver bdrv_null_co = {
     .bdrv_co_readv          = null_co_readv,
     .bdrv_co_writev         = null_co_writev,
     .bdrv_co_flush_to_disk  = null_co_flush,
+    .bdrv_reopen_prepare    = null_reopen_prepare,
 };
 
 static BlockDriver bdrv_null_aio = {
@@ -157,6 +210,7 @@ static BlockDriver bdrv_null_aio = {
     .bdrv_aio_readv         = null_aio_readv,
     .bdrv_aio_writev        = null_aio_writev,
     .bdrv_aio_flush         = null_aio_flush,
+    .bdrv_reopen_prepare    = null_reopen_prepare,
 };
 
 static void bdrv_null_init(void)
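For reference, the new option can be exercised from the command line; the exact syntax below is an assumption based on how -drive forwards options to the block driver, not something stated in the patch:

/* Assumed usage: both null-co and null-aio absorb the latency-ns option:
 *
 *   qemu-system-x86_64 -drive if=none,id=d0,driver=null-co,size=1G,latency-ns=100000
 *
 * Each read, write and flush then completes roughly 100 us after
 * submission, via the timer path added in null_aio_common() above. */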
index 8a19aed44616c92ec3b36108b01cabfd62489ff0..063dd1bc1f3f9167167e0114d79bf0e0d90fab43 100644 (file)
 #include "qapi/qmp/types.h"
 #include "sysemu/block-backend.h"
 
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
 {
+    ImageInfo **p_image_info;
+    BlockDriverState *bs0;
     BlockDeviceInfo *info = g_malloc0(sizeof(*info));
 
     info->file                   = g_strdup(bs->filename);
@@ -92,6 +94,25 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
 
     info->write_threshold = bdrv_write_threshold_get(bs);
 
+    bs0 = bs;
+    p_image_info = &info->image;
+    while (1) {
+        Error *local_err = NULL;
+        bdrv_query_image_info(bs0, p_image_info, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            qapi_free_BlockDeviceInfo(info);
+            return NULL;
+        }
+        if (bs0->drv && bs0->backing_hd) {
+            bs0 = bs0->backing_hd;
+            (*p_image_info)->has_backing_image = true;
+            p_image_info = &((*p_image_info)->backing_image);
+        } else {
+            break;
+        }
+    }
+
     return info;
 }
 
@@ -264,9 +285,6 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 {
     BlockInfo *info = g_malloc0(sizeof(*info));
     BlockDriverState *bs = blk_bs(blk);
-    BlockDriverState *bs0;
-    ImageInfo **p_image_info;
-    Error *local_err = NULL;
     info->device = g_strdup(blk_name(blk));
     info->type = g_strdup("unknown");
     info->locked = blk_dev_is_medium_locked(blk);
@@ -289,23 +307,9 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
 
     if (bs->drv) {
         info->has_inserted = true;
-        info->inserted = bdrv_block_device_info(bs);
-
-        bs0 = bs;
-        p_image_info = &info->inserted->image;
-        while (1) {
-            bdrv_query_image_info(bs0, p_image_info, &local_err);
-            if (local_err) {
-                error_propagate(errp, local_err);
-                goto err;
-            }
-            if (bs0->drv && bs0->backing_hd) {
-                bs0 = bs0->backing_hd;
-                (*p_image_info)->has_backing_image = true;
-                p_image_info = &((*p_image_info)->backing_image);
-            } else {
-                break;
-            }
+        info->inserted = bdrv_block_device_info(bs, errp);
+        if (info->inserted == NULL) {
+            goto err;
         }
     }
 
index 055896910eab9c6946ff66f4265f594e5218f47c..ab893284d24b586b6e5f381cbd55714231b718ed 100644 (file)
@@ -124,7 +124,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
         snprintf(version, sizeof(version), "QCOW version %" PRIu32,
                  header.version);
         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bdrv_get_device_name(bs), "qcow", version);
+                  bdrv_get_device_or_node_name(bs), "qcow", version);
         ret = -ENOTSUP;
         goto fail;
     }
@@ -229,9 +229,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Disable migration when qcow images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "qcow", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);
 
     qemu_co_mutex_init(&s->lock);
index 6cbae1d205d3a1a6ab4af69965138dcc5db059e3..f47260b808faae26fe8c7b29fe71d7657a0ed98f 100644 (file)
@@ -2450,7 +2450,7 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
     if (ret < 0) {
         return ret;
     } else if (ret > 0) {
-        int metadata_ol_bitnr = ffs(ret) - 1;
+        int metadata_ol_bitnr = ctz32(ret);
         assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
 
         qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
index 2aa9dcb1d13f84263575a021a936d6d2b55ed3a7..17bb2119b2c4d1070f985f062761e52658b4bcb3 100644 (file)
@@ -351,10 +351,8 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
     memset(sn, 0, sizeof(*sn));
 
-    /* Generate an ID if it wasn't passed */
-    if (sn_info->id_str[0] == '\0') {
-        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
-    }
+    /* Generate an ID */
+    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
 
     /* Check that the ID is unique */
     if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
index 316a8db22b2eb05447331c2935fc9ffb0b09f8d5..b9a72e39d4566c149939b34c397180855c75b79d 100644 (file)
@@ -208,7 +208,7 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
     va_end(ap);
 
     error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-              bdrv_get_device_name(bs), "qcow2", msg);
+              bdrv_get_device_or_node_name(bs), "qcow2", msg);
 }
 
 static void report_unsupported_feature(BlockDriverState *bs,
@@ -1802,7 +1802,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
 {
     /* Calculate cluster_bits */
     int cluster_bits;
-    cluster_bits = ffs(cluster_size) - 1;
+    cluster_bits = ctz32(cluster_size);
     if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
         (1 << cluster_bits) != cluster_size)
     {
@@ -2110,7 +2110,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
         goto finish;
     }
 
-    refcount_order = ffs(refcount_bits) - 1;
+    refcount_order = ctz32(refcount_bits);
 
     ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
                         cluster_size, prealloc, opts, version, refcount_order,
@@ -2824,6 +2824,7 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
                              int64_t size, const char *message_format, ...)
 {
     BDRVQcowState *s = bs->opaque;
+    const char *node_name;
     char *message;
     va_list ap;
 
@@ -2847,8 +2848,11 @@ void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
                 "corruption events will be suppressed\n", message);
     }
 
-    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message,
-                                          offset >= 0, offset, size >= 0, size,
+    node_name = bdrv_get_node_name(bs);
+    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
+                                          *node_name != '\0', node_name,
+                                          message, offset >= 0, offset,
+                                          size >= 0, size,
                                           fatal, &error_abort);
     g_free(message);
 
index 892b13c80649cd8211d046c56f92336a54919818..5bbe069ce95f39c6e56bca8bf692fab9abb0bbc7 100644 (file)
@@ -408,7 +408,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
         snprintf(buf, sizeof(buf), "%" PRIx64,
             s->header.features & ~QED_FEATURE_MASK);
         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-            bdrv_get_device_name(bs), "QED", buf);
+            bdrv_get_device_or_node_name(bs), "QED", buf);
         return -ENOTSUP;
     }
     if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -436,9 +436,9 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
 
     s->table_nelems = (s->header.cluster_size * s->header.table_size) /
                       sizeof(uint64_t);
-    s->l2_shift = ffs(s->header.cluster_size) - 1;
+    s->l2_shift = ctz32(s->header.cluster_size);
     s->l2_mask = s->table_nelems - 1;
-    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+    s->l1_shift = s->l2_shift + ctz32(s->table_nelems);
 
     /* Header size calculation must not overflow uint32_t */
     if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
index 437b12251db8deb432795db775944113e5b1d4c5..f91ef75a842470ff2728195ad79b6b989eb000f5 100644 (file)
@@ -226,10 +226,7 @@ static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
 
 static void quorum_report_failure(QuorumAIOCB *acb)
 {
-    const char *reference = bdrv_get_device_name(acb->common.bs)[0] ?
-                            bdrv_get_device_name(acb->common.bs) :
-                            acb->common.bs->node_name;
-
+    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
     qapi_event_send_quorum_failure(reference, acb->sector_num,
                                    acb->nb_sectors, &error_abort);
 }
index f3ab2ddd5a0934cece108e72d06b56e3229d1210..fbe87e035b12aab2e96093922a83a3545738b68f 100644 (file)
@@ -325,7 +325,7 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
             error_setg(errp, "obj size too small");
             return -EINVAL;
         }
-        obj_order = ffs(objsize) - 1;
+        obj_order = ctz32(objsize);
     }
 
     clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
index c14172cfa6c404f95d4e5ace10a288cf5d084dab..2d5f06a390d7ff1398c484e4ee55ebab3d8d2620 100644 (file)
@@ -1716,7 +1716,7 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
         if ((object_size - 1) & object_size) {    /* not a power of 2? */
             return -EINVAL;
         }
-        obj_order = ffs(object_size) - 1;
+        obj_order = ctz32(object_size);
         if (obj_order < 20 || obj_order > 31) {
             return -EINVAL;
         }
index 698e1a1d5881cd92ba6c0aae832db687e20d7fd3..50ae610139970d676e6ced530e4ee6ddc63ce598 100644 (file)
@@ -246,9 +246,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
     if (bs->file) {
         return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
     }
-    error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              drv->format_name, bdrv_get_device_name(bs),
-              "internal snapshot deletion");
+    error_setg(errp, "Block format '%s' used by device '%s' "
+               "does not support internal snapshot deletion",
+               drv->format_name, bdrv_get_device_name(bs));
     return -ENOTSUP;
 }
 
@@ -329,9 +329,9 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
     if (drv->bdrv_snapshot_load_tmp) {
         return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
     }
-    error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              drv->format_name, bdrv_get_device_name(bs),
-              "temporarily load internal snapshot");
+    error_setg(errp, "Block format '%s' used by device '%s' "
+               "does not support temporarily loading internal snapshots",
+               drv->format_name, bdrv_get_device_name(bs));
     return -ENOTSUP;
 }
 
index 53bd02fe227e6221bbdf0503e796303afb8e40b0..7642ef359706d95257d5931df1d4a058b1f44a05 100644 (file)
@@ -502,9 +502,9 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Disable migration when vdi images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vdi", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);
 
     qemu_co_mutex_init(&s->write_lock);
index bb3ed45d5c322e8e33f977e96e8ff309c769bd0b..0776de7174249388a24bfbe85a00a00bcff8b227 100644 (file)
@@ -1002,9 +1002,9 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
     /* TODO: differencing files */
 
     /* Disable migration when VHDX images are used */
-    error_set(&s->migration_blocker,
-            QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-            "vhdx", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);
 
     return 0;
@@ -1269,7 +1269,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                         iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
                         memset(iov1.iov_base, 0, iov1.iov_len);
                         qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
-                                              sinfo.block_offset);
+                                              iov1.iov_len);
                         sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
                     }
 
@@ -1285,7 +1285,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
                         iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
                         memset(iov2.iov_base, 0, iov2.iov_len);
                         qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
-                                              sinfo.block_offset);
+                                              iov2.iov_len);
                         sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
                     }
                 }
index 8410a158a23e210d00df8e582209a125e8b8ec09..1c5e2ef1b3852d726607aba225365ff774e68a56 100644 (file)
@@ -523,7 +523,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
     }
     ret = vmdk_add_extent(bs, file, false,
                           le32_to_cpu(header.disk_sectors),
-                          le32_to_cpu(header.l1dir_offset) << 9,
+                          (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
                           0,
                           le32_to_cpu(header.l1dir_size),
                           4096,
@@ -669,7 +669,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
         snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
                  le32_to_cpu(header.version));
         error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
-                  bdrv_get_device_name(bs), "vmdk", buf);
+                  bdrv_get_device_or_node_name(bs), "vmdk", buf);
         return -ENOTSUP;
     } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
         /* VMware KB 2064959 explains that version 3 added support for
@@ -962,9 +962,9 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);
 
     /* Disable migration when VMDK images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vmdk", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);
     g_free(buf);
     return 0;
index 43e768ee76e711a2028d39d14f52d0a023578b8e..37572bab86bcdc413c0febaa43c85b0c5d040dbf 100644 (file)
@@ -318,9 +318,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);
 
     /* Disable migration when VHD images are used */
-    error_set(&s->migration_blocker,
-              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-              "vpc", bdrv_get_device_name(bs), "live migration");
+    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
+               "does not support live migration",
+               bdrv_get_device_or_node_name(bs));
     migrate_add_blocker(s->migration_blocker);
 
     return 0;
index 9be632f4042839170482b235171bdce2cd1e4996..e803589675e6db57a0b648ce8da2e04c6d8b9a2c 100644 (file)
@@ -1180,9 +1180,10 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
 
     /* Disable migration when vvfat is used rw */
     if (s->qcow) {
-        error_set(&s->migration_blocker,
-                  QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  "vvfat (rw)", bdrv_get_device_name(bs), "live migration");
+        error_setg(&s->migration_blocker,
+                   "The vvfat (rw) format used by node '%s' "
+                   "does not support live migration",
+                   bdrv_get_device_or_node_name(bs));
         migrate_add_blocker(s->migration_blocker);
     }
 
index fbb3a7997887f5728dc3182e36cdc4b2508c20c2..5eaf77e5995689d0b18bbf597c849ac38f8eefdb 100644 (file)
@@ -1164,6 +1164,68 @@ out_aio_context:
     return NULL;
 }
 
+/**
+ * block_dirty_bitmap_lookup:
+ * Return a dirty bitmap (if present), after validating
+ * the node reference and bitmap names.
+ *
+ * @node: The name of the BDS node to search for bitmaps
+ * @name: The name of the bitmap to search for
+ * @pbs: Output pointer for BDS lookup, if desired. Can be NULL.
+ * @paio: Output pointer for aio_context acquisition, if desired. Can be NULL.
+ * @errp: Output pointer for error information. Can be NULL.
+ *
+ * @return: A bitmap object on success, or NULL on failure.
+ */
+static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+                                                  const char *name,
+                                                  BlockDriverState **pbs,
+                                                  AioContext **paio,
+                                                  Error **errp)
+{
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+    AioContext *aio_context;
+
+    if (!node) {
+        error_setg(errp, "Node cannot be NULL");
+        return NULL;
+    }
+    if (!name) {
+        error_setg(errp, "Bitmap name cannot be NULL");
+        return NULL;
+    }
+    bs = bdrv_lookup_bs(node, node, NULL);
+    if (!bs) {
+        error_setg(errp, "Node '%s' not found", node);
+        return NULL;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    bitmap = bdrv_find_dirty_bitmap(bs, name);
+    if (!bitmap) {
+        error_setg(errp, "Dirty bitmap '%s' not found", name);
+        goto fail;
+    }
+
+    if (pbs) {
+        *pbs = bs;
+    }
+    if (paio) {
+        *paio = aio_context;
+    } else {
+        aio_context_release(aio_context);
+    }
+
+    return bitmap;
+
+ fail:
+    aio_context_release(aio_context);
+    return NULL;
+}
+
 /* New and old BlockDriverState structs for atomic group operations */
 
 typedef struct BlkTransactionState BlkTransactionState;
@@ -1248,13 +1310,14 @@ static void internal_snapshot_prepare(BlkTransactionState *common,
     }
 
     if (bdrv_is_read_only(bs)) {
-        error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
+        error_setg(errp, "Device '%s' is read only", device);
         return;
     }
 
     if (!bdrv_can_snapshot(bs)) {
-        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
-                  bs->drv->format_name, device, "internal snapshot");
+        error_setg(errp, "Block format '%s' used by device '%s' "
+                   "does not support internal snapshots",
+                   bs->drv->format_name, device);
         return;
     }
 
@@ -1522,6 +1585,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                      backup->sync,
                      backup->has_mode, backup->mode,
                      backup->has_speed, backup->speed,
+                     backup->has_bitmap, backup->bitmap,
                      backup->has_on_source_error, backup->on_source_error,
                      backup->has_on_target_error, backup->on_target_error,
                      &local_err);
@@ -1953,6 +2017,102 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
     aio_context_release(aio_context);
 }
 
+void qmp_block_dirty_bitmap_add(const char *node, const char *name,
+                                bool has_granularity, uint32_t granularity,
+                                Error **errp)
+{
+    AioContext *aio_context;
+    BlockDriverState *bs;
+
+    if (!name || name[0] == '\0') {
+        error_setg(errp, "Bitmap name cannot be empty");
+        return;
+    }
+
+    bs = bdrv_lookup_bs(node, node, errp);
+    if (!bs) {
+        return;
+    }
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
+    if (has_granularity) {
+        if (granularity < 512 || !is_power_of_2(granularity)) {
+            error_setg(errp, "Granularity must be power of 2 "
+                             "and at least 512");
+            goto out;
+        }
+    } else {
+        /* Default to cluster size, if available: */
+        granularity = bdrv_get_default_bitmap_granularity(bs);
+    }
+
+    bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+
+ out:
+    aio_context_release(aio_context);
+}
+
+void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
+                                   Error **errp)
+{
+    AioContext *aio_context;
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *bitmap;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+    if (!bitmap || !bs) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently frozen and cannot be removed",
+                   name);
+        goto out;
+    }
+    bdrv_dirty_bitmap_make_anon(bitmap);
+    bdrv_release_dirty_bitmap(bs, bitmap);
+
+ out:
+    aio_context_release(aio_context);
+}
+
+/**
+ * Completely clear a bitmap, for the purposes of synchronizing a bitmap
+ * immediately after a full backup operation.
+ */
+void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
+                                  Error **errp)
+{
+    AioContext *aio_context;
+    BdrvDirtyBitmap *bitmap;
+    BlockDriverState *bs;
+
+    bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+    if (!bitmap || !bs) {
+        return;
+    }
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently frozen and cannot be modified",
+                   name);
+        goto out;
+    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+        error_setg(errp,
+                   "Bitmap '%s' is currently disabled and cannot be cleared",
+                   name);
+        goto out;
+    }
+
+    bdrv_clear_dirty_bitmap(bitmap);
+
+ out:
+    aio_context_release(aio_context);
+}
+
 int hmp_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
     const char *id = qdict_get_str(qdict, "id");
@@ -2055,7 +2215,7 @@ void qmp_block_resize(bool has_device, const char *device,
         error_set(errp, QERR_UNSUPPORTED);
         break;
     case -EACCES:
-        error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
+        error_setg(errp, "Device '%s' is read only", device);
         break;
     case -EBUSY:
         error_set(errp, QERR_DEVICE_IN_USE, device);
@@ -2270,6 +2430,7 @@ void qmp_drive_backup(const char *device, const char *target,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
+                      bool has_bitmap, const char *bitmap,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
@@ -2278,6 +2439,7 @@ void qmp_drive_backup(const char *device, const char *target,
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     BlockDriverState *source = NULL;
+    BdrvDirtyBitmap *bmap = NULL;
     AioContext *aio_context;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
@@ -2377,7 +2539,16 @@ void qmp_drive_backup(const char *device, const char *target,
 
     bdrv_set_aio_context(target_bs, aio_context);
 
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+    if (has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+            goto out;
+        }
+    }
+
+    backup_start(bs, target_bs, speed, sync, bmap,
+                 on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
@@ -2391,7 +2562,7 @@ out:
 
 BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 {
-    return bdrv_named_nodes_list();
+    return bdrv_named_nodes_list(errp);
 }
 
 void qmp_blockdev_backup(const char *device, const char *target,
@@ -2438,8 +2609,8 @@ void qmp_blockdev_backup(const char *device, const char *target,
 
     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
-                 block_job_cb, bs, &local_err);
+    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
+                 on_target_error, block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
         error_propagate(errp, local_err);
@@ -2699,7 +2870,7 @@ void qmp_block_job_cancel(const char *device,
         force = false;
     }
 
-    if (job->paused && !force) {
+    if (job->user_paused && !force) {
         error_setg(errp, "The block job for device '%s' is currently paused",
                    device);
         goto out;
@@ -2716,10 +2887,11 @@ void qmp_block_job_pause(const char *device, Error **errp)
     AioContext *aio_context;
     BlockJob *job = find_block_job(device, &aio_context, errp);
 
-    if (!job) {
+    if (!job || job->user_paused) {
         return;
     }
 
+    job->user_paused = true;
     trace_qmp_block_job_pause(job);
     block_job_pause(job);
     aio_context_release(aio_context);
@@ -2730,10 +2902,11 @@ void qmp_block_job_resume(const char *device, Error **errp)
     AioContext *aio_context;
     BlockJob *job = find_block_job(device, &aio_context, errp);
 
-    if (!job) {
+    if (!job || !job->user_paused) {
         return;
     }
 
+    job->user_paused = false;
     trace_qmp_block_job_resume(job);
     block_job_resume(job);
     aio_context_release(aio_context);
index ba2255d91f71ec9b93059b38014205eee9d1927e..2755465259ec4a5ee4f3fd8314211bf6a3c1b051 100644 (file)
@@ -107,7 +107,7 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 
 void block_job_complete(BlockJob *job, Error **errp)
 {
-    if (job->paused || job->cancelled || !job->driver->complete) {
+    if (job->pause_count || job->cancelled || !job->driver->complete) {
         error_set(errp, QERR_BLOCK_JOB_NOT_READY,
                   bdrv_get_device_name(job->bs));
         return;
@@ -118,17 +118,26 @@ void block_job_complete(BlockJob *job, Error **errp)
 
 void block_job_pause(BlockJob *job)
 {
-    job->paused = true;
+    job->pause_count++;
 }
 
 bool block_job_is_paused(BlockJob *job)
 {
-    return job->paused;
+    return job->pause_count > 0;
 }
 
 void block_job_resume(BlockJob *job)
 {
-    job->paused = false;
+    assert(job->pause_count > 0);
+    job->pause_count--;
+    if (job->pause_count) {
+        return;
+    }
+    block_job_enter(job);
+}
+
+void block_job_enter(BlockJob *job)
+{
     block_job_iostatus_reset(job);
     if (job->co && !job->busy) {
         qemu_coroutine_enter(job->co, NULL);
@@ -138,7 +147,7 @@ void block_job_resume(BlockJob *job)
 void block_job_cancel(BlockJob *job)
 {
     job->cancelled = true;
-    block_job_resume(job);
+    block_job_enter(job);
 }
 
 bool block_job_is_cancelled(BlockJob *job)
@@ -258,7 +267,7 @@ BlockJobInfo *block_job_query(BlockJob *job)
     info->device    = g_strdup(bdrv_get_device_name(job->bs));
     info->len       = job->len;
     info->busy      = job->busy;
-    info->paused    = job->paused;
+    info->paused    = job->pause_count > 0;
     info->offset    = job->offset;
     info->speed     = job->speed;
     info->io_status = job->iostatus;
@@ -335,6 +344,8 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
                                     IO_OPERATION_TYPE_WRITE,
                                     action, &error_abort);
     if (action == BLOCK_ERROR_ACTION_STOP) {
+        /* Make the pause user-visible so that it can be resumed from QMP. */
+        job->user_paused = true;
         block_job_pause(job);
         block_job_iostatus_set_err(job, error);
         if (bs != job->bs) {
index 38f21511666171b7e18f20b1575f9fe5a11903a2..76065482001d12bc1e4ad10bd15bd4bd7864cdb9 100644 (file)
--- a/cputlb.c
+++ b/cputlb.c
@@ -249,9 +249,9 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
  * Called from TCG-generated code, which is under an RCU read-side
  * critical section.
  */
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
-                  hwaddr paddr, int prot,
-                  int mmu_idx, target_ulong size)
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+                             hwaddr paddr, MemTxAttrs attrs, int prot,
+                             int mmu_idx, target_ulong size)
 {
     CPUArchState *env = cpu->env_ptr;
     MemoryRegionSection *section;
@@ -301,7 +301,8 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
     env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
 
     /* refill the tlb */
-    env->iotlb[mmu_idx][index] = iotlb - vaddr;
+    env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
+    env->iotlb[mmu_idx][index].attrs = attrs;
     te->addend = addend - vaddr;
     if (prot & PAGE_READ) {
         te->addr_read = address;
@@ -331,6 +332,17 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
     }
 }
 
+/* Add a new TLB entry, but without specifying the memory
+ * transaction attributes to be used.
+ */
+void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+                  hwaddr paddr, int prot,
+                  int mmu_idx, target_ulong size)
+{
+    tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
+                            prot, mmu_idx, size);
+}
+
 /* NOTE: this function can trigger an exception */
 /* NOTE2: the returned address is not exactly the physical address: it
  * is actually a ram_addr_t (in system mode; the user mode emulation
@@ -349,7 +361,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
                  (addr & TARGET_PAGE_MASK))) {
         cpu_ldub_code(env1, addr);
     }
-    pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
+    pd = env1->iotlb[mmu_idx][page_index].addr & ~TARGET_PAGE_MASK;
     mr = iotlb_to_region(cpu, pd);
     if (memory_region_is_unassigned(mr)) {
         CPUClass *cc = CPU_GET_CLASS(cpu);
index 1fddf6a11cf5077a0ba9f0d19a45a0a4d316f38b..19901a80ecc96bd9d4b419271e1ac4bdab59b09b 100644 (file)
@@ -28,7 +28,8 @@ int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len)
     memset(fillbuf, c, FILLBUF_SIZE);
     while (len > 0) {
         l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
-        error |= address_space_rw(as, addr, fillbuf, l, true);
+        error |= address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
+                                  fillbuf, l, true);
         len -= l;
         addr += l;
     }
diff --git a/docs/bitmaps.md b/docs/bitmaps.md
new file mode 100644 (file)
index 0000000..f066b48
--- /dev/null
@@ -0,0 +1,352 @@
+<!--
+Copyright 2015 John Snow <jsnow@redhat.com> and Red Hat, Inc.
+All rights reserved.
+
+This file is licensed via The FreeBSD Documentation License, the full text of
+which is included at the end of this document.
+-->
+
+# Dirty Bitmaps and Incremental Backup
+
+* Dirty Bitmaps are objects that track which data needs to be backed up for the
+  next incremental backup.
+
+* Dirty bitmaps can be created at any time and attached to any node
+  (not just complete drives).
+
+## Dirty Bitmap Names
+
+* A dirty bitmap's name is unique to the node, but bitmaps attached to different
+  nodes can share the same name.
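+
+* For example, both of the following commands can succeed, assuming nodes
+  named 'drive0' and 'drive1' exist (hypothetical names); each node then
+  carries its own, independent 'bitmap0':
+
+```json
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive0", "name": "bitmap0" } }
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive1", "name": "bitmap0" } }
+```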
+
+## Bitmap Modes
+
+* A bitmap can be "frozen," which means that it is currently in use by a
+  backup operation and cannot be deleted, renamed, written to, reset, etc.
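+
+* As a sketch, issuing block-dirty-bitmap-remove against a frozen bitmap
+  fails with an error like the following (the bitmap name 'bitmap0' is
+  hypothetical; the message text matches the implementation in this series):
+
+```json
+{ "error": { "class": "GenericError",
+             "desc": "Bitmap 'bitmap0' is currently frozen and cannot be removed" } }
+```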
+
+## Basic QMP Usage
+
+### Supported Commands
+
+* block-dirty-bitmap-add
+* block-dirty-bitmap-remove
+* block-dirty-bitmap-clear
+
+### Creation
+
+* To create a new bitmap, enabled, on the drive with id=drive0:
+
+```json
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0"
+  }
+}
+```
+
+* This bitmap will have a default granularity that matches the cluster size of
+  its associated drive, if available, clamped to the range [4KiB, 64KiB].
+  The current default for qcow2 is 64KiB.
+
+* To create a new bitmap that tracks changes in 32KiB segments:
+
+```json
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0",
+    "granularity": 32768
+  }
+}
+```
+
+### Deletion
+
+* Bitmaps that are frozen cannot be deleted.
+
+* Deleting the bitmap does not impact any other bitmaps attached to the same
+  node, nor does it affect any backups already created from this node.
+
+* Because bitmap names are unique only within the node to which they are
+  attached, you must specify the node/drive name here, too.
+
+```json
+{ "execute": "block-dirty-bitmap-remove",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0"
+  }
+}
+```
+
+### Resetting
+
+* Resetting a bitmap will clear all information it holds.
+
+* An incremental backup created from an empty bitmap will copy no data,
+  as if nothing had changed.
+
+```json
+{ "execute": "block-dirty-bitmap-clear",
+  "arguments": {
+    "node": "drive0",
+    "name": "bitmap0"
+  }
+}
+```
+
+## Transactions (Not yet implemented)
+
+* Transactional commands are forthcoming in a future version,
+  and are not yet available for use. This section serves as
+  documentation of intent for their design and usage.
+
+### Justification
+
+Bitmaps can be safely modified when the VM is paused or halted by using
+the basic QMP commands. For instance, you might perform the following actions:
+
+1. Boot the VM in a paused state.
+2. Create a full drive backup of drive0.
+3. Create a new bitmap attached to drive0.
+4. Resume execution of the VM.
+5. Incremental backups are ready to be created.
+
+At this point, the bitmap and drive backup would be correctly in sync,
+and incremental backups made from this point forward would be correctly aligned
+to the full drive backup.
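+
+As a sketch, steps 2 and 3 of this paused-VM workflow might look like the
+following over QMP, assuming a drive named 'drive0' and a hypothetical
+target path:
+
+```json
+{ "execute": "drive-backup",
+  "arguments": { "device": "drive0", "sync": "full",
+                 "target": "/path/to/full_backup.img", "format": "qcow2" } }
+{ "execute": "block-dirty-bitmap-add",
+  "arguments": { "node": "drive0", "name": "bitmap0" } }
+```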
+
+This approach is not particularly useful if we decide we want to start
+incremental backups after the VM has already been running for a while;
+in that case, we need to perform actions such as the following:
+
+1. Boot the VM and begin execution.
+2. Using a single transaction, perform the following operations:
+    * Create bitmap0.
+    * Create a full drive backup of drive0.
+3. Incremental backups are now ready to be created.
+
+### Supported Bitmap Transactions
+
+* block-dirty-bitmap-add
+* block-dirty-bitmap-clear
+
+Their usage is identical to the corresponding QMP commands, but see below
+for examples.
+
+### Example: New Incremental Backup
+
+As outlined in the justification, perhaps we want to create a new incremental
+backup chain attached to a drive.
+
+```json
+{ "execute": "transaction",
+  "arguments": {
+    "actions": [
+      {"type": "block-dirty-bitmap-add",
+       "data": {"node": "drive0", "name": "bitmap0"} },
+      {"type": "drive-backup",
+       "data": {"device": "drive0", "target": "/path/to/full_backup.img",
+                "sync": "full", "format": "qcow2"} }
+    ]
+  }
+}
+```
+
+### Example: New Incremental Backup Anchor Point
+
+Maybe we just want to create a new full backup with an existing bitmap and
+want to reset the bitmap to track the new chain.
+
+```json
+{ "execute": "transaction",
+  "arguments": {
+    "actions": [
+      {"type": "block-dirty-bitmap-clear",
+       "data": {"node": "drive0", "name": "bitmap0"} },
+      {"type": "drive-backup",
+       "data": {"device": "drive0", "target": "/path/to/new_full_backup.img",
+                "sync": "full", "format": "qcow2"} }
+    ]
+  }
+}
+```
+
+## Incremental Backups
+
+The star of the show.
+
+**Nota Bene!** Only incremental backups of entire drives are supported for now.
+So although you can attach a bitmap to any arbitrary node, a bitmap is
+currently only useful when attached to the root node, because drive-backup
+only supports drives/devices rather than arbitrary nodes.
+
+### Example: First Incremental Backup
+
+1. Create a full backup and sync it to the dirty bitmap, as in the transactional
+examples above; or with the VM offline, manually create a full copy and then
+create a new bitmap before the VM begins execution.
+
+    * Let's assume the full backup is named 'full_backup.img'.
+    * Let's assume the bitmap you created is 'bitmap0' attached to 'drive0'.
+
+2. Create a destination image for the incremental backup that utilizes the
+full backup as a backing image.
+
+    * Let's assume it is named 'incremental.0.img'.
+
+    ```sh
+    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+    ```
+
+3. Issue the incremental backup command:
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.0.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+### Example: Second Incremental Backup
+
+1. Create a new destination image for the incremental backup that points to the
+   previous one, e.g.: 'incremental.1.img'
+
+    ```sh
+    # qemu-img create -f qcow2 incremental.1.img -b incremental.0.img -F qcow2
+    ```
+
+2. Issue a new incremental backup command. The only difference here is that we
+   have changed the target image below.
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.1.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+## Errors
+
+* In the event of an error that occurs after a backup job is successfully
+  launched, either by a direct QMP command or a QMP transaction, the user
+  will receive a BLOCK_JOB_COMPLETED event with a failure message, accompanied
+  by a BLOCK_JOB_ERROR event.
+
+* If the job itself is cancelled, the user will receive a BLOCK_JOB_CANCELLED
+  event instead of the pair of COMPLETED and ERROR events (see the sketch
+  after this list).
+
+* In either case, the tracking data contained within the bitmap is safely
+  rolled back, so the contents of the bitmap are not lost. The image file
+  created for the failed attempt can be safely deleted.
+
+* Once the underlying problem is fixed (e.g. more storage space is freed up),
+  you can simply retry the incremental backup command with the same bitmap.
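+
+As a sketch, the cancellation event mentioned above has the following shape;
+the timestamp and data values here are purely illustrative:
+
+```json
+{ "timestamp": { "seconds": 1424709442, "microseconds": 844524 },
+  "data": { "type": "backup", "device": "drive0",
+            "speed": 0, "offset": 0, "len": 67108864 },
+  "event": "BLOCK_JOB_CANCELLED" }
+```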
+
+### Example
+
+1. Create a target image:
+
+    ```sh
+    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+    ```
+
+2. Attempt to create an incremental backup via QMP:
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.0.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+3. Receive an event notifying us of failure:
+
+    ```json
+    { "timestamp": { "seconds": 1424709442, "microseconds": 844524 },
+      "data": { "speed": 0, "offset": 0, "len": 67108864,
+                "error": "No space left on device",
+                "device": "drive1", "type": "backup" },
+      "event": "BLOCK_JOB_COMPLETED" }
+    ```
+
+4. Delete the failed incremental, and re-create the image.
+
+    ```sh
+    # rm incremental.0.img
+    # qemu-img create -f qcow2 incremental.0.img -b full_backup.img -F qcow2
+    ```
+
+5. Retry the command after fixing the underlying problem,
+   such as freeing up space on the backup volume:
+
+    ```json
+    { "execute": "drive-backup",
+      "arguments": {
+        "device": "drive0",
+        "bitmap": "bitmap0",
+        "target": "incremental.0.img",
+        "format": "qcow2",
+        "sync": "dirty-bitmap",
+        "mode": "existing"
+      }
+    }
+    ```
+
+6. Receive confirmation that the job completed successfully:
+
+    ```json
+    { "timestamp": { "seconds": 1424709668, "microseconds": 526525 },
+      "data": { "device": "drive1", "type": "backup",
+                "speed": 0, "len": 67108864, "offset": 67108864},
+      "event": "BLOCK_JOB_COMPLETED" }
+    ```
+
+<!--
+The FreeBSD Documentation License
+
+Redistribution and use in source (Markdown) and 'compiled' forms (SGML, HTML,
+PDF, PostScript, RTF and so forth) with or without modification, are permitted
+provided that the following conditions are met:
+
+Redistributions of source code (Markdown) must retain the above copyright
+notice, this list of conditions and the following disclaimer of this file
+unmodified.
+
+Redistributions in compiled form (transformed to other DTDs, converted to PDF,
+PostScript, RTF and other formats) must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation and/or
+other materials provided with the distribution.
+
+THIS DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR  PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS  BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+-->
index d759d197486a3edf3b629fb11e9922ad92fb041a..b19e490eb5588439ee56751cff6383ca00bea988 100644 (file)
@@ -31,21 +31,26 @@ Example:
 BLOCK_IMAGE_CORRUPTED
 ---------------------
 
-Emitted when a disk image is being marked corrupt.
+Emitted when a disk image is being marked corrupt. The image can be
+identified by its device or node name. The 'device' field is always
+present for compatibility reasons, but it can be empty ("") if the
+image has no associated device name.
 
 Data:
 
-- "device": Device name (json-string)
-- "msg":    Informative message (e.g., reason for the corruption) (json-string)
-- "offset": If the corruption resulted from an image access, this is the access
-            offset into the image (json-int)
-- "size":   If the corruption resulted from an image access, this is the access
-            size (json-int)
+- "device":    Device name (json-string)
+- "node-name": Node name (json-string, optional)
+- "msg":       Informative message (e.g., reason for the corruption)
+               (json-string)
+- "offset":    If the corruption resulted from an image access, this
+               is the access offset into the image (json-int)
+- "size":      If the corruption resulted from an image access, this
+               is the access size (json-int)
 
 Example:
 
 { "event": "BLOCK_IMAGE_CORRUPTED",
-    "data": { "device": "ide0-hd0",
+    "data": { "device": "ide0-hd0", "node-name": "node0",
         "msg": "Prevented active L1 table overwrite", "offset": 196608,
         "size": 65536 },
     "timestamp": { "seconds": 1378126126, "microseconds": 966463 } }
diff --git a/exec.c b/exec.c
index 4717928cff3a7bdf45db261ff187d44eefbf8267..ae37b98e8c958dfbbfc3e86b56d8074f55aa9932 100644 (file)
--- a/exec.c
+++ b/exec.c
@@ -1847,7 +1847,7 @@ static const MemoryRegionOps notdirty_mem_ops = {
 };
 
 /* Generate a debug exception if a watchpoint has been hit.  */
-static void check_watchpoint(int offset, int len, int flags)
+static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
 {
     CPUState *cpu = current_cpu;
     CPUArchState *env = cpu->env_ptr;
@@ -1873,6 +1873,7 @@ static void check_watchpoint(int offset, int len, int flags)
                 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
             }
             wp->hitaddr = vaddr;
+            wp->hitattrs = attrs;
             if (!cpu->watchpoint_hit) {
                 cpu->watchpoint_hit = wp;
                 tb_check_watchpoint(cpu);
@@ -1894,69 +1895,93 @@ static void check_watchpoint(int offset, int len, int flags)
 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
    so these check for a hit then pass through to the normal out-of-line
    phys routines.  */
-static uint64_t watch_mem_read(void *opaque, hwaddr addr,
-                               unsigned size)
+static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
+                                  unsigned size, MemTxAttrs attrs)
 {
-    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
+    MemTxResult res;
+    uint64_t data;
+
+    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
     switch (size) {
-    case 1: return ldub_phys(&address_space_memory, addr);
-    case 2: return lduw_phys(&address_space_memory, addr);
-    case 4: return ldl_phys(&address_space_memory, addr);
+    case 1:
+        data = address_space_ldub(&address_space_memory, addr, attrs, &res);
+        break;
+    case 2:
+        data = address_space_lduw(&address_space_memory, addr, attrs, &res);
+        break;
+    case 4:
+        data = address_space_ldl(&address_space_memory, addr, attrs, &res);
+        break;
     default: abort();
     }
+    *pdata = data;
+    return res;
 }
 
-static void watch_mem_write(void *opaque, hwaddr addr,
-                            uint64_t val, unsigned size)
+static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size,
+                                   MemTxAttrs attrs)
 {
-    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
+    MemTxResult res;
+
+    check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
     switch (size) {
     case 1:
-        stb_phys(&address_space_memory, addr, val);
+        address_space_stb(&address_space_memory, addr, val, attrs, &res);
         break;
     case 2:
-        stw_phys(&address_space_memory, addr, val);
+        address_space_stw(&address_space_memory, addr, val, attrs, &res);
         break;
     case 4:
-        stl_phys(&address_space_memory, addr, val);
+        address_space_stl(&address_space_memory, addr, val, attrs, &res);
         break;
     default: abort();
     }
+    return res;
 }
 
 static const MemoryRegionOps watch_mem_ops = {
-    .read = watch_mem_read,
-    .write = watch_mem_write,
+    .read_with_attrs = watch_mem_read,
+    .write_with_attrs = watch_mem_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static uint64_t subpage_read(void *opaque, hwaddr addr,
-                             unsigned len)
+static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
+                                unsigned len, MemTxAttrs attrs)
 {
     subpage_t *subpage = opaque;
     uint8_t buf[8];
+    MemTxResult res;
 
 #if defined(DEBUG_SUBPAGE)
     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
            subpage, len, addr);
 #endif
-    address_space_read(subpage->as, addr + subpage->base, buf, len);
+    res = address_space_read(subpage->as, addr + subpage->base,
+                             attrs, buf, len);
+    if (res) {
+        return res;
+    }
     switch (len) {
     case 1:
-        return ldub_p(buf);
+        *data = ldub_p(buf);
+        return MEMTX_OK;
     case 2:
-        return lduw_p(buf);
+        *data = lduw_p(buf);
+        return MEMTX_OK;
     case 4:
-        return ldl_p(buf);
+        *data = ldl_p(buf);
+        return MEMTX_OK;
     case 8:
-        return ldq_p(buf);
+        *data = ldq_p(buf);
+        return MEMTX_OK;
     default:
         abort();
     }
 }
 
-static void subpage_write(void *opaque, hwaddr addr,
-                          uint64_t value, unsigned len)
+static MemTxResult subpage_write(void *opaque, hwaddr addr,
+                                 uint64_t value, unsigned len, MemTxAttrs attrs)
 {
     subpage_t *subpage = opaque;
     uint8_t buf[8];
@@ -1982,7 +2007,8 @@ static void subpage_write(void *opaque, hwaddr addr,
     default:
         abort();
     }
-    address_space_write(subpage->as, addr + subpage->base, buf, len);
+    return address_space_write(subpage->as, addr + subpage->base,
+                               attrs, buf, len);
 }
 
 static bool subpage_accepts(void *opaque, hwaddr addr,
@@ -1999,8 +2025,8 @@ static bool subpage_accepts(void *opaque, hwaddr addr,
 }
 
 static const MemoryRegionOps subpage_ops = {
-    .read = subpage_read,
-    .write = subpage_write,
+    .read_with_attrs = subpage_read,
+    .write_with_attrs = subpage_write,
     .impl.min_access_size = 1,
     .impl.max_access_size = 8,
     .valid.min_access_size = 1,
@@ -2293,15 +2319,15 @@ static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
     return l;
 }
 
-bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
-                      int len, bool is_write)
+MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+                             uint8_t *buf, int len, bool is_write)
 {
     hwaddr l;
     uint8_t *ptr;
     uint64_t val;
     hwaddr addr1;
     MemoryRegion *mr;
-    bool error = false;
+    MemTxResult result = MEMTX_OK;
 
     while (len > 0) {
         l = len;
@@ -2316,22 +2342,26 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                 case 8:
                     /* 64 bit write access */
                     val = ldq_p(buf);
-                    error |= io_mem_write(mr, addr1, val, 8);
+                    result |= memory_region_dispatch_write(mr, addr1, val, 8,
+                                                           attrs);
                     break;
                 case 4:
                     /* 32 bit write access */
                     val = ldl_p(buf);
-                    error |= io_mem_write(mr, addr1, val, 4);
+                    result |= memory_region_dispatch_write(mr, addr1, val, 4,
+                                                           attrs);
                     break;
                 case 2:
                     /* 16 bit write access */
                     val = lduw_p(buf);
-                    error |= io_mem_write(mr, addr1, val, 2);
+                    result |= memory_region_dispatch_write(mr, addr1, val, 2,
+                                                           attrs);
                     break;
                 case 1:
                     /* 8 bit write access */
                     val = ldub_p(buf);
-                    error |= io_mem_write(mr, addr1, val, 1);
+                    result |= memory_region_dispatch_write(mr, addr1, val, 1,
+                                                           attrs);
                     break;
                 default:
                     abort();
@@ -2350,22 +2380,26 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                 switch (l) {
                 case 8:
                     /* 64 bit read access */
-                    error |= io_mem_read(mr, addr1, &val, 8);
+                    result |= memory_region_dispatch_read(mr, addr1, &val, 8,
+                                                          attrs);
                     stq_p(buf, val);
                     break;
                 case 4:
                     /* 32 bit read access */
-                    error |= io_mem_read(mr, addr1, &val, 4);
+                    result |= memory_region_dispatch_read(mr, addr1, &val, 4,
+                                                          attrs);
                     stl_p(buf, val);
                     break;
                 case 2:
                     /* 16 bit read access */
-                    error |= io_mem_read(mr, addr1, &val, 2);
+                    result |= memory_region_dispatch_read(mr, addr1, &val, 2,
+                                                          attrs);
                     stw_p(buf, val);
                     break;
                 case 1:
                     /* 8 bit read access */
-                    error |= io_mem_read(mr, addr1, &val, 1);
+                    result |= memory_region_dispatch_read(mr, addr1, &val, 1,
+                                                          attrs);
                     stb_p(buf, val);
                     break;
                 default:
@@ -2382,25 +2416,27 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
         addr += l;
     }
 
-    return error;
+    return result;
 }
 
-bool address_space_write(AddressSpace *as, hwaddr addr,
-                         const uint8_t *buf, int len)
+MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+                                const uint8_t *buf, int len)
 {
-    return address_space_rw(as, addr, (uint8_t *)buf, len, true);
+    return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
 }
 
-bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
+MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+                               uint8_t *buf, int len)
 {
-    return address_space_rw(as, addr, buf, len, false);
+    return address_space_rw(as, addr, attrs, buf, len, false);
 }
 
 
 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
                             int len, int is_write)
 {
-    address_space_rw(&address_space_memory, addr, buf, len, is_write);
+    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
+                     buf, len, is_write);
 }
 
 enum write_rom_type {
@@ -2602,7 +2638,8 @@ void *address_space_map(AddressSpace *as,
         memory_region_ref(mr);
         bounce.mr = mr;
         if (!is_write) {
-            address_space_read(as, addr, bounce.buffer, l);
+            address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
+                               bounce.buffer, l);
         }
 
         *plen = l;
@@ -2655,7 +2692,8 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
         return;
     }
     if (is_write) {
-        address_space_write(as, bounce.addr, bounce.buffer, access_len);
+        address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
+                            bounce.buffer, access_len);
     }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
@@ -2678,19 +2716,22 @@ void cpu_physical_memory_unmap(void *buffer, hwaddr len,
 }
 
 /* warning: addr must be aligned */
-static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
-                                         enum device_endian endian)
+static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
+                                                  MemTxAttrs attrs,
+                                                  MemTxResult *result,
+                                                  enum device_endian endian)
 {
     uint8_t *ptr;
     uint64_t val;
     MemoryRegion *mr;
     hwaddr l = 4;
     hwaddr addr1;
+    MemTxResult r;
 
     mr = address_space_translate(as, addr, &addr1, &l, false);
     if (l < 4 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
-        io_mem_read(mr, addr1, &val, 4);
+        r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap32(val);
@@ -2716,40 +2757,68 @@ static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
             val = ldl_p(ptr);
             break;
         }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
     }
     return val;
 }
 
+uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_ldl_internal(as, addr, attrs, result,
+                                      DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
+                              MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_ldl_internal(as, addr, attrs, result,
+                                      DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
+                              MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_ldl_internal(as, addr, attrs, result,
+                                      DEVICE_BIG_ENDIAN);
+}
+
 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
+    return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
+    return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
+    return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* warning: addr must be aligned */
-static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
-                                         enum device_endian endian)
+static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
+                                                  MemTxAttrs attrs,
+                                                  MemTxResult *result,
+                                                  enum device_endian endian)
 {
     uint8_t *ptr;
     uint64_t val;
     MemoryRegion *mr;
     hwaddr l = 8;
     hwaddr addr1;
+    MemTxResult r;
 
     mr = address_space_translate(as, addr, &addr1, &l,
                                  false);
     if (l < 8 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
-        io_mem_read(mr, addr1, &val, 8);
+        r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap64(val);
@@ -2775,48 +2844,88 @@ static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
             val = ldq_p(ptr);
             break;
         }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
     }
     return val;
 }
 
+uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_ldq_internal(as, addr, attrs, result,
+                                      DEVICE_NATIVE_ENDIAN);
+}
+
+uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_ldq_internal(as, addr, attrs, result,
+                                      DEVICE_LITTLE_ENDIAN);
+}
+
+uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_ldq_internal(as, addr, attrs, result,
+                                      DEVICE_BIG_ENDIAN);
+}
+
 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
+    return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
+    return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
 {
-    return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
+    return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* XXX: optimize */
-uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
+uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result)
 {
     uint8_t val;
-    address_space_rw(as, addr, &val, 1, 0);
+    MemTxResult r;
+
+    r = address_space_rw(as, addr, attrs, &val, 1, 0);
+    if (result) {
+        *result = r;
+    }
     return val;
 }
 
+uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
+{
+    return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
 /* warning: addr must be aligned */
-static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
-                                          enum device_endian endian)
+static inline uint32_t address_space_lduw_internal(AddressSpace *as,
+                                                   hwaddr addr,
+                                                   MemTxAttrs attrs,
+                                                   MemTxResult *result,
+                                                   enum device_endian endian)
 {
     uint8_t *ptr;
     uint64_t val;
     MemoryRegion *mr;
     hwaddr l = 2;
     hwaddr addr1;
+    MemTxResult r;
 
     mr = address_space_translate(as, addr, &addr1, &l,
                                  false);
     if (l < 2 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
-        io_mem_read(mr, addr1, &val, 2);
+        r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap16(val);
@@ -2842,39 +2951,66 @@ static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
             val = lduw_p(ptr);
             break;
         }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
     }
     return val;
 }
 
+uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_lduw_internal(as, addr, attrs, result,
+                                       DEVICE_NATIVE_ENDIAN);
+}
+
+uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_lduw_internal(as, addr, attrs, result,
+                                       DEVICE_LITTLE_ENDIAN);
+}
+
+uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
+                           MemTxAttrs attrs, MemTxResult *result)
+{
+    return address_space_lduw_internal(as, addr, attrs, result,
+                                       DEVICE_BIG_ENDIAN);
+}
+
 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
 {
-    return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
+    return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
 {
-    return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
+    return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
 {
-    return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
+    return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* warning: addr must be aligned. The ram page is not marked as dirty
    and the code inside is not invalidated. It is useful if the dirty
    bits are used to track modified PTEs */
-void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
+void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
+                                MemTxAttrs attrs, MemTxResult *result)
 {
     uint8_t *ptr;
     MemoryRegion *mr;
     hwaddr l = 4;
     hwaddr addr1;
+    MemTxResult r;
 
     mr = address_space_translate(as, addr, &addr1, &l,
                                  true);
     if (l < 4 || !memory_access_is_direct(mr, true)) {
-        io_mem_write(mr, addr1, val, 4);
+        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
     } else {
         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
@@ -2888,18 +3024,30 @@ void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
                 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
             }
         }
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
     }
 }
 
+void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
+{
+    address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
 /* warning: addr must be aligned */
-static inline void stl_phys_internal(AddressSpace *as,
-                                     hwaddr addr, uint32_t val,
-                                     enum device_endian endian)
+static inline void address_space_stl_internal(AddressSpace *as,
+                                              hwaddr addr, uint32_t val,
+                                              MemTxAttrs attrs,
+                                              MemTxResult *result,
+                                              enum device_endian endian)
 {
     uint8_t *ptr;
     MemoryRegion *mr;
     hwaddr l = 4;
     hwaddr addr1;
+    MemTxResult r;
 
     mr = address_space_translate(as, addr, &addr1, &l,
                                  true);
@@ -2913,7 +3061,7 @@ static inline void stl_phys_internal(AddressSpace *as,
             val = bswap32(val);
         }
 #endif
-        io_mem_write(mr, addr1, val, 4);
+        r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
     } else {
         /* RAM case */
         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -2930,40 +3078,79 @@ static inline void stl_phys_internal(AddressSpace *as,
             break;
         }
         invalidate_and_set_dirty(addr1, 4);
+        r = MEMTX_OK;
+    }
+    if (result) {
+        *result = r;
     }
 }
 
+void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    address_space_stl_internal(as, addr, val, attrs, result,
+                               DEVICE_NATIVE_ENDIAN);
+}
+
+void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    address_space_stl_internal(as, addr, val, attrs, result,
+                               DEVICE_LITTLE_ENDIAN);
+}
+
+void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    address_space_stl_internal(as, addr, val, attrs, result,
+                               DEVICE_BIG_ENDIAN);
+}
+
 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
+    address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
+    address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
+    address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* XXX: optimize */
-void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
+void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
 {
     uint8_t v = val;
-    address_space_rw(as, addr, &v, 1, 1);
+    MemTxResult r;
+
+    r = address_space_rw(as, addr, attrs, &v, 1, 1);
+    if (result) {
+        *result = r;
+    }
+}
+
+void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
+{
+    address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* warning: addr must be aligned */
-static inline void stw_phys_internal(AddressSpace *as,
-                                     hwaddr addr, uint32_t val,
-                                     enum device_endian endian)
+static inline void address_space_stw_internal(AddressSpace *as,
+                                              hwaddr addr, uint32_t val,
+                                              MemTxAttrs attrs,
+                                              MemTxResult *result,
+                                              enum device_endian endian)
 {
     uint8_t *ptr;
     MemoryRegion *mr;
     hwaddr l = 2;
     hwaddr addr1;
+    MemTxResult r;
 
     mr = address_space_translate(as, addr, &addr1, &l, true);
     if (l < 2 || !memory_access_is_direct(mr, true)) {
@@ -2976,7 +3163,7 @@ static inline void stw_phys_internal(AddressSpace *as,
             val = bswap16(val);
         }
 #endif
-        io_mem_write(mr, addr1, val, 2);
+        r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
     } else {
         /* RAM case */
         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
@@ -2993,41 +3180,95 @@ static inline void stw_phys_internal(AddressSpace *as,
             break;
         }
         invalidate_and_set_dirty(addr1, 2);
+        r = MEMTX_OK;
     }
+    if (result) {
+        *result = r;
+    }
+}
+
+void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    address_space_stw_internal(as, addr, val, attrs, result,
+                               DEVICE_NATIVE_ENDIAN);
+}
+
+void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    address_space_stw_internal(as, addr, val, attrs, result,
+                               DEVICE_LITTLE_ENDIAN);
+}
+
+void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    address_space_stw_internal(as, addr, val, attrs, result,
+                               DEVICE_BIG_ENDIAN);
 }
 
 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
+    address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
+    address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
 {
-    stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
+    address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* XXX: optimize */
-void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
+void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
 {
+    MemTxResult r;
     val = tswap64(val);
-    address_space_rw(as, addr, (void *) &val, 8, 1);
+    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
+    if (result) {
+        *result = r;
+    }
 }
 
-void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
+void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
 {
+    MemTxResult r;
     val = cpu_to_le64(val);
-    address_space_rw(as, addr, (void *) &val, 8, 1);
+    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
+    if (result) {
+        *result = r;
+    }
+}
+
+void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
+                       MemTxAttrs attrs, MemTxResult *result)
+{
+    MemTxResult r;
+    val = cpu_to_be64(val);
+    r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
+    if (result) {
+        *result = r;
+    }
+}
+
+void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
+{
+    address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
+}
+
+void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
+{
+    address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
 {
-    val = cpu_to_be64(val);
-    address_space_rw(as, addr, (void *) &val, 8, 1);
+    address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* virtual memory access for debug (includes writing to ROM) */
@@ -3051,7 +3292,8 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
         if (is_write) {
             cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
         } else {
-            address_space_rw(cpu->as, phys_addr, buf, l, 0);
+            address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
+                             buf, l, 0);
         }
         len -= l;
         buf += l;
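
The exec.c hunks above set the pattern for the rest of this merge: each physical-memory accessor gains an address_space_* variant that threads MemTxAttrs down to the device and can report a MemTxResult, while the legacy ld*_phys/st*_phys names survive as thin wrappers passing MEMTXATTRS_UNSPECIFIED and a NULL result pointer. A minimal sketch of a caller that actually checks the transaction result (QEMU-internal code; fetch_descriptor is a hypothetical helper):

    #include "exec/address-spaces.h"    /* address_space_memory */
    #include "exec/memory.h"            /* MemTxAttrs, MemTxResult */

    static bool fetch_descriptor(hwaddr addr, uint32_t *desc)
    {
        MemTxResult res;

        *desc = address_space_ldl(&address_space_memory, addr,
                                  MEMTXATTRS_UNSPECIFIED, &res);
        /* res is MEMTX_OK on success; otherwise the bus error is
         * reported instead of being silently lost. */
        return res == MEMTX_OK;
    }

Callers that do not care, like the wrappers above, simply pass NULL and keep the old fire-and-forget behaviour.
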
index 3089533a4f723d8e43169c838cc3e8f9004e181c..a6de819f690816473aad59faf7e80afecccc5455 100644 (file)
@@ -998,8 +998,7 @@ ETEXI
         .params     = "protocol hostname port tls-port cert-subject",
         .help       = "send migration info to spice/vnc client",
         .user_print = monitor_user_noop,
-        .mhandler.cmd_async = client_migrate_info,
-        .flags      = MONITOR_CMD_ASYNC,
+        .mhandler.cmd_new = client_migrate_info,
     },
 
 STEXI
diff --git a/hmp.c b/hmp.c
index f142d366ef6567e6e87f8a545c9bbc0b825b25a2..d85d913a795e3171a30dc63ea2ae5b1582f6dfd3 100644 (file)
--- a/hmp.c
+++ b/hmp.c
@@ -391,8 +391,7 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
                         inserted->iops_size);
     }
 
-    /* TODO: inserted->image should never be null */
-    if (verbose && inserted->image) {
+    if (verbose) {
         monitor_printf(mon, "\nImages:\n");
         image_info = inserted->image;
         while (1) {
@@ -1062,7 +1061,8 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
 
     qmp_drive_backup(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0, false, 0, &err);
+                     true, mode, false, 0, false, NULL,
+                     false, 0, false, 0, &err);
     hmp_handle_error(mon, &err);
 }
 
index 612fec03ee82db9a4671f8bfa9c5e5b117f7c6bd..77e1126f8f4820bba5f22da4ae4a0906d5d42518 100644 (file)
@@ -120,7 +120,7 @@ static bool acpi_pcihp_pc_no_hotplug(AcpiPciHpState *s, PCIDevice *dev)
 static void acpi_pcihp_eject_slot(AcpiPciHpState *s, unsigned bsel, unsigned slots)
 {
     BusChild *kid, *next;
-    int slot = ffs(slots) - 1;
+    int slot = ctz32(slots);
     PCIBus *bus = acpi_pcihp_find_hotplug_bus(s, bsel);
 
     if (!bus) {
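
This and the following conversions replace ffs(3), which returns a 1-based bit index and 0 when no bit is set, with QEMU's ctz32(), which returns a 0-based index and 32 for an all-zero word. For nonzero input, ffs(x) - 1 equals ctz32(x); only the empty case differs, which is why converted call sites either guard with != 32 or remap 32 explicitly. A standalone sketch of the difference (the local ctz32() models the qemu/host-utils.h contract):

    #include <stdio.h>
    #include <stdint.h>
    #include <strings.h>            /* ffs() */

    static uint32_t ctz32(uint32_t val)
    {
        return val ? (uint32_t)__builtin_ctz(val) : 32;
    }

    int main(void)
    {
        uint32_t samples[] = { 0x0, 0x1, 0x8, 0xc0 };

        for (int i = 0; i < 4; i++) {
            uint32_t v = samples[i];
            printf("v=0x%02x  ffs(v)-1=%2d  ctz32(v)=%2u\n",
                   v, ffs(v) - 1, (unsigned)ctz32(v));
        }
        /* The two agree for nonzero v; for v == 0, ffs(v) - 1 yields -1
         * while ctz32(v) yields 32, hence the new "!= 32" guards. */
        return 0;
    }
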
index e82d61d28c6915b3f71851be9edf35325becb665..9fe7e8b5cbfd9f92fc34e4319a0682e08ae4ebf6 100644 (file)
@@ -157,9 +157,12 @@ static void clipper_init(MachineState *machine)
             load_image_targphys(initrd_filename, initrd_base,
                                 ram_size - initrd_base);
 
-            stq_phys(&address_space_memory,
-                     param_offset + 0x100, initrd_base + 0xfffffc0000000000ULL);
-            stq_phys(&address_space_memory, param_offset + 0x108, initrd_size);
+            address_space_stq(&address_space_memory, param_offset + 0x100,
+                              initrd_base + 0xfffffc0000000000ULL,
+                              MEMTXATTRS_UNSPECIFIED,
+                              NULL);
+            address_space_stq(&address_space_memory, param_offset + 0x108,
+                              initrd_size, MEMTXATTRS_UNSPECIFIED, NULL);
         }
     }
 }
index a6044f28c3daf38c3bca688197f2fa3cdba20e45..7df842dff77ef26d74ecaadd889028e480e956a2 100644 (file)
@@ -613,7 +613,8 @@ static bool make_iommu_tlbe(hwaddr taddr, hwaddr mask, IOMMUTLBEntry *ret)
    translation, given the address of the PTE.  */
 static bool pte_translate(hwaddr pte_addr, IOMMUTLBEntry *ret)
 {
-    uint64_t pte = ldq_phys(&address_space_memory, pte_addr);
+    uint64_t pte = address_space_ldq(&address_space_memory, pte_addr,
+                                     MEMTXATTRS_UNSPECIFIED, NULL);
 
     /* Check valid bit.  */
     if ((pte & 1) == 0) {
index a48d1b28d46ca01d009c13183ba26e93a925c838..fa6950352cd26ee8f857c67c8838867b6c98f337 100644 (file)
@@ -170,7 +170,8 @@ static void default_reset_secondary(ARMCPU *cpu,
 {
     CPUARMState *env = &cpu->env;
 
-    stl_phys_notdirty(&address_space_memory, info->smp_bootreg_addr, 0);
+    address_space_stl_notdirty(&address_space_memory, info->smp_bootreg_addr,
+                               0, MEMTXATTRS_UNSPECIFIED, NULL);
     env->regs[15] = info->smp_loader_start;
 }
 
@@ -180,7 +181,8 @@ static inline bool have_dtb(const struct arm_boot_info *info)
 }
 
 #define WRITE_WORD(p, value) do { \
-    stl_phys_notdirty(&address_space_memory, p, value);  \
+    address_space_stl_notdirty(&address_space_memory, p, value, \
+                               MEMTXATTRS_UNSPECIFIED, NULL);  \
     p += 4;                       \
 } while (0)
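
The _notdirty store used by WRITE_WORD() deliberately skips dirty-bit bookkeeping: the RAM page is not flagged for migration and TCG does not invalidate any translated code on it, which is exactly what these PTE and boot-blob pokes want. A hypothetical use of the macro, laying down two boot words the way the secondary-CPU loader setup does:

    static void write_boot_words(const struct arm_boot_info *info)
    {
        hwaddr p = info->smp_loader_start;      /* assumed scratch area */

        WRITE_WORD(p, 0xe320f003);              /* ARM WFI opcode */
        WRITE_WORD(p, info->smp_bootreg_addr);
        /* p has advanced by 8; both stores bypassed dirty tracking. */
    }
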
 
index dd2a67bcf08229bb7c2de37fa7a6b533f1dca073..b2d048b911de640b545e3db161a7729e4dc0500c 100644 (file)
@@ -69,11 +69,17 @@ static void hb_reset_secondary(ARMCPU *cpu, const struct arm_boot_info *info)
 
     switch (info->nb_cpus) {
     case 4:
-        stl_phys_notdirty(&address_space_memory, SMP_BOOT_REG + 0x30, 0);
+        address_space_stl_notdirty(&address_space_memory,
+                                   SMP_BOOT_REG + 0x30, 0,
+                                   MEMTXATTRS_UNSPECIFIED, NULL);
     case 3:
-        stl_phys_notdirty(&address_space_memory, SMP_BOOT_REG + 0x20, 0);
+        address_space_stl_notdirty(&address_space_memory,
+                                   SMP_BOOT_REG + 0x20, 0,
+                                   MEMTXATTRS_UNSPECIFIED, NULL);
     case 2:
-        stl_phys_notdirty(&address_space_memory, SMP_BOOT_REG + 0x10, 0);
+        address_space_stl_notdirty(&address_space_memory,
+                                   SMP_BOOT_REG + 0x10, 0,
+                                   MEMTXATTRS_UNSPECIFIED, NULL);
         env->regs[15] = SMP_BOOT_ADDR;
         break;
     default:
index 2a5406d98d76dc805bdbef68d6905852776cc2a3..d24315966460f77b121310552786a154931256ac 100644 (file)
@@ -579,7 +579,10 @@ static uint32_t mipid_txrx(void *opaque, uint32_t cmd, int len)
 
     case 0x26: /* GAMSET */
         if (!s->pm) {
-            s->gamma = ffs(s->param[0] & 0xf) - 1;
+            s->gamma = ctz32(s->param[0] & 0xf);
+            if (s->gamma == 32) {
+                s->gamma = -1; /* XXX: should this be 0? */
+            }
         } else if (s->pm < 0) {
             s->pm = 1;
         }
index 91ffb589e5817054c2da971f703bb41a83988519..de2b289257c713f718339ad2391503410c70bf04 100644 (file)
@@ -2004,8 +2004,7 @@ static void omap_mpuio_write(void *opaque, hwaddr addr,
     case 0x04: /* OUTPUT_REG */
         diff = (s->outputs ^ value) & ~s->dir;
         s->outputs = value;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
@@ -2017,8 +2016,7 @@ static void omap_mpuio_write(void *opaque, hwaddr addr,
         s->dir = value;
 
         value = s->outputs & ~s->dir;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
index 165ba2a169de738f57977d6bf9e99cd24408d84c..f921a5680c978e7213136e894d3272b74822a341 100644 (file)
@@ -274,7 +274,7 @@ static void pxa2xx_pwrmode_write(CPUARMState *env, const ARMCPRegInfo *ri,
         s->cpu->env.uncached_cpsr = ARM_CPU_MODE_SVC;
         s->cpu->env.daif = PSTATE_A | PSTATE_F | PSTATE_I;
         s->cpu->env.cp15.sctlr_ns = 0;
-        s->cpu->env.cp15.c1_coproc = 0;
+        s->cpu->env.cp15.cpacr_el1 = 0;
         s->cpu->env.cp15.ttbr0_el[1] = 0;
         s->cpu->env.cp15.dacr_ns = 0;
         s->pm_regs[PSSR >> 2] |= 0x8; /* Set STS */
index 354ccf1ea1af42bd0b3a4b217c3677fe81b4dd95..c89c8045c3a2a2197dc9bee6106bbb758cbe09f9 100644 (file)
@@ -137,7 +137,7 @@ static void pxa2xx_gpio_handler_update(PXA2xxGPIOInfo *s) {
         level = s->olevel[i] & s->dir[i];
 
         for (diff = s->prev_level[i] ^ level; diff; diff ^= 1 << bit) {
-            bit = ffs(diff) - 1;
+            bit = ctz32(diff);
             line = bit + 32 * i;
             qemu_set_irq(s->handler[line], (level >> bit) & 1);
         }
index 1ddea6d89c47028fbb79864e5477fded3c2018ef..da9fc1d51b583607e9fe100bb7f75990e0b1b3e3 100644 (file)
@@ -528,7 +528,7 @@ static void strongarm_gpio_handler_update(StrongARMGPIOInfo *s)
     level = s->olevel & s->dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 
@@ -745,7 +745,7 @@ static void strongarm_ppc_handler_update(StrongARMPPCInfo *s)
     level = s->olevel & s->dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 
index 2bf87c9eeab2f31563c8b9fdf9fded3337ab30f1..f72a39216347e722496797555db9f208b0c5b4b2 100644 (file)
@@ -535,8 +535,6 @@ struct FDCtrl {
     uint8_t pwrd;
     /* Floppy drives */
     uint8_t num_floppies;
-    /* Sun4m quirks? */
-    int sun4m;
     FDrive drives[MAX_FD];
     int reset_sensei;
     uint32_t check_media_rate;
@@ -885,13 +883,6 @@ static void fdctrl_reset_irq(FDCtrl *fdctrl)
 
 static void fdctrl_raise_irq(FDCtrl *fdctrl)
 {
-    /* Sparc mutation */
-    if (fdctrl->sun4m && (fdctrl->msr & FD_MSR_CMDBUSY)) {
-        /* XXX: not sure */
-        fdctrl->msr &= ~FD_MSR_CMDBUSY;
-        fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO;
-        return;
-    }
     if (!(fdctrl->sra & FD_SRA_INTPEND)) {
         qemu_set_irq(fdctrl->irq, 1);
         fdctrl->sra |= FD_SRA_INTPEND;
@@ -1080,12 +1071,6 @@ static uint32_t fdctrl_read_main_status(FDCtrl *fdctrl)
     fdctrl->dsr &= ~FD_DSR_PWRDOWN;
     fdctrl->dor |= FD_DOR_nRESET;
 
-    /* Sparc mutation */
-    if (fdctrl->sun4m) {
-        retval |= FD_MSR_DIO;
-        fdctrl_reset_irq(fdctrl);
-    };
-
     FLOPPY_DPRINTF("main status register: 0x%02x\n", retval);
 
     return retval;
@@ -2241,8 +2226,6 @@ static void sun4m_fdc_initfn(Object *obj)
     FDCtrlSysBus *sys = SYSBUS_FDC(obj);
     FDCtrl *fdctrl = &sys->state;
 
-    fdctrl->sun4m = 1;
-
     memory_region_init_io(&fdctrl->iomem, obj, &fdctrl_mem_strict_ops,
                           fdctrl, "fdctrl", 0x08);
     sysbus_init_mmio(sbd, &fdctrl->iomem);
index afe243b8114f7a0d7198e4eab64dfdc2ecd3f6cc..efc43dde6a4b4415117751510ee2e0c3cdaaddbe 100644 (file)
@@ -621,7 +621,6 @@ static int m25p80_init(SSISlave *ss)
 
     s->size = s->pi->sector_size * s->pi->n_sectors;
     s->dirty_page = -1;
-    s->storage = blk_blockalign(s->blk, s->size);
 
     /* FIXME use a qdev drive property instead of drive_get_next() */
     dinfo = drive_get_next(IF_MTD);
@@ -629,6 +628,9 @@ static int m25p80_init(SSISlave *ss)
     if (dinfo) {
         DB_PRINT_L(0, "Binding to IF_MTD drive\n");
         s->blk = blk_by_legacy_dinfo(dinfo);
+        blk_attach_dev_nofail(s->blk, s);
+
+        s->storage = blk_blockalign(s->blk, s->size);
 
         /* FIXME: Move to late init */
         if (blk_read(s->blk, 0, s->storage,
@@ -638,6 +640,7 @@ static int m25p80_init(SSISlave *ss)
         }
     } else {
         DB_PRINT_L(0, "No BDRV - binding to RAM\n");
+        s->storage = blk_blockalign(NULL, s->size);
         memset(s->storage, 0xFF, s->size);
     }
 
index 218e075df75f6593c85d53dcf30877b608914b9d..c90374795257f383a40d343b0ae5a1912c3364ec 100644 (file)
@@ -707,7 +707,7 @@ static void sdp_service_record_build(struct sdp_service_record_s *record,
         len += sdp_attr_max_size(&def->attributes[record->attributes ++].data,
                         &record->uuids);
     }
-    record->uuids = 1 << ffs(record->uuids - 1);
+    record->uuids = pow2ceil(record->uuids);
     record->attribute_list =
             g_malloc0(record->attributes * sizeof(*record->attribute_list));
     record->uuid =
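
The replaced expression 1 << ffs(record->uuids - 1) only rounds up correctly when uuids - 1 happens to be a power of two; pow2ceil() from qemu/host-utils.h returns the smallest power of two greater than or equal to its argument. A standalone sketch contrasting the two (next_pow2 models pow2ceil under that assumption):

    #include <assert.h>
    #include <stdint.h>
    #include <strings.h>            /* ffs() */

    /* Models pow2ceil(): smallest power of two >= x. */
    static uint32_t next_pow2(uint32_t x)
    {
        uint32_t p = 1;
        while (p < x) {
            p <<= 1;
        }
        return p;
    }

    int main(void)
    {
        /* The old expression works for 5, since 5 - 1 is a power of
         * two, but not in general: */
        assert((1 << ffs(5 - 1)) == 8 && next_pow2(5) == 8);
        assert((1 << ffs(6 - 1)) == 2 && next_pow2(6) == 8);
        assert((1 << ffs(4 - 1)) == 2 && next_pow2(4) == 4);
        return 0;
    }
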
index e336bdb4a98e8483480e0a76a9b924284f8740b1..6e2ad8221b0cd31f8db3e5d50dbeae8d5f4f84e5 100644 (file)
@@ -814,12 +814,12 @@ static uint32_t find_free_port_id(VirtIOSerial *vser)
 
     max_nr_ports = vser->serial.max_virtserial_ports;
     for (i = 0; i < (max_nr_ports + 31) / 32; i++) {
-        uint32_t map, bit;
+        uint32_t map, zeroes;
 
         map = vser->ports_map[i];
-        bit = ffs(~map);
-        if (bit) {
-            return (bit - 1) + i * 32;
+        zeroes = ctz32(~map);
+        if (zeroes != 32) {
+            return zeroes + i * 32;
         }
     }
     return VIRTIO_CONSOLE_BAD_ID;
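
Inverting the allocation bitmap turns "first free port" into "first set bit of ~map", which ctz32() answers directly; a fully used 32-bit word inverts to zero and yields the 32 sentinel, so the loop moves on to the next word. A standalone sketch of the idiom (local ctz32() as above):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t ctz32(uint32_t val)
    {
        return val ? (uint32_t)__builtin_ctz(val) : 32;
    }

    int main(void)
    {
        uint32_t map = 0x0000000f;               /* ports 0-3 in use */

        printf("first free: %u\n", ctz32(~map)); /* -> 4 */

        map = 0xffffffff;                        /* word exhausted */
        printf("sentinel:   %u\n", ctz32(~map)); /* -> 32, try next word */
        return 0;
    }
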
index 4306adc959d4f05a252014615c4ce33b739f876e..66b7ade8da394136cc4181a8a8ca60ac44d927a8 100644 (file)
@@ -171,7 +171,7 @@ static void tc6393xb_gpio_handler_update(TC6393xbState *s)
     level = s->gpio_level & s->gpio_dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 
index 741dd20d31b08487b418132d874d6a672537cb52..b89b4744f79fe4562ec4b4f24c61c6f65482bd06 100644 (file)
@@ -205,10 +205,22 @@ again:
             if (size == 0) {
                 /* Transfer complete.  */
                 if (ch->lli) {
-                    ch->src = ldl_le_phys(&address_space_memory, ch->lli);
-                    ch->dest = ldl_le_phys(&address_space_memory, ch->lli + 4);
-                    ch->ctrl = ldl_le_phys(&address_space_memory, ch->lli + 12);
-                    ch->lli = ldl_le_phys(&address_space_memory, ch->lli + 8);
+                    ch->src = address_space_ldl_le(&address_space_memory,
+                                                   ch->lli,
+                                                   MEMTXATTRS_UNSPECIFIED,
+                                                   NULL);
+                    ch->dest = address_space_ldl_le(&address_space_memory,
+                                                    ch->lli + 4,
+                                                    MEMTXATTRS_UNSPECIFIED,
+                                                    NULL);
+                    ch->ctrl = address_space_ldl_le(&address_space_memory,
+                                                    ch->lli + 12,
+                                                    MEMTXATTRS_UNSPECIFIED,
+                                                    NULL);
+                    ch->lli = address_space_ldl_le(&address_space_memory,
+                                                   ch->lli + 8,
+                                                   MEMTXATTRS_UNSPECIFIED,
+                                                   NULL);
                 } else {
                     ch->conf &= ~PL080_CCONF_E;
                 }
index ec7c2efcd94c34201ba12668d30ef42ac5df79e0..9a488bc9b7229d62a304a941db46311fc7441dfe 100644 (file)
@@ -263,7 +263,8 @@ static uint32_t iommu_page_get_flags(IOMMUState *s, hwaddr addr)
     iopte = s->regs[IOMMU_BASE] << 4;
     addr &= ~s->iostart;
     iopte += (addr >> (IOMMU_PAGE_SHIFT - 2)) & ~3;
-    ret = ldl_be_phys(&address_space_memory, iopte);
+    ret = address_space_ldl_be(&address_space_memory, iopte,
+                               MEMTXATTRS_UNSPECIFIED, NULL);
     trace_sun4m_iommu_page_get_flags(pa, iopte, ret);
     return ret;
 }
index 7fbf313ce833a62d89e56994845e531e9c7bb91e..2f59b134ee5b579af9790f30cf077548854faa9e 100644 (file)
@@ -96,7 +96,7 @@ static int max7310_tx(I2CSlave *i2c, uint8_t data)
     case 0x01: /* Output port */
         for (diff = (data ^ s->level) & ~s->direction; diff;
                         diff &= ~(1 << line)) {
-            line = ffs(diff) - 1;
+            line = ctz32(diff);
             if (s->handler[line])
                 qemu_set_irq(s->handler[line], (data >> line) & 1);
         }
index 9a434868904bf3852e5b224ec84ed333812a248d..d92f8cfbae0257e92bc591bf70369bf8cb4ede96 100644 (file)
@@ -125,8 +125,7 @@ static void omap_gpio_write(void *opaque, hwaddr addr,
     case 0x04: /* DATA_OUTPUT */
         diff = (s->outputs ^ value) & ~s->dir;
         s->outputs = value;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
@@ -138,8 +137,7 @@ static void omap_gpio_write(void *opaque, hwaddr addr,
         s->dir = value;
 
         value = s->outputs & ~s->dir;
-        while ((ln = ffs(diff))) {
-            ln --;
+        while ((ln = ctz32(diff)) != 32) {
             if (s->handler[ln])
                 qemu_set_irq(s->handler[ln], (value >> ln) & 1);
             diff &= ~(1 << ln);
@@ -253,8 +251,7 @@ static inline void omap2_gpio_module_out_update(struct omap2_gpio_s *s,
 
     s->outputs ^= diff;
     diff &= ~s->dir;
-    while ((ln = ffs(diff))) {
-        ln --;
+    while ((ln = ctz32(diff)) != 32) {
         qemu_set_irq(s->handler[ln], (s->outputs >> ln) & 1);
         diff &= ~(1 << ln);
     }
@@ -442,8 +439,8 @@ static void omap2_gpio_module_write(void *opaque, hwaddr addr,
         s->dir = value;
 
         value = s->outputs & ~s->dir;
-        while ((ln = ffs(diff))) {
-            diff &= ~(1 << --ln);
+        while ((ln = ctz32(diff)) != 32) {
+            diff &= ~(1 << ln);
             qemu_set_irq(s->handler[ln], (value >> ln) & 1);
         }
 
index 94083424f8fb4f6aae4b6c14fae3abdd189d13a2..24a77272d721073058a9d16c74fe16cbe0b75acf 100644 (file)
@@ -65,7 +65,7 @@ static inline void scoop_gpio_handler_update(ScoopInfo *s) {
     level = s->gpio_level & s->gpio_dir;
 
     for (diff = s->prev_level ^ level; diff; diff ^= 1 << bit) {
-        bit = ffs(diff) - 1;
+        bit = ctz32(diff);
         qemu_set_irq(s->handler[bit], (level >> bit) & 1);
     }
 
index d63278dbde539d1f635c5dd77078bf0e14bd59d9..b6f544a221023ffa51f1a84b552ae0c421f28218 100644 (file)
@@ -171,9 +171,13 @@ static uint32_t omap_i2c_read(void *opaque, hwaddr addr)
     case 0x0c: /* I2C_IV */
         if (s->revision >= OMAP2_INTR_REV)
             break;
-        ret = ffs(s->stat & s->mask);
-        if (ret)
-            s->stat ^= 1 << (ret - 1);
+        ret = ctz32(s->stat & s->mask);
+        if (ret != 32) {
+            s->stat ^= 1 << ret;
+            ret++;
+        } else {
+            ret = 0;
+        }
         omap_i2c_interrupts_update(s);
         return ret;
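
I2C_IV behaves as a 1-based vector register where 0 means "nothing pending", which ffs() provided for free; the ctz32() version has to clear bit ret before re-adding the +1 bias, and to fold the empty case back to 0. A standalone check that the rewrite preserves the returned vector (the bit-clearing side effect is omitted for brevity):

    #include <assert.h>
    #include <stdint.h>
    #include <strings.h>            /* ffs() */

    static uint32_t ctz32(uint32_t val)
    {
        return val ? (uint32_t)__builtin_ctz(val) : 32;
    }

    static int iv_old(uint32_t pending)
    {
        return ffs(pending);                 /* 1-based, 0 if empty */
    }

    static int iv_new(uint32_t pending)
    {
        uint32_t ret = ctz32(pending);
        return ret != 32 ? (int)ret + 1 : 0;
    }

    int main(void)
    {
        for (uint32_t v = 0; v < 0x10000; v++) {
            assert(iv_old(v) == iv_new(v));
        }
        return 0;
    }
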
 
index 7da70ff3492e89f7d3c44b690f728278a953803a..08055a8d8a83f481925f2c62bf8e707e6f9ab2d7 100644 (file)
@@ -246,7 +246,8 @@ static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
     data = vtd_get_long_raw(s, mesg_data_reg);
 
     VTD_DPRINTF(FLOG, "msi: addr 0x%"PRIx64 " data 0x%"PRIx32, addr, data);
-    stl_le_phys(&address_space_memory, addr, data);
+    address_space_stl_le(&address_space_memory, addr, data,
+                         MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* Generate a fault event to software via MSI if conditions are met.
index de820b97234d5e25e0681462f55d1e1e5af653af..eed7621f1367dc873baeca603b660cc3ea0f84e5 100644 (file)
@@ -23,7 +23,7 @@
 static void aw_a10_pic_update(AwA10PICState *s)
 {
     uint8_t i;
-    int irq = 0, fiq = 0, pending;
+    int irq = 0, fiq = 0, zeroes;
 
     s->vector = 0;
 
@@ -32,9 +32,9 @@ static void aw_a10_pic_update(AwA10PICState *s)
         fiq |= s->select[i] & s->irq_pending[i] & ~s->mask[i];
 
         if (!s->vector) {
-            pending = ffs(s->irq_pending[i] & ~s->mask[i]);
-            if (pending) {
-                s->vector = (i * 32 + pending - 1) * 4;
+            zeroes = ctz32(s->irq_pending[i] & ~s->mask[i]);
+            if (zeroes != 32) {
+                s->vector = (i * 32 + zeroes) * 4;
             }
         }
     }
index ad3931c1125ee966d6f02cabadf8089932f56069..e9b38a3c63ce301c6a41fccf2cbca9cf3449de24 100644 (file)
@@ -60,7 +60,7 @@ struct omap_intr_handler_s {
 
 static void omap_inth_sir_update(struct omap_intr_handler_s *s, int is_fiq)
 {
-    int i, j, sir_intr, p_intr, p, f;
+    int i, j, sir_intr, p_intr, p;
     uint32_t level;
     sir_intr = 0;
     p_intr = 255;
@@ -72,14 +72,15 @@ static void omap_inth_sir_update(struct omap_intr_handler_s *s, int is_fiq)
     for (j = 0; j < s->nbanks; ++j) {
         level = s->bank[j].irqs & ~s->bank[j].mask &
                 (is_fiq ? s->bank[j].fiq : ~s->bank[j].fiq);
-        for (f = ffs(level), i = f - 1, level >>= f - 1; f; i += f,
-                        level >>= f) {
+
+        while (level != 0) {
+            i = ctz32(level);
             p = s->bank[j].priority[i];
             if (p <= p_intr) {
                 p_intr = p;
                 sir_intr = 32 * j + i;
             }
-            f = ffs(level >> 1);
+            level &= level - 1;
         }
     }
     s->sir_intr[is_fiq] = sir_intr;
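
The rewritten priority scan uses the classic set-bit iteration: ctz32() names the lowest pending interrupt and level &= level - 1 clears exactly that bit, so the loop visits each set bit once without the shifting arithmetic of the old ffs() loop. A standalone sketch:

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t ctz32(uint32_t val)
    {
        return val ? (uint32_t)__builtin_ctz(val) : 32;
    }

    int main(void)
    {
        uint32_t level = 0x00000a12;        /* bits 1, 4, 9, 11 set */

        while (level != 0) {
            printf("pending irq %u\n", ctz32(level));
            level &= level - 1;             /* clear lowest set bit */
        }
        return 0;
    }
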
index 07f3c270d4c22522a42c21644e617a6d6f8a0796..2c153e092fa10b3bbc95854896b2e6b6c707b08f 100644 (file)
@@ -61,7 +61,8 @@ static void main_cpu_reset(void *opaque)
 static uint64_t rtc_read(void *opaque, hwaddr addr, unsigned size)
 {
     uint8_t val;
-    address_space_read(&address_space_memory, 0x90000071, &val, 1);
+    address_space_read(&address_space_memory, 0x90000071,
+                       MEMTXATTRS_UNSPECIFIED, &val, 1);
     return val;
 }
 
@@ -69,7 +70,8 @@ static void rtc_write(void *opaque, hwaddr addr,
                       uint64_t val, unsigned size)
 {
     uint8_t buf = val & 0xff;
-    address_space_write(&address_space_memory, 0x90000071, &buf, 1);
+    address_space_write(&address_space_memory, 0x90000071,
+                        MEMTXATTRS_UNSPECIFIED, &buf, 1);
 }
 
 static const MemoryRegionOps rtc_ops = {
index 312fa703c69a5a673c89c86563668bd896ef1c4a..599768e2d9608df054e264abc153d6c4e7ecd057 100644 (file)
@@ -289,7 +289,8 @@ static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
         }
     }
 
-    tte = ldq_be_phys(&address_space_memory, baseaddr + offset);
+    tte = address_space_ldq_be(&address_space_memory, baseaddr + offset,
+                               MEMTXATTRS_UNSPECIFIED, NULL);
 
     if (!(tte & IOMMU_TTE_DATA_V)) {
         /* Invalid mapping */
index 8134d0bcd0799f786a1c32e8c5cf507071584918..3a731fe18de124807096026e072af8646d1fb319 100644 (file)
@@ -427,7 +427,7 @@ static uint32_t bonito_sbridge_pciaddr(void *opaque, hwaddr addr)
     cfgaddr |= (s->regs[BONITO_PCIMAP_CFG] & 0xffff) << 16;
 
     idsel = (cfgaddr & BONITO_PCICONF_IDSEL_MASK) >> BONITO_PCICONF_IDSEL_OFFSET;
-    devno = ffs(idsel) - 1;
+    devno = ctz32(idsel);
     funno = (cfgaddr & BONITO_PCICONF_FUN_MASK) >> BONITO_PCICONF_FUN_OFFSET;
     regno = (cfgaddr & BONITO_PCICONF_REG_MASK) >> BONITO_PCICONF_REG_OFFSET;
 
index 6cea6ffebb54b6a23ba5318ac791b2a79e5a347a..c63f45d217e5efb99beeec1b3b2e286e7c620da7 100644 (file)
@@ -140,7 +140,8 @@ static uint64_t raven_io_read(void *opaque, hwaddr addr,
     uint8_t buf[4];
 
     addr = raven_io_address(s, addr);
-    address_space_read(&s->pci_io_as, addr + 0x80000000, buf, size);
+    address_space_read(&s->pci_io_as, addr + 0x80000000,
+                       MEMTXATTRS_UNSPECIFIED, buf, size);
 
     if (size == 1) {
         return buf[0];
@@ -171,7 +172,8 @@ static void raven_io_write(void *opaque, hwaddr addr,
         g_assert_not_reached();
     }
 
-    address_space_write(&s->pci_io_as, addr + 0x80000000, buf, size);
+    address_space_write(&s->pci_io_as, addr + 0x80000000,
+                        MEMTXATTRS_UNSPECIFIED, buf, size);
 }
 
 static const MemoryRegionOps raven_io_ops = {
index 53f2b59ae84f7fb8cd48086dc3c8979dc7d85b52..f0144eb7b01e29fddd23e82753ecdfbc1ed4a9f0 100644 (file)
@@ -92,7 +92,10 @@ static uint32_t unin_get_config_reg(uint32_t reg, uint32_t addr)
         uint32_t slot, func;
 
         /* Grab CFA0 style values */
-        slot = ffs(reg & 0xfffff800) - 1;
+        slot = ctz32(reg & 0xfffff800);
+        if (slot == 32) {
+            slot = -1; /* XXX: should this be 0? */
+        }
         func = (reg >> 8) & 7;
 
         /* ... and then convert them to x86 format */
index 52d23130d9717dd316747bcbb1c2eaef223fe40d..294993822360ea840b89f26a3ae74a5698d1feab 100644 (file)
@@ -72,7 +72,7 @@ static inline uint8_t msi_cap_sizeof(uint16_t flags)
 static inline unsigned int msi_nr_vectors(uint16_t flags)
 {
     return 1U <<
-        ((flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1));
+        ((flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE));
 }
 
 static inline uint8_t msi_flags_off(const PCIDevice* dev)
@@ -175,9 +175,9 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
     assert(nr_vectors > 0);
     assert(nr_vectors <= PCI_MSI_VECTORS_MAX);
     /* the nr of MSI vectors is up to 32 */
-    vectors_order = ffs(nr_vectors) - 1;
+    vectors_order = ctz32(nr_vectors);
 
-    flags = vectors_order << (ffs(PCI_MSI_FLAGS_QMASK) - 1);
+    flags = vectors_order << ctz32(PCI_MSI_FLAGS_QMASK);
     if (msi64bit) {
         flags |= PCI_MSI_FLAGS_64BIT;
     }
@@ -291,7 +291,8 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
                    "notify vector 0x%x"
                    " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
                    vector, msg.address, msg.data);
-    stl_le_phys(&dev->bus_master_as, msg.address, msg.data);
+    address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
+                         MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 /* Normally called by pci_default_write_config(). */
@@ -354,12 +355,12 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
      * just don't crash the host
      */
     log_num_vecs =
-        (flags & PCI_MSI_FLAGS_QSIZE) >> (ffs(PCI_MSI_FLAGS_QSIZE) - 1);
+        (flags & PCI_MSI_FLAGS_QSIZE) >> ctz32(PCI_MSI_FLAGS_QSIZE);
     log_max_vecs =
-        (flags & PCI_MSI_FLAGS_QMASK) >> (ffs(PCI_MSI_FLAGS_QMASK) - 1);
+        (flags & PCI_MSI_FLAGS_QMASK) >> ctz32(PCI_MSI_FLAGS_QMASK);
     if (log_num_vecs > log_max_vecs) {
         flags &= ~PCI_MSI_FLAGS_QSIZE;
-        flags |= log_max_vecs << (ffs(PCI_MSI_FLAGS_QSIZE) - 1);
+        flags |= log_max_vecs << ctz32(PCI_MSI_FLAGS_QSIZE);
         pci_set_word(dev->config + msi_flags_off(dev), flags);
     }
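
Register fields in the MSI capability are described by their masks, and the shift needed to extract or deposit a field is just the trailing-zero count of the mask, so ctz32(mask) replaces every ffs(mask) - 1. A standalone sketch of the idiom (field_get/field_set are hypothetical helpers; the QSIZE value matches Linux's pci_regs.h):

    #include <assert.h>
    #include <stdint.h>

    #define PCI_MSI_FLAGS_QSIZE 0x0070      /* bits 6:4 of the flags word */

    static uint32_t ctz32(uint32_t val)
    {
        return val ? (uint32_t)__builtin_ctz(val) : 32;
    }

    static uint16_t field_get(uint16_t reg, uint16_t mask)
    {
        return (reg & mask) >> ctz32(mask);
    }

    static uint16_t field_set(uint16_t reg, uint16_t mask, uint16_t val)
    {
        return (reg & ~mask) | (uint16_t)(val << ctz32(mask));
    }

    int main(void)
    {
        uint16_t flags = field_set(0, PCI_MSI_FLAGS_QSIZE, 3);

        assert(field_get(flags, PCI_MSI_FLAGS_QSIZE) == 3);
        return 0;
    }
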
 
index 24de2605fbac4a2dae61e1ada7c531ff677eeb88..031eaabca9de1609e1d76204869dc03016f7e6ba 100644 (file)
@@ -435,7 +435,8 @@ void msix_notify(PCIDevice *dev, unsigned vector)
 
     msg = msix_get_message(dev, vector);
 
-    stl_le_phys(&dev->bus_master_as, msg.address, msg.data);
+    address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
+                         MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 void msix_reset(PCIDevice *dev)
index eaa3e6ea94f3c7c3100aaaab90e11847d3a5def6..b48c09cd11e1f52bb089328eedc7eb35ffa61532 100644 (file)
@@ -410,7 +410,7 @@ static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
 {
     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
-    uint8_t first_bit = ffs(err->status) - 1;
+    uint8_t first_bit = ctz32(err->status);
     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
     int i;
 
index 759910f79a88c0f53901f3de3104db57762716b7..a706486394d162840f077fd1ad2a4252461a081f 100644 (file)
@@ -61,7 +61,7 @@
 /* Same slot state masks are used for command and status registers */
 #define SHPC_SLOT_STATE_MASK     0x03
 #define SHPC_SLOT_STATE_SHIFT \
-    (ffs(SHPC_SLOT_STATE_MASK) - 1)
+    ctz32(SHPC_SLOT_STATE_MASK)
 
 #define SHPC_STATE_NO       0x0
 #define SHPC_STATE_PWRONLY  0x1
 
 #define SHPC_SLOT_PWR_LED_MASK   0xC
 #define SHPC_SLOT_PWR_LED_SHIFT \
-    (ffs(SHPC_SLOT_PWR_LED_MASK) - 1)
+    ctz32(SHPC_SLOT_PWR_LED_MASK)
 #define SHPC_SLOT_ATTN_LED_MASK  0x30
 #define SHPC_SLOT_ATTN_LED_SHIFT \
-    (ffs(SHPC_SLOT_ATTN_LED_MASK) - 1)
+    ctz32(SHPC_SLOT_ATTN_LED_MASK)
 
 #define SHPC_LED_NO     0x0
 #define SHPC_LED_ON     0x1
@@ -136,7 +136,7 @@ static int roundup_pow_of_two(int x)
 static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk)
 {
     uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
-    return (pci_get_word(status) & msk) >> (ffs(msk) - 1);
+    return (pci_get_word(status) & msk) >> ctz32(msk);
 }
 
 static void shpc_set_status(SHPCDevice *shpc,
@@ -144,7 +144,7 @@ static void shpc_set_status(SHPCDevice *shpc,
 {
     uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
     pci_word_test_and_clear_mask(status, msk);
-    pci_word_test_and_set_mask(status, value << (ffs(msk) - 1));
+    pci_word_test_and_set_mask(status, value << ctz32(msk));
 }
 
 static void shpc_interrupt_update(PCIDevice *d)
index 62f7bae2f12b6f026769581357e27d8bcc472914..1c01d346c927a87ad09849583472412182d010dc 100644 (file)
@@ -3,7 +3,7 @@
 #include "qemu/error-report.h"
 
 #define SLOTID_CAP_LENGTH 4
-#define SLOTID_NSLOTS_SHIFT (ffs(PCI_SID_ESR_NSLOTS) - 1)
+#define SLOTID_NSLOTS_SHIFT ctz32(PCI_SID_ESR_NSLOTS)
 
 int slotid_cap_init(PCIDevice *d, int nslots,
                     uint8_t chassis,
index d49f2b88037485705011a222bcb52c15a05bb758..a99f7b03970cb6513dfce7a09a21541608c75f80 100644 (file)
@@ -74,7 +74,7 @@ static void spin_reset(void *opaque)
 /* Create -kernel TLB entries for BookE, linearly spanning 256MB.  */
 static inline hwaddr booke206_page_size_to_tlb(uint64_t size)
 {
-    return (ffs(size >> 10) - 1) >> 1;
+    return ctz32(size >> 10) >> 1;
 }
 
 static void mmubooke_create_initial_mapping(CPUPPCState *env,
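
The helper assumes the BookE TSIZE encoding in which a TLB entry covers 4^TSIZE KiB, so TSIZE = log2(size / 1 KiB) / 2, which is what ctz32(size >> 10) >> 1 computes for power-of-four sizes. A standalone check of a few values:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t ctz32(uint32_t val)
    {
        return val ? (uint32_t)__builtin_ctz(val) : 32;
    }

    static uint32_t page_size_to_tlb(uint64_t size)
    {
        return ctz32((uint32_t)(size >> 10)) >> 1;
    }

    int main(void)
    {
        assert(page_size_to_tlb(4 * 1024) == 1);            /* 4^1 KiB */
        assert(page_size_to_tlb(64 * 1024) == 3);           /* 4^3 KiB */
        assert(page_size_to_tlb(256 * 1024 * 1024) == 9);   /* 4^9 KiB */
        return 0;
    }
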
index 9a13b006dda019a9bda804a0735efc304b401301..5561d807dc74e571fc58182a23f027826a5ab77d 100644 (file)
@@ -745,20 +745,27 @@ static void css_update_chnmon(SubchDev *sch)
         /* Format 1, per-subchannel area. */
         uint32_t count;
 
-        count = ldl_phys(&address_space_memory, sch->curr_status.mba);
+        count = address_space_ldl(&address_space_memory,
+                                  sch->curr_status.mba,
+                                  MEMTXATTRS_UNSPECIFIED,
+                                  NULL);
         count++;
-        stl_phys(&address_space_memory, sch->curr_status.mba, count);
+        address_space_stl(&address_space_memory, sch->curr_status.mba, count,
+                          MEMTXATTRS_UNSPECIFIED, NULL);
     } else {
         /* Format 0, global area. */
         uint32_t offset;
         uint16_t count;
 
         offset = sch->curr_status.pmcw.mbi << 5;
-        count = lduw_phys(&address_space_memory,
-                          channel_subsys->chnmon_area + offset);
+        count = address_space_lduw(&address_space_memory,
+                                   channel_subsys->chnmon_area + offset,
+                                   MEMTXATTRS_UNSPECIFIED,
+                                   NULL);
         count++;
-        stw_phys(&address_space_memory,
-                 channel_subsys->chnmon_area + offset, count);
+        address_space_stw(&address_space_memory,
+                          channel_subsys->chnmon_area + offset, count,
+                          MEMTXATTRS_UNSPECIFIED, NULL);
     }
 }
 
index 3c086f6155b28e15a4ccb7df93da87e1f1286584..560b66a501e488a16989de56a3fa10a41674642b 100644 (file)
@@ -278,7 +278,8 @@ static uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
     px = calc_px(guest_dma_address);
 
     sto_a = guest_iota + rtx * sizeof(uint64_t);
-    sto = ldq_phys(&address_space_memory, sto_a);
+    sto = address_space_ldq(&address_space_memory, sto_a,
+                            MEMTXATTRS_UNSPECIFIED, NULL);
     sto = get_rt_sto(sto);
     if (!sto) {
         pte = 0;
@@ -286,7 +287,8 @@ static uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
     }
 
     pto_a = sto + sx * sizeof(uint64_t);
-    pto = ldq_phys(&address_space_memory, pto_a);
+    pto = address_space_ldq(&address_space_memory, pto_a,
+                            MEMTXATTRS_UNSPECIFIED, NULL);
     pto = get_st_pto(pto);
     if (!pto) {
         pte = 0;
@@ -294,7 +296,8 @@ static uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
     }
 
     px_a = pto + px * sizeof(uint64_t);
-    pte = ldq_phys(&address_space_memory, px_a);
+    pte = address_space_ldq(&address_space_memory, px_a,
+                            MEMTXATTRS_UNSPECIFIED, NULL);
 
 out:
     return pte;
index 08d8aa6b4b54757d3348b8307aef8d4f872d58ad..8f7288fadfde825fc739ad305405b804d1165b24 100644 (file)
@@ -331,7 +331,8 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
             return 0;
         }
         MemoryRegion *mr = pbdev->pdev->io_regions[pcias].memory;
-        io_mem_read(mr, offset, &data, len);
+        memory_region_dispatch_read(mr, offset, &data, len,
+                                    MEMTXATTRS_UNSPECIFIED);
     } else if (pcias == 15) {
         if ((4 - (offset & 0x3)) < len) {
             program_interrupt(env, PGM_OPERAND, 4);
@@ -456,7 +457,8 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
             mr = pbdev->pdev->io_regions[pcias].memory;
         }
 
-        io_mem_write(mr, offset, data, len);
+        memory_region_dispatch_write(mr, offset, data, len,
+                                     MEMTXATTRS_UNSPECIFIED);
     } else if (pcias == 15) {
         if ((4 - (offset & 0x3)) < len) {
             program_interrupt(env, PGM_OPERAND, 4);
@@ -606,7 +608,9 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr)
     }
 
     for (i = 0; i < len / 8; i++) {
-        io_mem_write(mr, env->regs[r3] + i * 8, ldq_p(buffer + i * 8), 8);
+        memory_region_dispatch_write(mr, env->regs[r3] + i * 8,
+                                     ldq_p(buffer + i * 8), 8,
+                                     MEMTXATTRS_UNSPECIFIED);
     }
 
     setcc(cpu, ZPCI_PCI_LS_OK);
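
io_mem_read()/io_mem_write() give way here to memory_region_dispatch_read()/_write(), which carry the transaction attributes down to the device model and hand back a MemTxResult. The s390 PCI instruction handlers above still discard that result; a QEMU-internal sketch of a caller that does not (mmio_read_ok is a hypothetical helper):

    #include "exec/memory.h"

    static bool mmio_read_ok(MemoryRegion *mr, hwaddr offset,
                             uint64_t *data, unsigned size)
    {
        MemTxResult r;

        r = memory_region_dispatch_read(mr, offset, data, size,
                                        MEMTXATTRS_UNSPECIFIED);
        return r == MEMTX_OK;       /* false on decode or device error */
    }
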
index 047c9636982ebdcf2f5038d2b4387ba6b9200540..0f93a644a2b73ce31f876ad862cf7d4a4c1d736c 100644 (file)
@@ -75,10 +75,12 @@ void s390_virtio_reset_idx(VirtIOS390Device *dev)
     for (i = 0; i < num_vq; i++) {
         idx_addr = virtio_queue_get_avail_addr(dev->vdev, i) +
             VIRTIO_VRING_AVAIL_IDX_OFFS;
-        stw_phys(&address_space_memory, idx_addr, 0);
+        address_space_stw(&address_space_memory, idx_addr, 0,
+                          MEMTXATTRS_UNSPECIFIED, NULL);
         idx_addr = virtio_queue_get_used_addr(dev->vdev, i) +
             VIRTIO_VRING_USED_IDX_OFFS;
-        stw_phys(&address_space_memory, idx_addr, 0);
+        address_space_stw(&address_space_memory, idx_addr, 0,
+                          MEMTXATTRS_UNSPECIFIED, NULL);
     }
 }
 
@@ -336,7 +338,8 @@ static uint64_t s390_virtio_device_vq_token(VirtIOS390Device *dev, int vq)
                 (vq * VIRTIO_VQCONFIG_LEN) +
                 VIRTIO_VQCONFIG_OFFS_TOKEN;
 
-    return ldq_be_phys(&address_space_memory, token_off);
+    return address_space_ldq_be(&address_space_memory, token_off,
+                                MEMTXATTRS_UNSPECIFIED, NULL);
 }
 
 static ram_addr_t s390_virtio_device_num_vq(VirtIOS390Device *dev)
@@ -371,21 +374,33 @@ void s390_virtio_device_sync(VirtIOS390Device *dev)
     virtio_reset(dev->vdev);
 
     /* Sync dev space */
-    stb_phys(&address_space_memory,
-             dev->dev_offs + VIRTIO_DEV_OFFS_TYPE, dev->vdev->device_id);
-
-    stb_phys(&address_space_memory,
-             dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ,
-             s390_virtio_device_num_vq(dev));
-    stb_phys(&address_space_memory,
-             dev->dev_offs + VIRTIO_DEV_OFFS_FEATURE_LEN, dev->feat_len);
-
-    stb_phys(&address_space_memory,
-             dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG_LEN, dev->vdev->config_len);
+    address_space_stb(&address_space_memory,
+                      dev->dev_offs + VIRTIO_DEV_OFFS_TYPE,
+                      dev->vdev->device_id,
+                      MEMTXATTRS_UNSPECIFIED,
+                      NULL);
+
+    address_space_stb(&address_space_memory,
+                      dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ,
+                      s390_virtio_device_num_vq(dev),
+                      MEMTXATTRS_UNSPECIFIED,
+                      NULL);
+    address_space_stb(&address_space_memory,
+                      dev->dev_offs + VIRTIO_DEV_OFFS_FEATURE_LEN,
+                      dev->feat_len,
+                      MEMTXATTRS_UNSPECIFIED,
+                      NULL);
+
+    address_space_stb(&address_space_memory,
+                      dev->dev_offs + VIRTIO_DEV_OFFS_CONFIG_LEN,
+                      dev->vdev->config_len,
+                      MEMTXATTRS_UNSPECIFIED,
+                      NULL);
 
     num_vq = s390_virtio_device_num_vq(dev);
-    stb_phys(&address_space_memory,
-             dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ, num_vq);
+    address_space_stb(&address_space_memory,
+                      dev->dev_offs + VIRTIO_DEV_OFFS_NUM_VQ, num_vq,
+                      MEMTXATTRS_UNSPECIFIED, NULL);
 
     /* Sync virtqueues */
     for (i = 0; i < num_vq; i++) {
@@ -396,11 +411,14 @@ void s390_virtio_device_sync(VirtIOS390Device *dev)
         vring = s390_virtio_next_ring(bus);
         virtio_queue_set_addr(dev->vdev, i, vring);
         virtio_queue_set_vector(dev->vdev, i, i);
-        stq_be_phys(&address_space_memory,
-                    vq + VIRTIO_VQCONFIG_OFFS_ADDRESS, vring);
-        stw_be_phys(&address_space_memory,
-                    vq + VIRTIO_VQCONFIG_OFFS_NUM,
-                    virtio_queue_get_num(dev->vdev, i));
+        address_space_stq_be(&address_space_memory,
+                             vq + VIRTIO_VQCONFIG_OFFS_ADDRESS, vring,
+                             MEMTXATTRS_UNSPECIFIED, NULL);
+        address_space_stw_be(&address_space_memory,
+                             vq + VIRTIO_VQCONFIG_OFFS_NUM,
+                             virtio_queue_get_num(dev->vdev, i),
+                             MEMTXATTRS_UNSPECIFIED,
+                             NULL);
     }
 
     cur_offs = dev->dev_offs;
@@ -408,7 +426,8 @@ void s390_virtio_device_sync(VirtIOS390Device *dev)
     cur_offs += num_vq * VIRTIO_VQCONFIG_LEN;
 
     /* Sync feature bitmap */
-    stl_le_phys(&address_space_memory, cur_offs, dev->host_features);
+    address_space_stl_le(&address_space_memory, cur_offs, dev->host_features,
+                         MEMTXATTRS_UNSPECIFIED, NULL);
 
     dev->feat_offs = cur_offs + dev->feat_len;
     cur_offs += dev->feat_len * 2;
@@ -426,12 +445,16 @@ void s390_virtio_device_update_status(VirtIOS390Device *dev)
     VirtIODevice *vdev = dev->vdev;
     uint32_t features;
 
-    virtio_set_status(vdev, ldub_phys(&address_space_memory,
-                                      dev->dev_offs + VIRTIO_DEV_OFFS_STATUS));
+    virtio_set_status(vdev,
+                      address_space_ldub(&address_space_memory,
+                                         dev->dev_offs + VIRTIO_DEV_OFFS_STATUS,
+                                         MEMTXATTRS_UNSPECIFIED, NULL));
 
     /* Update guest supported feature bitmap */
 
-    features = bswap32(ldl_be_phys(&address_space_memory, dev->feat_offs));
+    features = bswap32(address_space_ldl_be(&address_space_memory,
+                                            dev->feat_offs,
+                                            MEMTXATTRS_UNSPECIFIED, NULL));
     virtio_set_features(vdev, features);
 }
 
index bdb538859f5fee3a8a49b4d00a4cb517a7aa24ad..3a1b9ee2d07cee7bd6137d0f4d51f1387c1ef697 100644 (file)
@@ -97,7 +97,9 @@ static int s390_virtio_hcall_reset(const uint64_t *args)
         return -EINVAL;
     }
     virtio_reset(dev->vdev);
-    stb_phys(&address_space_memory, dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
+    address_space_stb(&address_space_memory,
+                      dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0,
+                      MEMTXATTRS_UNSPECIFIED, NULL);
     s390_virtio_device_sync(dev);
     s390_virtio_reset_idx(dev);
 
index d32ecafe9811c22c41a7ebbff171c57db62f3041..ed75c638bc1a606168d0ccb012d3ffa9f1685788 100644 (file)
@@ -335,16 +335,23 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            info.queue = ldq_phys(&address_space_memory, ccw.cda);
-            info.align = ldl_phys(&address_space_memory,
-                                  ccw.cda + sizeof(info.queue));
-            info.index = lduw_phys(&address_space_memory,
-                                   ccw.cda + sizeof(info.queue)
-                                   + sizeof(info.align));
-            info.num = lduw_phys(&address_space_memory,
-                                 ccw.cda + sizeof(info.queue)
-                                 + sizeof(info.align)
-                                 + sizeof(info.index));
+            info.queue = address_space_ldq(&address_space_memory, ccw.cda,
+                                           MEMTXATTRS_UNSPECIFIED, NULL);
+            info.align = address_space_ldl(&address_space_memory,
+                                           ccw.cda + sizeof(info.queue),
+                                           MEMTXATTRS_UNSPECIFIED,
+                                           NULL);
+            info.index = address_space_lduw(&address_space_memory,
+                                            ccw.cda + sizeof(info.queue)
+                                            + sizeof(info.align),
+                                            MEMTXATTRS_UNSPECIFIED,
+                                            NULL);
+            info.num = address_space_lduw(&address_space_memory,
+                                          ccw.cda + sizeof(info.queue)
+                                          + sizeof(info.align)
+                                          + sizeof(info.index),
+                                          MEMTXATTRS_UNSPECIFIED,
+                                          NULL);
             ret = virtio_ccw_set_vqs(sch, info.queue, info.align, info.index,
                                      info.num);
             sch->curr_status.scsw.count = 0;
@@ -369,15 +376,20 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            features.index = ldub_phys(&address_space_memory,
-                                       ccw.cda + sizeof(features.features));
+            features.index = address_space_ldub(&address_space_memory,
+                                                ccw.cda
+                                                + sizeof(features.features),
+                                                MEMTXATTRS_UNSPECIFIED,
+                                                NULL);
             if (features.index < ARRAY_SIZE(dev->host_features)) {
                 features.features = dev->host_features[features.index];
             } else {
                 /* Return zeroes if the guest supports more feature bits. */
                 features.features = 0;
             }
-            stl_le_phys(&address_space_memory, ccw.cda, features.features);
+            address_space_stl_le(&address_space_memory, ccw.cda,
+                                 features.features, MEMTXATTRS_UNSPECIFIED,
+                                 NULL);
             sch->curr_status.scsw.count = ccw.count - sizeof(features);
             ret = 0;
         }
@@ -396,9 +408,15 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            features.index = ldub_phys(&address_space_memory,
-                                       ccw.cda + sizeof(features.features));
-            features.features = ldl_le_phys(&address_space_memory, ccw.cda);
+            features.index = address_space_ldub(&address_space_memory,
+                                                ccw.cda
+                                                + sizeof(features.features),
+                                                MEMTXATTRS_UNSPECIFIED,
+                                                NULL);
+            features.features = address_space_ldl_le(&address_space_memory,
+                                                     ccw.cda,
+                                                     MEMTXATTRS_UNSPECIFIED,
+                                                     NULL);
             if (features.index < ARRAY_SIZE(dev->host_features)) {
                 virtio_set_features(vdev, features.features);
             } else {
@@ -474,7 +492,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            status = ldub_phys(&address_space_memory, ccw.cda);
+            status = address_space_ldub(&address_space_memory, ccw.cda,
+                                        MEMTXATTRS_UNSPECIFIED, NULL);
             if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
                 virtio_ccw_stop_ioeventfd(dev);
             }
@@ -508,7 +527,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            indicators = ldq_be_phys(&address_space_memory, ccw.cda);
+            indicators = address_space_ldq_be(&address_space_memory, ccw.cda,
+                                              MEMTXATTRS_UNSPECIFIED, NULL);
             dev->indicators = get_indicator(indicators, sizeof(uint64_t));
             sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
             ret = 0;
@@ -528,7 +548,8 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            indicators = ldq_be_phys(&address_space_memory, ccw.cda);
+            indicators = address_space_ldq_be(&address_space_memory, ccw.cda,
+                                              MEMTXATTRS_UNSPECIFIED, NULL);
             dev->indicators2 = get_indicator(indicators, sizeof(uint64_t));
             sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
             ret = 0;
@@ -548,15 +569,21 @@ static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
         if (!ccw.cda) {
             ret = -EFAULT;
         } else {
-            vq_config.index = lduw_be_phys(&address_space_memory, ccw.cda);
+            vq_config.index = address_space_lduw_be(&address_space_memory,
+                                                    ccw.cda,
+                                                    MEMTXATTRS_UNSPECIFIED,
+                                                    NULL);
             if (vq_config.index >= VIRTIO_PCI_QUEUE_MAX) {
                 ret = -EINVAL;
                 break;
             }
             vq_config.num_max = virtio_queue_get_num(vdev,
                                                      vq_config.index);
-            stw_be_phys(&address_space_memory,
-                        ccw.cda + sizeof(vq_config.index), vq_config.num_max);
+            address_space_stw_be(&address_space_memory,
+                                 ccw.cda + sizeof(vq_config.index),
+                                 vq_config.num_max,
+                                 MEMTXATTRS_UNSPECIFIED,
+                                 NULL);
             sch->curr_status.scsw.count = ccw.count - sizeof(vq_config);
             ret = 0;
         }
@@ -1068,9 +1095,13 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
                 css_adapter_interrupt(dev->thinint_isc);
             }
         } else {
-            indicators = ldq_phys(&address_space_memory, dev->indicators->addr);
+            indicators = address_space_ldq(&address_space_memory,
+                                           dev->indicators->addr,
+                                           MEMTXATTRS_UNSPECIFIED,
+                                           NULL);
             indicators |= 1ULL << vector;
-            stq_phys(&address_space_memory, dev->indicators->addr, indicators);
+            address_space_stq(&address_space_memory, dev->indicators->addr,
+                              indicators, MEMTXATTRS_UNSPECIFIED, NULL);
             css_conditional_io_interrupt(sch);
         }
     } else {
@@ -1078,9 +1109,13 @@ static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
             return;
         }
         vector = 0;
-        indicators = ldq_phys(&address_space_memory, dev->indicators2->addr);
+        indicators = address_space_ldq(&address_space_memory,
+                                       dev->indicators2->addr,
+                                       MEMTXATTRS_UNSPECIFIED,
+                                       NULL);
         indicators |= 1ULL << vector;
-        stq_phys(&address_space_memory, dev->indicators2->addr, indicators);
+        address_space_stq(&address_space_memory, dev->indicators2->addr,
+                          indicators, MEMTXATTRS_UNSPECIFIED, NULL);
         css_conditional_io_interrupt(sch);
     }
 }
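
Every hunk above follows the same mechanical pattern: each legacy physical-memory helper gains an explicit attributes argument and an optional MemTxResult out-parameter. A minimal before/after sketch of the conversion (the address is whatever the caller already had):

    /* old API: no attributes, errors silently ignored */
    indicators = ldq_phys(&address_space_memory, addr);

    /* new API: explicit attributes, optional error reporting */
    indicators = address_space_ldq(&address_space_memory, addr,
                                   MEMTXATTRS_UNSPECIFIED, NULL);

Passing NULL for the MemTxResult pointer keeps the old fire-and-forget behaviour, which is why these call sites can be converted without adding any new error handling.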
index ad7317bfe9c5baf2686d0c7c85ab9b2e4a33fefa..91a5d97c73e64900db4aa57f440c103ac7ce4bce 100644 (file)
@@ -804,7 +804,7 @@ static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd)
                                MFI_INFO_LDOPS_READ_POLICY);
     info.max_strips_per_io = cpu_to_le16(s->fw_sge);
     info.stripe_sz_ops.min = 3;
-    info.stripe_sz_ops.max = ffs(MEGASAS_MAX_SECTORS + 1) - 1;
+    info.stripe_sz_ops.max = ctz32(MEGASAS_MAX_SECTORS + 1);
     info.properties.pred_fail_poll_interval = cpu_to_le16(300);
     info.properties.intr_throttle_cnt = cpu_to_le16(16);
     info.properties.intr_throttle_timeout = cpu_to_le16(50);
index f955265f74f52e5f76267301fb2c74905152f1ea..8abf0c9e31e4b8a6a87f11ff422a3b64c9023b01 100644 (file)
@@ -796,8 +796,9 @@ static sd_rsp_type_t sd_normal_command(SDState *sd,
             sd->vhs = 0;
 
             /* No response if not exactly one VHS bit is set.  */
-            if (!(req.arg >> 8) || (req.arg >> ffs(req.arg & ~0xff)))
+            if (!(req.arg >> 8) || (req.arg >> (ctz32(req.arg & ~0xff) + 1))) {
                 return sd->spi ? sd_r7 : sd_r0;
+            }
 
             /* Accept.  */
             sd->vhs = req.arg;
index d1d0847ba2b34120869e57e5c073d4a17e6f6013..4221060308d45133c35b2ed65d060b110954bd8f 100644 (file)
@@ -318,8 +318,10 @@ static void r2d_init(MachineState *machine)
         }
 
         /* initialization which should be done by firmware */
-        stl_phys(&address_space_memory, SH7750_BCR1, 1<<3); /* cs3 SDRAM */
-        stw_phys(&address_space_memory, SH7750_BCR2, 3<<(3*2)); /* cs3 32bit */
+        address_space_stl(&address_space_memory, SH7750_BCR1, 1 << 3,
+                          MEMTXATTRS_UNSPECIFIED, NULL); /* cs3 SDRAM */
+        address_space_stw(&address_space_memory, SH7750_BCR2, 3 << (3 * 2),
+                          MEMTXATTRS_UNSPECIFIED, NULL); /* cs3 32bit */
         reset_info->vector = (SDRAM_BASE + LINUX_LOAD_OFFSET) | 0xa0000000; /* Start from P2 area */
     }
 
index 78d86be91c97fa817c2598d5d616862ec454bb7c..b6b8a2063da63ad052539db79d95a7ac707d197c 100644 (file)
@@ -206,8 +206,9 @@ static void update_irq(struct HPETTimer *timer, int set)
             }
         }
     } else if (timer_fsb_route(timer)) {
-        stl_le_phys(&address_space_memory,
-                    timer->fsb >> 32, timer->fsb & 0xffffffff);
+        address_space_stl_le(&address_space_memory, timer->fsb >> 32,
+                             timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED,
+                             NULL);
     } else if (timer->config & HPET_TN_TYPE_LEVEL) {
         s->isr |= mask;
         /* fold the ICH PIRQ# pin's internal inversion logic into hpet */
index 6b80539c1f96e78b74ec87fe76aa960c2f5d3254..e0e339a534a1e91483aabb0d0ff78d78734f46c3 100644 (file)
@@ -154,6 +154,7 @@ typedef struct VFIOPCIDevice {
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
+    int (*resetfn)(struct VFIOPCIDevice *);
     uint32_t features;
 #define VFIO_FEATURE_ENABLE_VGA_BIT 0
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
@@ -1531,9 +1532,12 @@ static uint64_t vfio_rtl8168_window_quirk_read(void *opaque,
                 return 0;
             }
 
-            io_mem_read(&vdev->pdev.msix_table_mmio,
-                        (hwaddr)(quirk->data.address_match & 0xfff),
-                        &val, size);
+            memory_region_dispatch_read(&vdev->pdev.msix_table_mmio,
+                                        (hwaddr)(quirk->data.address_match
+                                                 & 0xfff),
+                                        &val,
+                                        size,
+                                        MEMTXATTRS_UNSPECIFIED);
             return val;
         }
     }
@@ -1561,9 +1565,12 @@ static void vfio_rtl8168_window_quirk_write(void *opaque, hwaddr addr,
                         memory_region_name(&quirk->mem),
                         vdev->vbasedev.name);
 
-                io_mem_write(&vdev->pdev.msix_table_mmio,
-                             (hwaddr)(quirk->data.address_match & 0xfff),
-                             data, size);
+                memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
+                                             (hwaddr)(quirk->data.address_match
+                                                      & 0xfff),
+                                             data,
+                                             size,
+                                             MEMTXATTRS_UNSPECIFIED);
             }
 
             quirk->data.flags = 1;
@@ -2394,7 +2401,7 @@ static void vfio_map_bar(VFIOPCIDevice *vdev, int nr)
     if (vdev->msix && vdev->msix->table_bar == nr) {
         uint64_t start;
 
-        start = HOST_PAGE_ALIGN(vdev->msix->table_offset +
+        start = HOST_PAGE_ALIGN((uint64_t)vdev->msix->table_offset +
                                 (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
 
         size = start < bar->region.size ? bar->region.size - start : 0;
@@ -3319,6 +3326,162 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
+/*
+ * AMD Radeon PCI config reset, based on Linux:
+ *   drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
+ *   drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
+ *   drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
+ *   drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
+ * IDs: include/drm/drm_pciids.h
+ * Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
+ *
+ * Bonaire and Hawaii GPUs do not respond to a bus reset.  This is a bug in the
+ * hardware that should be fixed on future ASICs.  The symptom of this is that
+ * once the accelerated driver loads, Windows guests will BSOD on subsequent
+ * attempts to load the driver, such as after VM reset or shutdown/restart.  To
+ * work around this, we do an AMD specific PCI config reset, followed by an SMC
+ * reset.  The PCI config reset only works if SMC firmware is running, so we
+ * have a dependency on the state of the device as to whether this reset will
+ * be effective.  There are still cases where we won't be able to kick the
+ * device into working, but this greatly improves the usability overall.  The
+ * config reset magic is relatively common on AMD GPUs, but the setup and SMC
+ * poking is largely ASIC specific.
+ */
+static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
+{
+    uint32_t clk, pc_c;
+
+    /*
+     * Registers 200h and 204h are index and data registers for accessing
+     * indirect configuration registers within the device.
+     */
+    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
+    clk = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
+    pc_c = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+
+    return (!(clk & 1) && (0x20100 <= pc_c));
+}
+
+/*
+ * The scope of a config reset is controlled by a mode bit in the misc register
+ * and a fuse, exposed as a bit in another register.  The fuse is the default
+ * (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
+ * scope = !(misc ^ fuse), where the resulting scope is defined the same as
+ * the fuse.  A truth table therefore tells us that if misc == fuse, we need
+ * to flip the value of the bit in the misc register.
+ */
+static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
+{
+    uint32_t misc, fuse;
+    bool a, b;
+
+    vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
+    fuse = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+    b = fuse & 64;
+
+    vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
+    misc = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+    a = misc & 2;
+
+    if (a == b) {
+        vfio_region_write(&vdev->bars[5].region, 0x204, misc ^ 2, 4);
+        vfio_region_read(&vdev->bars[5].region, 0x204, 4); /* flush */
+    }
+}
+
+static int vfio_radeon_reset(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    int i, ret = 0;
+    uint32_t data;
+
+    /* Defer to a kernel implemented reset */
+    if (vdev->vbasedev.reset_works) {
+        return -ENODEV;
+    }
+
+    /* Enable only memory BAR access */
+    vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);
+
+    /* Reset only works if SMC firmware is loaded and running */
+    if (!vfio_radeon_smc_is_running(vdev)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* Make sure only the GFX function is reset */
+    vfio_radeon_set_gfx_only_reset(vdev);
+
+    /* AMD PCI config reset */
+    vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
+    usleep(100);
+
+    /* Read back the memory size to make sure we're out of reset */
+    for (i = 0; i < 100000; i++) {
+        if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
+            break;
+        }
+        usleep(1);
+    }
+
+    /* Reset SMC */
+    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
+    data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+    data |= 1;
+    vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
+
+    /* Disable SMC clock */
+    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
+    data = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
+    data |= 1;
+    vfio_region_write(&vdev->bars[5].region, 0x204, data, 4);
+
+out:
+    /* Restore PCI command register */
+    vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);
+
+    return ret;
+}
+
+static void vfio_setup_resetfn(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint16_t vendor, device;
+
+    vendor = pci_get_word(pdev->config + PCI_VENDOR_ID);
+    device = pci_get_word(pdev->config + PCI_DEVICE_ID);
+
+    switch (vendor) {
+    case 0x1002:
+        switch (device) {
+        /* Bonaire */
+        case 0x6649: /* Bonaire [FirePro W5100] */
+        case 0x6650:
+        case 0x6651:
+        case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
+        case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
+        case 0x665d: /* Bonaire [Radeon R7 200 Series] */
+        /* Hawaii */
+        case 0x67A0: /* Hawaii XT GL [FirePro W9100] */
+        case 0x67A1: /* Hawaii PRO GL [FirePro W8100] */
+        case 0x67A2:
+        case 0x67A8:
+        case 0x67A9:
+        case 0x67AA:
+        case 0x67B0: /* Hawaii XT [Radeon R9 290X] */
+        case 0x67B1: /* Hawaii PRO [Radeon R9 290] */
+        case 0x67B8:
+        case 0x67B9:
+        case 0x67BA:
+        case 0x67BE:
+            vdev->resetfn = vfio_radeon_reset;
+            break;
+        }
+        break;
+    }
+}
+
 static int vfio_initfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
@@ -3352,7 +3515,7 @@ static int vfio_initfn(PCIDevice *pdev)
     len = readlink(path, iommu_group_path, sizeof(path));
     if (len <= 0 || len >= sizeof(path)) {
         error_report("vfio: error no iommu_group for device");
-        return len < 0 ? -errno : ENAMETOOLONG;
+        return len < 0 ? -errno : -ENAMETOOLONG;
     }
 
     iommu_group_path[len] = 0;
@@ -3467,6 +3630,7 @@ static int vfio_initfn(PCIDevice *pdev)
 
     vfio_register_err_notifier(vdev);
     vfio_register_req_notifier(vdev);
+    vfio_setup_resetfn(vdev);
 
     return 0;
 
@@ -3514,6 +3678,10 @@ static void vfio_pci_reset(DeviceState *dev)
 
     vfio_pci_pre_reset(vdev);
 
+    if (vdev->resetfn && !vdev->resetfn(vdev)) {
+        goto post_reset;
+    }
+
     if (vdev->vbasedev.reset_works &&
         (vdev->has_flr || !vdev->has_pm_reset) &&
         !ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {
index 95b0643448b8175ea3dff7ff58700a4cdc8a77e9..484c3c333c3fac488339b9d42b1252dd5bbd52d3 100644 (file)
@@ -383,7 +383,7 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp)
                                    virtio_balloon_stat, s);
 
     if (ret < 0) {
-        error_setg(errp, "Adding balloon handler failed");
+        error_setg(errp, "Only one balloon device is supported");
         virtio_cleanup(vdev);
         return;
     }
index 7d1e26b33b5d8a1f2a8c719bd61424a2cee8934e..d2bb423de1af73ca9d869c1ed3c55167f79106ef 100644 (file)
@@ -82,9 +82,6 @@ struct AioContext {
     /* Used for aio_notify.  */
     EventNotifier notifier;
 
-    /* GPollFDs for aio_poll() */
-    GArray *pollfds;
-
     /* Thread pool for performing work and receiving completion callbacks */
     struct ThreadPool *thread_pool;
 
@@ -121,13 +118,14 @@ void aio_context_ref(AioContext *ctx);
 void aio_context_unref(AioContext *ctx);
 
 /* Take ownership of the AioContext.  If the AioContext will be shared between
- * threads, a thread must have ownership when calling aio_poll().
+ * threads, and a thread does not want to be interrupted, it will have to
+ * take ownership around calls to aio_poll().  Otherwise, aio_poll()
+ * automatically takes care of calling aio_context_acquire and
+ * aio_context_release.
  *
- * Note that multiple threads calling aio_poll() means timers, BHs, and
- * callbacks may be invoked from a different thread than they were registered
- * from.  Therefore, code must use AioContext acquire/release or use
- * fine-grained synchronization to protect shared state if other threads will
- * be accessing it simultaneously.
+ * Access to timers and BHs from a thread that has not acquired AioContext
+ * is possible.  Access to callbacks for now must be done while the AioContext
+ * is owned by the thread (FIXME).
  */
 void aio_context_acquire(AioContext *ctx);
 
index 4c57d63fe2607a2f61d5b5765871dd5ad59a6d4d..7d1a7174f6b6264f4a367866a5dc20412cc1f801 100644 (file)
@@ -382,7 +382,7 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
 const char *bdrv_get_format_name(BlockDriverState *bs);
 BlockDriverState *bdrv_find_node(const char *node_name);
-BlockDeviceInfoList *bdrv_named_nodes_list(void);
+BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp);
 BlockDriverState *bdrv_lookup_bs(const char *device,
                                  const char *node_name,
                                  Error **errp);
@@ -398,6 +398,7 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                          void *opaque);
 const char *bdrv_get_node_name(const BlockDriverState *bs);
 const char *bdrv_get_device_name(const BlockDriverState *bs);
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
 int bdrv_get_flags(BlockDriverState *bs);
 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors);
@@ -449,18 +450,39 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
 struct HBitmapIter;
 typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
                                           Error **errp);
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap,
+                                       Error **errp);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp);
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *bitmap,
+                                           Error **errp);
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
+                                        const char *name);
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                            int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
                              int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BlockDriverState *bs,
-                          BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
-int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
 void bdrv_disable_copy_on_read(BlockDriverState *bs);
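
Taken together, the new prototypes drop the redundant BlockDriverState argument from the per-bitmap operations and introduce named bitmaps with enabled/disabled and frozen states. A sketch of the resulting lifecycle (the bitmap name and granularity are illustrative):

    Error *local_err = NULL;
    BdrvDirtyBitmap *bitmap;

    bitmap = bdrv_create_dirty_bitmap(bs, 65536, "bitmap0", &local_err);
    if (!bitmap) {
        /* report local_err */
    }
    bdrv_set_dirty_bitmap(bitmap, cur_sector, nr_sectors);  /* no bs argument */
    bdrv_release_dirty_bitmap(bs, bitmap);                  /* bs still needed here */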
index dccb092df79d4cdd1dc5ab42544837cd110e31d1..db29b7424e1343a25f63232cdb0614ccf0f152a2 100644 (file)
@@ -439,6 +439,14 @@ extern BlockDriver bdrv_file;
 extern BlockDriver bdrv_raw;
 extern BlockDriver bdrv_qcow2;
 
+/**
+ * bdrv_setup_io_funcs:
+ *
+ * Prepare a #BlockDriver for I/O request processing by populating
+ * unimplemented coroutine and AIO interfaces with generic wrapper functions
+ * that fall back to implemented interfaces.
+ */
+void bdrv_setup_io_funcs(BlockDriver *bdrv);
 
 int get_tmp_filename(char *filename, int size);
 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
@@ -590,7 +598,7 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   const char *replaces,
-                  int64_t speed, int64_t granularity, int64_t buf_size,
+                  int64_t speed, uint32_t granularity, int64_t buf_size,
                   MirrorSyncMode mode, BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb,
@@ -602,6 +610,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @cb: Completion function for the job.
@@ -612,6 +621,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  */
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
@@ -624,4 +634,8 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 bool blk_dev_is_medium_locked(BlockBackend *blk);
 void blk_dev_resize_cb(BlockBackend *blk);
 
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+                      int nr_sectors);
+
 #endif /* BLOCK_INT_H */
index b6d4ebbe03c5c7f198baea74c72846a8b2e4c618..57d8ef13e21bde3878062074e1dcdfc970301b5c 100644 (file)
@@ -79,10 +79,16 @@ struct BlockJob {
     bool cancelled;
 
     /**
-     * Set to true if the job is either paused, or will pause itself
-     * as soon as possible (if busy == true).
+     * Counter for pause request. If non-zero, the block job is either paused,
+     * or if busy == true will pause itself as soon as possible.
      */
-    bool paused;
+    int pause_count;
+
+    /**
+     * Set to true if the job is paused by user.  Can be unpaused with the
+     * block-job-resume QMP command.
+     */
+    bool user_paused;
 
     /**
      * Set to false by the job while it is in a quiescent state, where
@@ -225,10 +231,18 @@ void block_job_pause(BlockJob *job);
  * block_job_resume:
  * @job: The job to be resumed.
  *
- * Resume the specified job.
+ * Resume the specified job.  Must be paired with a preceding block_job_pause.
  */
 void block_job_resume(BlockJob *job);
 
+/**
+ * block_job_enter:
+ * @job: The job to enter.
+ *
+ * Continue the specified job by entering the coroutine.
+ */
+void block_job_enter(BlockJob *job);
+
 /**
  * block_job_event_cancelled:
  * @job: The job whose information is requested.
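
Because the pause state is now a counter rather than a flag, pause requests nest and must be balanced. Assuming no other pause source is active:

    block_job_pause(job);    /* pause_count: 0 -> 1, job pauses */
    block_job_pause(job);    /* pause_count: 1 -> 2 */
    block_job_resume(job);   /* pause_count: 2 -> 1, still paused */
    block_job_resume(job);   /* pause_count: 1 -> 0, job continues */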
index 168d788521144e5afb92f65601094fc59efeface..327549d91723cba4b00d01472e900ab9234b4819 100644 (file)
@@ -29,7 +29,7 @@
 #include "block/block.h"
 #include "block/snapshot.h"
 
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs);
+BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp);
 int bdrv_query_snapshot_info_list(BlockDriverState *bs,
                                   SnapshotInfoList **p_list,
                                   Error **errp);
index 0ca6f0b953cfdc476453ad360c852a5b3fc4565c..3f565460662246b8fb424b33a46961995caee333 100644 (file)
@@ -30,6 +30,7 @@
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
+#include "exec/memattrs.h"
 
 #ifndef TARGET_LONG_BITS
 #error TARGET_LONG_BITS must be defined before including this header
@@ -102,12 +103,22 @@ typedef struct CPUTLBEntry {
 
 QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
 
+/* The IOTLB is not accessed directly inline by generated TCG code,
+ * so the CPUIOTLBEntry layout is not as critical as that of the
+ * CPUTLBEntry. (This is also why we don't want to combine the two
+ * structs into one.)
+ */
+typedef struct CPUIOTLBEntry {
+    hwaddr addr;
+    MemTxAttrs attrs;
+} CPUIOTLBEntry;
+
 #define CPU_COMMON_TLB \
     /* The meaning of the MMU modes is defined in the target code. */   \
     CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
     CPUTLBEntry tlb_v_table[NB_MMU_MODES][CPU_VTLB_SIZE];               \
-    hwaddr iotlb[NB_MMU_MODES][CPU_TLB_SIZE];                           \
-    hwaddr iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE];                        \
+    CPUIOTLBEntry iotlb[NB_MMU_MODES][CPU_TLB_SIZE];                    \
+    CPUIOTLBEntry iotlb_v[NB_MMU_MODES][CPU_VTLB_SIZE];                 \
     target_ulong tlb_flush_addr;                                        \
     target_ulong tlb_flush_mask;                                        \
     target_ulong vtlb_index;                                            \
index 8eb0db3910e8611b339af66778b9c3c40d85921f..b58cd47ced71a47f6d1d8636067eb87ab777d24b 100644 (file)
@@ -105,6 +105,9 @@ void tlb_flush(CPUState *cpu, int flush_global);
 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                   hwaddr paddr, int prot,
                   int mmu_idx, target_ulong size);
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+                             hwaddr paddr, MemTxAttrs attrs,
+                             int prot, int mmu_idx, target_ulong size);
 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr);
 #else
 static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
@@ -341,10 +344,6 @@ void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align));
 
 struct MemoryRegion *iotlb_to_region(CPUState *cpu,
                                      hwaddr index);
-bool io_mem_read(struct MemoryRegion *mr, hwaddr addr,
-                 uint64_t *pvalue, unsigned size);
-bool io_mem_write(struct MemoryRegion *mr, hwaddr addr,
-                  uint64_t value, unsigned size);
 
 void tlb_fill(CPUState *cpu, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr);
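
The hunk only shows the new declaration; presumably the existing tlb_set_page() becomes a thin wrapper that supplies unspecified attributes, along these lines:

    void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                      hwaddr paddr, int prot,
                      int mmu_idx, target_ulong size)
    {
        tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
                                prot, mmu_idx, size);
    }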
diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h
new file mode 100644 (file)
index 0000000..1389b4b
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Memory transaction attributes
+ *
+ * Copyright (c) 2015 Linaro Limited.
+ *
+ * Authors:
+ *  Peter Maydell <peter.maydell@linaro.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MEMATTRS_H
+#define MEMATTRS_H
+
+/* Every memory transaction has associated with it a set of
+ * attributes. Some of these are generic (such as the ID of
+ * the bus master); some are specific to a particular kind of
+ * bus (such as the ARM Secure/NonSecure bit). We define them
+ * all as non-overlapping bitfields in a single struct to avoid
+ * confusion if different parts of QEMU used the same bit for
+ * different semantics.
+ */
+typedef struct MemTxAttrs {
+    /* Bus masters which don't specify any attributes will get this
+     * (via the MEMTXATTRS_UNSPECIFIED constant), so that we can
+     * distinguish "all attributes deliberately clear" from
+     * "didn't specify" if necessary.
+     */
+    unsigned int unspecified:1;
+    /* ARM/AMBA TrustZone Secure access */
+    unsigned int secure:1;
+    /* Memory access is usermode (unprivileged) */
+    unsigned int user:1;
+} MemTxAttrs;
+
+/* Bus masters which don't specify any attributes will get this,
+ * which has all attribute bits clear except the topmost one
+ * (so that we can distinguish "all attributes deliberately clear"
+ * from "didn't specify" if necessary).
+ */
+#define MEMTXATTRS_UNSPECIFIED ((MemTxAttrs) { .unspecified = 1 })
+
+#endif
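
Since MemTxAttrs is an ordinary struct of bitfields, callers can build attributes with a designated initializer. A small sketch using the accessors declared later in this series (the address is hypothetical):

    MemTxAttrs attrs = { .secure = 1 };   /* ARM TrustZone Secure access */
    MemTxResult res;
    uint32_t val;

    val = address_space_ldl_le(&address_space_memory, 0x10000, attrs, &res);
    if (res != MEMTX_OK) {
        /* unassigned memory, device error, or IOMMU fault */
    }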
index a2ea58776d84b4764fc4cdeaca07019e55f4ace3..0ccfd3b42a1ddd07c655409f51f72f8c13cfac18 100644 (file)
@@ -28,6 +28,7 @@
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
+#include "exec/memattrs.h"
 #include "qemu/queue.h"
 #include "qemu/int128.h"
 #include "qemu/notify.h"
@@ -68,6 +69,16 @@ struct IOMMUTLBEntry {
     IOMMUAccessFlags perm;
 };
 
+/* New-style MMIO accessors can indicate that the transaction failed.
+ * A zero (MEMTX_OK) response means success; anything else is a failure
+ * of some kind. The memory subsystem will bitwise-OR together results
+ * if it is synthesizing an operation from multiple smaller accesses.
+ */
+#define MEMTX_OK 0
+#define MEMTX_ERROR             (1U << 0) /* device returned an error */
+#define MEMTX_DECODE_ERROR      (1U << 1) /* nothing at that address */
+typedef uint32_t MemTxResult;
+
 /*
  * Memory region callbacks
  */
@@ -84,6 +95,17 @@ struct MemoryRegionOps {
                   uint64_t data,
                   unsigned size);
 
+    MemTxResult (*read_with_attrs)(void *opaque,
+                                   hwaddr addr,
+                                   uint64_t *data,
+                                   unsigned size,
+                                   MemTxAttrs attrs);
+    MemTxResult (*write_with_attrs)(void *opaque,
+                                    hwaddr addr,
+                                    uint64_t data,
+                                    unsigned size,
+                                    MemTxAttrs attrs);
+
     enum device_endian endianness;
     /* Guest-visible constraints: */
     struct {
@@ -1042,6 +1064,37 @@ void memory_global_dirty_log_stop(void);
 
 void mtree_info(fprintf_function mon_printf, void *f);
 
+/**
+ * memory_region_dispatch_read: perform a read directly to the specified
+ * MemoryRegion.
+ *
+ * @mr: #MemoryRegion to access
+ * @addr: address within that region
+ * @pval: pointer to uint64_t which the data is written to
+ * @size: size of the access in bytes
+ * @attrs: memory transaction attributes to use for the access
+ */
+MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
+                                        hwaddr addr,
+                                        uint64_t *pval,
+                                        unsigned size,
+                                        MemTxAttrs attrs);
+/**
+ * memory_region_dispatch_write: perform a write directly to the specified
+ * MemoryRegion.
+ *
+ * @mr: #MemoryRegion to access
+ * @addr: address within that region
+ * @data: data to write
+ * @size: size of the access in bytes
+ * @attrs: memory transaction attributes to use for the access
+ */
+MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
+                                         hwaddr addr,
+                                         uint64_t data,
+                                         unsigned size,
+                                         MemTxAttrs attrs);
+
 /**
  * address_space_init: initializes an address space
  *
@@ -1067,41 +1120,117 @@ void address_space_destroy(AddressSpace *as);
 /**
  * address_space_rw: read from or write to an address space.
  *
- * Return true if the operation hit any unassigned memory or encountered an
- * IOMMU fault.
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
+ * @attrs: memory transaction attributes
  * @buf: buffer with the data transferred
  * @is_write: indicates the transfer direction
  */
-bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
-                      int len, bool is_write);
+MemTxResult address_space_rw(AddressSpace *as, hwaddr addr,
+                             MemTxAttrs attrs, uint8_t *buf,
+                             int len, bool is_write);
 
 /**
  * address_space_write: write to address space.
  *
- * Return true if the operation hit any unassigned memory or encountered an
- * IOMMU fault.
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
+ * @attrs: memory transaction attributes
  * @buf: buffer with the data transferred
  */
-bool address_space_write(AddressSpace *as, hwaddr addr,
-                         const uint8_t *buf, int len);
+MemTxResult address_space_write(AddressSpace *as, hwaddr addr,
+                                MemTxAttrs attrs,
+                                const uint8_t *buf, int len);
 
 /**
  * address_space_read: read from an address space.
  *
- * Return true if the operation hit any unassigned memory or encountered an
- * IOMMU fault.
+ * Return a MemTxResult indicating whether the operation succeeded
+ * or failed (eg unassigned memory, device rejected the transaction,
+ * IOMMU fault).
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
+ * @attrs: memory transaction attributes
  * @buf: buffer with the data transferred
  */
-bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len);
+MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+                               uint8_t *buf, int len);
+
+/**
+ * address_space_ld*: load from an address space
+ * address_space_st*: store to an address space
+ *
+ * These functions perform a load or store of the byte, word,
+ * longword or quad to the specified address within the AddressSpace.
+ * The _le suffixed functions treat the data as little endian;
+ * _be indicates big endian; no suffix indicates "same endianness
+ * as guest CPU".
+ *
+ * The "guest CPU endianness" accessors are deprecated for use outside
+ * target-* code; devices should be CPU-agnostic and use either the LE
+ * or the BE accessors.
+ *
+ * @as #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @val: data value, for stores
+ * @attrs: memory transaction attributes
+ * @result: location to write the success/failure of the transaction;
+ *   if NULL, this information is discarded
+ */
+uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+
+#ifdef NEED_CPU_H
+uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
+                            MemTxAttrs attrs, MemTxResult *result);
+#endif
 
 /* address_space_translate: translate an address range into an address space
  * into a MemoryRegion and an address range into that section
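
Devices that need the attributes or want to report failures implement the new *_with_attrs callbacks instead of read/write; the memory core bitwise-ORs the MemTxResults if it has to synthesize one access from several smaller ones. A hedged sketch of a device that rejects non-secure reads (the mydev_* names are invented for illustration):

    static MemTxResult mydev_read_with_attrs(void *opaque, hwaddr addr,
                                             uint64_t *data, unsigned size,
                                             MemTxAttrs attrs)
    {
        if (!attrs.secure) {
            return MEMTX_ERROR;   /* device refuses the transaction */
        }
        *data = 0;                /* real register contents would go here */
        return MEMTX_OK;
    }

    static const MemoryRegionOps mydev_ops = {
        .read_with_attrs = mydev_read_with_attrs,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };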
index b97c2956ec8264fad564ba692dce6ac5e4c8fd74..d4ffead48a8cc49e17f6e3ecd667d6a8ffefa5db 100644 (file)
@@ -568,7 +568,7 @@ static inline void
 pci_set_byte_by_mask(uint8_t *config, uint8_t mask, uint8_t reg)
 {
     uint8_t val = pci_get_byte(config);
-    uint8_t rval = reg << (ffs(mask) - 1);
+    uint8_t rval = reg << ctz32(mask);
     pci_set_byte(config, (~mask & val) | (mask & rval));
 }
 
@@ -576,14 +576,14 @@ static inline uint8_t
 pci_get_byte_by_mask(uint8_t *config, uint8_t mask)
 {
     uint8_t val = pci_get_byte(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 static inline void
 pci_set_word_by_mask(uint8_t *config, uint16_t mask, uint16_t reg)
 {
     uint16_t val = pci_get_word(config);
-    uint16_t rval = reg << (ffs(mask) - 1);
+    uint16_t rval = reg << ctz32(mask);
     pci_set_word(config, (~mask & val) | (mask & rval));
 }
 
@@ -591,14 +591,14 @@ static inline uint16_t
 pci_get_word_by_mask(uint8_t *config, uint16_t mask)
 {
     uint16_t val = pci_get_word(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 static inline void
 pci_set_long_by_mask(uint8_t *config, uint32_t mask, uint32_t reg)
 {
     uint32_t val = pci_get_long(config);
-    uint32_t rval = reg << (ffs(mask) - 1);
+    uint32_t rval = reg << ctz32(mask);
     pci_set_long(config, (~mask & val) | (mask & rval));
 }
 
@@ -606,14 +606,14 @@ static inline uint32_t
 pci_get_long_by_mask(uint8_t *config, uint32_t mask)
 {
     uint32_t val = pci_get_long(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 static inline void
 pci_set_quad_by_mask(uint8_t *config, uint64_t mask, uint64_t reg)
 {
     uint64_t val = pci_get_quad(config);
-    uint64_t rval = reg << (ffs(mask) - 1);
+    uint64_t rval = reg << ctz32(mask);
     pci_set_quad(config, (~mask & val) | (mask & rval));
 }
 
@@ -621,7 +621,7 @@ static inline uint64_t
 pci_get_quad_by_mask(uint8_t *config, uint64_t mask)
 {
     uint64_t val = pci_get_quad(config);
-    return (val & mask) >> (ffs(mask) - 1);
+    return (val & mask) >> ctz32(mask);
 }
 
 PCIDevice *pci_create_multifunction(PCIBus *bus, int devfn, bool multifunction,
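
These conversions are equivalent because, for any non-zero mask, ffs(mask) - 1 and ctz32(mask) both give the bit position of the lowest set bit; they differ only at zero:

    ffs(0x00f0) - 1 == 4     ctz32(0x00f0) == 4
    ffs(0)      - 1 == -1    ctz32(0)      == 32

The masks used with these helpers are compile-time non-zero, so the behaviour is unchanged; the zero case is why kvm_irqchip_get_virq() below tests for 32 rather than testing ffs()'s result against 0.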
index 848ab1c206241ebde787f753e5bde43cc5a53982..6a28b33e696e96cbe1b0ae49d146a52ef9548e90 100644 (file)
 
 /* PCI_EXP_FLAGS */
 #define PCI_EXP_FLAGS_VER2              2 /* for now, supports only ver. 2 */
-#define PCI_EXP_FLAGS_IRQ_SHIFT         (ffs(PCI_EXP_FLAGS_IRQ) - 1)
-#define PCI_EXP_FLAGS_TYPE_SHIFT        (ffs(PCI_EXP_FLAGS_TYPE) - 1)
+#define PCI_EXP_FLAGS_IRQ_SHIFT         ctz32(PCI_EXP_FLAGS_IRQ)
+#define PCI_EXP_FLAGS_TYPE_SHIFT        ctz32(PCI_EXP_FLAGS_TYPE)
 
 
 /* PCI_EXP_LINK{CAP, STA} */
 /* link speed */
 #define PCI_EXP_LNK_LS_25               1
 
-#define PCI_EXP_LNK_MLW_SHIFT           (ffs(PCI_EXP_LNKCAP_MLW) - 1)
+#define PCI_EXP_LNK_MLW_SHIFT           ctz32(PCI_EXP_LNKCAP_MLW)
 #define PCI_EXP_LNK_MLW_1               (1 << PCI_EXP_LNK_MLW_SHIFT)
 
 /* PCI_EXP_LINKCAP */
-#define PCI_EXP_LNKCAP_ASPMS_SHIFT      (ffs(PCI_EXP_LNKCAP_ASPMS) - 1)
+#define PCI_EXP_LNKCAP_ASPMS_SHIFT      ctz32(PCI_EXP_LNKCAP_ASPMS)
 #define PCI_EXP_LNKCAP_ASPMS_0S         (1 << PCI_EXP_LNKCAP_ASPMS_SHIFT)
 
-#define PCI_EXP_LNKCAP_PN_SHIFT         (ffs(PCI_EXP_LNKCAP_PN) - 1)
+#define PCI_EXP_LNKCAP_PN_SHIFT         ctz32(PCI_EXP_LNKCAP_PN)
 
-#define PCI_EXP_SLTCAP_PSN_SHIFT        (ffs(PCI_EXP_SLTCAP_PSN) - 1)
+#define PCI_EXP_SLTCAP_PSN_SHIFT        ctz32(PCI_EXP_SLTCAP_PSN)
 
 #define PCI_EXP_SLTCTL_IND_RESERVED     0x0
 #define PCI_EXP_SLTCTL_IND_ON           0x1
 #define PCI_EXP_SLTCTL_IND_BLINK        0x2
 #define PCI_EXP_SLTCTL_IND_OFF          0x3
-#define PCI_EXP_SLTCTL_AIC_SHIFT        (ffs(PCI_EXP_SLTCTL_AIC) - 1)
+#define PCI_EXP_SLTCTL_AIC_SHIFT        ctz32(PCI_EXP_SLTCTL_AIC)
 #define PCI_EXP_SLTCTL_AIC_OFF                          \
     (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_AIC_SHIFT)
 
-#define PCI_EXP_SLTCTL_PIC_SHIFT        (ffs(PCI_EXP_SLTCTL_PIC) - 1)
+#define PCI_EXP_SLTCTL_PIC_SHIFT        ctz32(PCI_EXP_SLTCTL_PIC)
 #define PCI_EXP_SLTCTL_PIC_OFF                          \
     (PCI_EXP_SLTCTL_IND_OFF << PCI_EXP_SLTCTL_PIC_SHIFT)
 #define PCI_EXP_SLTCTL_PIC_ON                          \
 
 #define PCI_ERR_ROOT_IRQ_MAX            32
 #define PCI_ERR_ROOT_IRQ                0xf8000000
-#define PCI_ERR_ROOT_IRQ_SHIFT          (ffs(PCI_ERR_ROOT_IRQ) - 1)
+#define PCI_ERR_ROOT_IRQ_SHIFT          ctz32(PCI_ERR_ROOT_IRQ)
 #define PCI_ERR_ROOT_STATUS_REPORT_MASK (PCI_ERR_ROOT_COR_RCV |         \
                                          PCI_ERR_ROOT_MULTI_COR_RCV |   \
                                          PCI_ERR_ROOT_UNCOR_RCV |       \
index 57a62d4b763b673e7db4040e77cceb0906c9f2b5..e5673394d39f56c0231eeff030ee1b5d390a768a 100644 (file)
@@ -37,9 +37,6 @@ void qerror_report_err(Error *err);
 #define QERR_BASE_NOT_FOUND \
     ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
 
-#define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
-    ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
-
 #define QERR_BLOCK_JOB_NOT_READY \
     ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed"
 
@@ -58,9 +55,6 @@ void qerror_report_err(Error *err);
 #define QERR_DEVICE_IN_USE \
     ERROR_CLASS_GENERIC_ERROR, "Device '%s' is in use"
 
-#define QERR_DEVICE_IS_READ_ONLY \
-    ERROR_CLASS_GENERIC_ERROR, "Device '%s' is read only"
-
 #define QERR_DEVICE_NO_HOTPLUG \
     ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging"
 
index 550d7ce2c397ca1a61cff78ce690edfe54e2f6a4..f0a85f86491da394fa23b32d35384acb6976c2cb 100644 (file)
@@ -64,6 +64,29 @@ struct HBitmapIter {
  */
 HBitmap *hbitmap_alloc(uint64_t size, int granularity);
 
+/**
+ * hbitmap_truncate:
+ * @hb: The bitmap to change the size of.
+ * @size: The number of elements to change the bitmap to accommodate.
+ *
+ * Truncate or grow an existing bitmap to accommodate a new number of elements.
+ * This may invalidate existing HBitmapIterators.
+ */
+void hbitmap_truncate(HBitmap *hb, uint64_t size);
+
+/**
+ * hbitmap_merge:
+ * @a: The bitmap to store the result in.
+ * @b: The bitmap to merge into @a.
+ * @return true if the merge was successful,
+ *         false if it was not attempted.
+ *
+ * Merge two bitmaps together.
+ * A := A (BITOR) B.
+ * B is left unmodified.
+ */
+bool hbitmap_merge(HBitmap *a, const HBitmap *b);
+
 /**
  * hbitmap_empty:
  * @hb: HBitmap to operate on.
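
A quick sketch of the merge helper's contract (a and b are assumed to be two dirty bitmaps of compatible size):

    if (hbitmap_merge(a, b)) {
        /* a now holds a | b; b is unchanged */
    } else {
        /* merge was not attempted */
    }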
index 9dafb4817e0f2e16422f5e86c6a370f0d252eb9b..39f0f19fb00138b9984a65999f51d08119a5aed9 100644 (file)
@@ -24,6 +24,7 @@
 #include <setjmp.h>
 #include "hw/qdev-core.h"
 #include "exec/hwaddr.h"
+#include "exec/memattrs.h"
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/tls.h"
@@ -195,6 +196,7 @@ typedef struct CPUWatchpoint {
     vaddr vaddr;
     vaddr len;
     vaddr hitaddr;
+    MemTxAttrs hitattrs;
     int flags; /* BP_* */
     QTAILQ_ENTRY(CPUWatchpoint) entry;
 } CPUWatchpoint;
index 12016b47f38109293fe0ebcf438ffeb8bc0f72b1..cd601f4069950f52fb2ec8325d108d0a8820f44a 100644 (file)
@@ -58,7 +58,7 @@ struct virtio_blk_config {
        uint32_t size_max;
        /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
        uint32_t seg_max;
-       /* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
+       /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
        struct virtio_blk_geometry {
                uint16_t cylinders;
                uint8_t heads;
@@ -117,7 +117,11 @@ struct virtio_blk_config {
 #define VIRTIO_BLK_T_BARRIER   0x80000000
 #endif /* !VIRTIO_BLK_NO_LEGACY */
 
-/* This is the first element of the read scatter-gather list. */
+/*
+ * This comes first in the read scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated,
+ * this is the first element of the read scatter-gather list.
+ */
 struct virtio_blk_outhdr {
        /* VIRTIO_BLK_T* */
        __virtio32 type;
index 77e9b9c370ecae2052b01d9f1b4b122cac1d2d3b..b4a4d5e0b950d4d81baaa667af096380deedd747 100644 (file)
@@ -87,6 +87,8 @@ int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
                          int nb_sectors);
 int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
               int nb_sectors);
+int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
+                     int nb_sectors, BdrvRequestFlags flags);
 BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
                                  int nb_sectors, BdrvRequestFlags flags,
                                  BlockCompletionFunc *cb, void *opaque);
index 3f2f4c89e3fea44d9f74df6a6bd86e30adebda9f..efa8b9993ae1e28ae289807ab5196c5a45867c32 100644 (file)
@@ -88,7 +88,8 @@ static inline int dma_memory_rw_relaxed(AddressSpace *as, dma_addr_t addr,
                                         void *buf, dma_addr_t len,
                                         DMADirection dir)
 {
-    return address_space_rw(as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    return (bool)address_space_rw(as, addr, MEMTXATTRS_UNSPECIFIED,
+                                  buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
 static inline int dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr,
index 9cc9e081392277afc970655368f7f773ca1d12ef..4035c4fe5408e7b19ccb98c651c8e17ee356a134 100644 (file)
@@ -72,9 +72,6 @@
 #define sigsetjmp(env, savemask) setjmp(env)
 #define siglongjmp(env, val) longjmp(env, val)
 
-/* Declaration of ffs() is missing in MinGW's strings.h. */
-int ffs(int i);
-
 /* Missing POSIX functions. Don't use MinGW-w64 macros. */
 #undef gmtime_r
 struct tm *gmtime_r(const time_t *timep, struct tm *result);
index 2f5b9f06348b3a2b4fef59b6146337cb60be787d..03cd665a8f7f5b793342130191920ce58ed56089 100644 (file)
@@ -307,6 +307,7 @@ QemuConsole *qemu_console_lookup_by_device(DeviceState *dev, uint32_t head);
 bool qemu_console_is_visible(QemuConsole *con);
 bool qemu_console_is_graphic(QemuConsole *con);
 bool qemu_console_is_fixedsize(QemuConsole *con);
+char *qemu_console_get_label(QemuConsole *con);
 int qemu_console_get_index(QemuConsole *con);
 uint32_t qemu_console_get_head(QemuConsole *con);
 QemuUIInfo *qemu_console_get_ui_info(QemuConsole *con);
index 5d7a9ac6f25e494326ed05b5732695796f8aa22f..e34c4effcbfe22defe37ddb79778bdbf113f9bb3 100644 (file)
@@ -35,6 +35,7 @@
 # define PIXMAN_BE_r8g8b8a8   PIXMAN_r8g8b8a8
 # define PIXMAN_BE_x8b8g8r8   PIXMAN_x8b8g8r8
 # define PIXMAN_BE_a8b8g8r8   PIXMAN_a8b8g8r8
+# define PIXMAN_LE_x8r8g8b8   PIXMAN_b8g8r8x8
 #else
 # define PIXMAN_BE_r8g8b8     PIXMAN_b8g8r8
 # define PIXMAN_BE_x8r8g8b8   PIXMAN_b8g8r8x8
@@ -45,6 +46,7 @@
 # define PIXMAN_BE_r8g8b8a8   PIXMAN_a8b8g8r8
 # define PIXMAN_BE_x8b8g8r8   PIXMAN_r8g8b8x8
 # define PIXMAN_BE_a8b8g8r8   PIXMAN_r8g8b8a8
+# define PIXMAN_LE_x8r8g8b8   PIXMAN_x8r8g8b8
 #endif
 
 /* -------------------------------------------------------------------- */
index 25b94c753026a888764e61cc2cd1e0f6321a7ae1..42db3c1645bdbbc308cae354c1112afdb0bb410d 100644 (file)
@@ -42,8 +42,7 @@ int qemu_spice_set_passwd(const char *passwd,
                           bool fail_if_connected, bool disconnect_if_connected);
 int qemu_spice_set_pw_expire(time_t expires);
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
-                            const char *subject,
-                            MonitorCompletion cb, void *opaque);
+                            const char *subject);
 
 CharDriverState *qemu_chr_open_spice_vmc(const char *type);
 #if SPICE_SERVER_VERSION >= 0x000c02
@@ -70,10 +69,8 @@ static inline int qemu_spice_set_pw_expire(time_t expires)
     return -1;
 }
 static inline int qemu_spice_migrate_info(const char *h, int p, int t,
-                                          const char *s,
-                                          MonitorCompletion cb, void *opaque)
+                                          const char *s)
 {
-    cb(opaque, NULL);
     return -1;
 }
 
index 53883a17fcc64aa3be7d5b609f7a1c0ba681e2c5..b25328a6ba00a5136bd5161233122e29746a2ab0 100644 (file)
@@ -97,7 +97,8 @@ struct SimpleSpiceDisplay {
     /* cursor (without qxl): displaychangelistener -> spice server */
     SimpleSpiceCursor *ptr_define;
     SimpleSpiceCursor *ptr_move;
-    uint16_t ptr_x, ptr_y;
+    int16_t ptr_x, ptr_y;
+    int16_t hot_x, hot_y;
 
     /* cursor (with qxl): qxl local renderer -> displaychangelistener */
     QEMUCursor *cursor;
index 304d5d6f59e01f59fd19f5dca4c35a14acc89a2a..e39093edb9f3e1b8b761f720f58eff302c750a01 100644 (file)
--- a/ioport.c
+++ b/ioport.c
@@ -64,7 +64,8 @@ void cpu_outb(pio_addr_t addr, uint8_t val)
 {
     LOG_IOPORT("outb: %04"FMT_pioaddr" %02"PRIx8"\n", addr, val);
     trace_cpu_out(addr, val);
-    address_space_write(&address_space_io, addr, &val, 1);
+    address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+                        &val, 1);
 }
 
 void cpu_outw(pio_addr_t addr, uint16_t val)
@@ -74,7 +75,8 @@ void cpu_outw(pio_addr_t addr, uint16_t val)
     LOG_IOPORT("outw: %04"FMT_pioaddr" %04"PRIx16"\n", addr, val);
     trace_cpu_out(addr, val);
     stw_p(buf, val);
-    address_space_write(&address_space_io, addr, buf, 2);
+    address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+                        buf, 2);
 }
 
 void cpu_outl(pio_addr_t addr, uint32_t val)
@@ -84,14 +86,16 @@ void cpu_outl(pio_addr_t addr, uint32_t val)
     LOG_IOPORT("outl: %04"FMT_pioaddr" %08"PRIx32"\n", addr, val);
     trace_cpu_out(addr, val);
     stl_p(buf, val);
-    address_space_write(&address_space_io, addr, buf, 4);
+    address_space_write(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+                        buf, 4);
 }
 
 uint8_t cpu_inb(pio_addr_t addr)
 {
     uint8_t val;
 
-    address_space_read(&address_space_io, addr, &val, 1);
+    address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED,
+                       &val, 1);
     trace_cpu_in(addr, val);
     LOG_IOPORT("inb : %04"FMT_pioaddr" %02"PRIx8"\n", addr, val);
     return val;
@@ -102,7 +106,7 @@ uint16_t cpu_inw(pio_addr_t addr)
     uint8_t buf[2];
     uint16_t val;
 
-    address_space_read(&address_space_io, addr, buf, 2);
+    address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 2);
     val = lduw_p(buf);
     trace_cpu_in(addr, val);
     LOG_IOPORT("inw : %04"FMT_pioaddr" %04"PRIx16"\n", addr, val);
@@ -114,7 +118,7 @@ uint32_t cpu_inl(pio_addr_t addr)
     uint8_t buf[4];
     uint32_t val;
 
-    address_space_read(&address_space_io, addr, buf, 4);
+    address_space_read(&address_space_io, addr, MEMTXATTRS_UNSPECIFIED, buf, 4);
     val = ldl_p(buf);
     trace_cpu_in(addr, val);
     LOG_IOPORT("inl : %04"FMT_pioaddr" %08"PRIx32"\n", addr, val);
index 342a23fcb02b67e199be6eac0064a189fb18aa88..a1f91099bc69900627b99d89caebcee013d9df8f 100644 (file)
@@ -31,21 +31,14 @@ typedef ObjectClass IOThreadClass;
 static void *iothread_run(void *opaque)
 {
     IOThread *iothread = opaque;
-    bool blocking;
 
     qemu_mutex_lock(&iothread->init_done_lock);
     iothread->thread_id = qemu_get_thread_id();
     qemu_cond_signal(&iothread->init_done_cond);
     qemu_mutex_unlock(&iothread->init_done_lock);
 
-    while (!iothread->stopping) {
-        aio_context_acquire(iothread->ctx);
-        blocking = true;
-        while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
-            /* Progress was made, keep going */
-            blocking = false;
-        }
-        aio_context_release(iothread->ctx);
+    while (!atomic_read(&iothread->stopping)) {
+        aio_poll(iothread->ctx, true);
     }
     return NULL;
 }
index dd44f8c7534838d0a9be2541bedf29b72b3b576d..2a717e5b504b28b72fd9cdfbf02211e88bb100d3 100644 (file)
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1141,18 +1141,18 @@ static int kvm_irqchip_get_virq(KVMState *s)
 {
     uint32_t *word = s->used_gsi_bitmap;
     int max_words = ALIGN(s->gsi_count, 32) / 32;
-    int i, bit;
+    int i, zeroes;
     bool retry = true;
 
 again:
     /* Return the lowest unused GSI in the bitmap */
     for (i = 0; i < max_words; i++) {
-        bit = ffs(~word[i]);
-        if (!bit) {
+        zeroes = ctz32(~word[i]);
+        if (zeroes == 32) {
             continue;
         }
 
-        return bit - 1 + i * 32;
+        return zeroes + i * 32;
     }
     if (!s->direct_msi && retry) {
         retry = false;
@@ -1667,7 +1667,8 @@ static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
     uint8_t *ptr = data;
 
     for (i = 0; i < count; i++) {
-        address_space_rw(&address_space_io, port, ptr, size,
+        address_space_rw(&address_space_io, port, MEMTXATTRS_UNSPECIFIED,
+                         ptr, size,
                          direction == KVM_EXIT_IO_OUT);
         ptr += size;
     }
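
The ffs()-to-ctz32() conversion changes the bit-search idiom: ffs() is 1-based and returns 0 when no bit is set, while ctz32() is 0-based and returns 32 for an all-zero argument. A worked example with an invented bitmap word:

    uint32_t word = 0x0000000f;     /* GSIs 0-3 in use */
    int old_way = ffs(~word) - 1;   /* ffs() returns 5, so 4 */
    int new_way = ctz32(~word);     /* 4: index of the lowest clear bit */
    /* For a fully used word, ~word == 0 and ctz32(0) == 32, which is
     * exactly what the "zeroes == 32" skip test above checks for. */
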
index a11e9bf08a2a66e678be2489974aae3a299a7d45..0f6cb812db583b0ffdaf489ad904ef4bad7d5be6 100644 (file)
--- a/memory.c
+++ b/memory.c
@@ -368,57 +368,84 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
     }
 }
 
-static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
+static MemTxResult memory_region_oldmmio_read_accessor(MemoryRegion *mr,
+                                                       hwaddr addr,
+                                                       uint64_t *value,
+                                                       unsigned size,
+                                                       unsigned shift,
+                                                       uint64_t mask,
+                                                       MemTxAttrs attrs)
+{
+    uint64_t tmp;
+
+    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    trace_memory_region_ops_read(mr, addr, tmp, size);
+    *value |= (tmp & mask) << shift;
+    return MEMTX_OK;
+}
+
+static MemTxResult memory_region_read_accessor(MemoryRegion *mr,
                                                 hwaddr addr,
                                                 uint64_t *value,
                                                 unsigned size,
                                                 unsigned shift,
-                                                uint64_t mask)
+                                                uint64_t mask,
+                                                MemTxAttrs attrs)
 {
     uint64_t tmp;
 
-    tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+    tmp = mr->ops->read(mr->opaque, addr, size);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
+    return MEMTX_OK;
 }
 
-static void memory_region_read_accessor(MemoryRegion *mr,
-                                        hwaddr addr,
-                                        uint64_t *value,
-                                        unsigned size,
-                                        unsigned shift,
-                                        uint64_t mask)
+static MemTxResult memory_region_read_with_attrs_accessor(MemoryRegion *mr,
+                                                          hwaddr addr,
+                                                          uint64_t *value,
+                                                          unsigned size,
+                                                          unsigned shift,
+                                                          uint64_t mask,
+                                                          MemTxAttrs attrs)
 {
-    uint64_t tmp;
+    uint64_t tmp = 0;
+    MemTxResult r;
 
     if (mr->flush_coalesced_mmio) {
         qemu_flush_coalesced_mmio_buffer();
     }
-    tmp = mr->ops->read(mr->opaque, addr, size);
+    r = mr->ops->read_with_attrs(mr->opaque, addr, &tmp, size, attrs);
     trace_memory_region_ops_read(mr, addr, tmp, size);
     *value |= (tmp & mask) << shift;
+    return r;
 }
 
-static void memory_region_oldmmio_write_accessor(MemoryRegion *mr,
-                                                 hwaddr addr,
-                                                 uint64_t *value,
-                                                 unsigned size,
-                                                 unsigned shift,
-                                                 uint64_t mask)
+static MemTxResult memory_region_oldmmio_write_accessor(MemoryRegion *mr,
+                                                        hwaddr addr,
+                                                        uint64_t *value,
+                                                        unsigned size,
+                                                        unsigned shift,
+                                                        uint64_t mask,
+                                                        MemTxAttrs attrs)
 {
     uint64_t tmp;
 
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
     mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
+    return MEMTX_OK;
 }
 
-static void memory_region_write_accessor(MemoryRegion *mr,
-                                         hwaddr addr,
-                                         uint64_t *value,
-                                         unsigned size,
-                                         unsigned shift,
-                                         uint64_t mask)
+static MemTxResult memory_region_write_accessor(MemoryRegion *mr,
+                                                hwaddr addr,
+                                                uint64_t *value,
+                                                unsigned size,
+                                                unsigned shift,
+                                                uint64_t mask,
+                                                MemTxAttrs attrs)
 {
     uint64_t tmp;
 
@@ -428,24 +455,46 @@ static void memory_region_write_accessor(MemoryRegion *mr,
     tmp = (*value >> shift) & mask;
     trace_memory_region_ops_write(mr, addr, tmp, size);
     mr->ops->write(mr->opaque, addr, tmp, size);
+    return MEMTX_OK;
 }
 
-static void access_with_adjusted_size(hwaddr addr,
+static MemTxResult memory_region_write_with_attrs_accessor(MemoryRegion *mr,
+                                                           hwaddr addr,
+                                                           uint64_t *value,
+                                                           unsigned size,
+                                                           unsigned shift,
+                                                           uint64_t mask,
+                                                           MemTxAttrs attrs)
+{
+    uint64_t tmp;
+
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
+    tmp = (*value >> shift) & mask;
+    trace_memory_region_ops_write(mr, addr, tmp, size);
+    return mr->ops->write_with_attrs(mr->opaque, addr, tmp, size, attrs);
+}
+
+static MemTxResult access_with_adjusted_size(hwaddr addr,
                                       uint64_t *value,
                                       unsigned size,
                                       unsigned access_size_min,
                                       unsigned access_size_max,
-                                      void (*access)(MemoryRegion *mr,
-                                                     hwaddr addr,
-                                                     uint64_t *value,
-                                                     unsigned size,
-                                                     unsigned shift,
-                                                     uint64_t mask),
-                                      MemoryRegion *mr)
+                                      MemTxResult (*access)(MemoryRegion *mr,
+                                                            hwaddr addr,
+                                                            uint64_t *value,
+                                                            unsigned size,
+                                                            unsigned shift,
+                                                            uint64_t mask,
+                                                            MemTxAttrs attrs),
+                                      MemoryRegion *mr,
+                                      MemTxAttrs attrs)
 {
     uint64_t access_mask;
     unsigned access_size;
     unsigned i;
+    MemTxResult r = MEMTX_OK;
 
     if (!access_size_min) {
         access_size_min = 1;
@@ -459,14 +508,16 @@ static void access_with_adjusted_size(hwaddr addr,
     access_mask = -1ULL >> (64 - access_size * 8);
     if (memory_region_big_endian(mr)) {
         for (i = 0; i < size; i += access_size) {
-            access(mr, addr + i, value, access_size,
-                   (size - access_size - i) * 8, access_mask);
+            r |= access(mr, addr + i, value, access_size,
+                        (size - access_size - i) * 8, access_mask, attrs);
         }
     } else {
         for (i = 0; i < size; i += access_size) {
-            access(mr, addr + i, value, access_size, i * 8, access_mask);
+            r |= access(mr, addr + i, value, access_size, i * 8,
+                        access_mask, attrs);
         }
     }
+    return r;
 }
 
 static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
@@ -1053,62 +1104,82 @@ bool memory_region_access_valid(MemoryRegion *mr,
     return true;
 }
 
-static uint64_t memory_region_dispatch_read1(MemoryRegion *mr,
-                                             hwaddr addr,
-                                             unsigned size)
+static MemTxResult memory_region_dispatch_read1(MemoryRegion *mr,
+                                                hwaddr addr,
+                                                uint64_t *pval,
+                                                unsigned size,
+                                                MemTxAttrs attrs)
 {
-    uint64_t data = 0;
+    *pval = 0;
 
     if (mr->ops->read) {
-        access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
-                                  memory_region_read_accessor, mr);
+        return access_with_adjusted_size(addr, pval, size,
+                                         mr->ops->impl.min_access_size,
+                                         mr->ops->impl.max_access_size,
+                                         memory_region_read_accessor,
+                                         mr, attrs);
+    } else if (mr->ops->read_with_attrs) {
+        return access_with_adjusted_size(addr, pval, size,
+                                         mr->ops->impl.min_access_size,
+                                         mr->ops->impl.max_access_size,
+                                         memory_region_read_with_attrs_accessor,
+                                         mr, attrs);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
-                                  memory_region_oldmmio_read_accessor, mr);
+        return access_with_adjusted_size(addr, pval, size, 1, 4,
+                                         memory_region_oldmmio_read_accessor,
+                                         mr, attrs);
     }
-
-    return data;
 }
 
-static bool memory_region_dispatch_read(MemoryRegion *mr,
+MemTxResult memory_region_dispatch_read(MemoryRegion *mr,
                                         hwaddr addr,
                                         uint64_t *pval,
-                                        unsigned size)
+                                        unsigned size,
+                                        MemTxAttrs attrs)
 {
+    MemTxResult r;
+
     if (!memory_region_access_valid(mr, addr, size, false)) {
         *pval = unassigned_mem_read(mr, addr, size);
-        return true;
+        return MEMTX_DECODE_ERROR;
     }
 
-    *pval = memory_region_dispatch_read1(mr, addr, size);
+    r = memory_region_dispatch_read1(mr, addr, pval, size, attrs);
     adjust_endianness(mr, pval, size);
-    return false;
+    return r;
 }
 
-static bool memory_region_dispatch_write(MemoryRegion *mr,
+MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
                                          hwaddr addr,
                                          uint64_t data,
-                                         unsigned size)
+                                         unsigned size,
+                                         MemTxAttrs attrs)
 {
     if (!memory_region_access_valid(mr, addr, size, true)) {
         unassigned_mem_write(mr, addr, data, size);
-        return true;
+        return MEMTX_DECODE_ERROR;
     }
 
     adjust_endianness(mr, &data, size);
 
     if (mr->ops->write) {
-        access_with_adjusted_size(addr, &data, size,
-                                  mr->ops->impl.min_access_size,
-                                  mr->ops->impl.max_access_size,
-                                  memory_region_write_accessor, mr);
+        return access_with_adjusted_size(addr, &data, size,
+                                         mr->ops->impl.min_access_size,
+                                         mr->ops->impl.max_access_size,
+                                         memory_region_write_accessor, mr,
+                                         attrs);
+    } else if (mr->ops->write_with_attrs) {
+        return
+            access_with_adjusted_size(addr, &data, size,
+                                      mr->ops->impl.min_access_size,
+                                      mr->ops->impl.max_access_size,
+                                      memory_region_write_with_attrs_accessor,
+                                      mr, attrs);
     } else {
-        access_with_adjusted_size(addr, &data, size, 1, 4,
-                                  memory_region_oldmmio_write_accessor, mr);
+        return access_with_adjusted_size(addr, &data, size, 1, 4,
+                                         memory_region_oldmmio_write_accessor,
+                                         mr, attrs);
     }
-    return false;
 }
 
 void memory_region_init_io(MemoryRegion *mr,
@@ -1999,17 +2070,6 @@ void address_space_destroy(AddressSpace *as)
     call_rcu(as, do_address_space_destroy, rcu);
 }
 
-bool io_mem_read(MemoryRegion *mr, hwaddr addr, uint64_t *pval, unsigned size)
-{
-    return memory_region_dispatch_read(mr, addr, pval, size);
-}
-
-bool io_mem_write(MemoryRegion *mr, hwaddr addr,
-                  uint64_t val, unsigned size)
-{
-    return memory_region_dispatch_write(mr, addr, val, size);
-}
-
 typedef struct MemoryRegionList MemoryRegionList;
 
 struct MemoryRegionList {
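
Beyond threading attributes through, memory.c now lets a device implement the new read_with_attrs/write_with_attrs hooks and report a MemTxResult; since MEMTX_OK is 0, the r |= access(...) accumulation in access_with_adjusted_size() stays 0 only if every partial access succeeded. A hedged sketch of a device using the new hook: the device is invented, and the attrs.secure field and MEMTX_ERROR value come from the wider MemTxAttrs work rather than this hunk:

    static MemTxResult mydev_read(void *opaque, hwaddr addr, uint64_t *data,
                                  unsigned size, MemTxAttrs attrs)
    {
        if (!attrs.secure) {    /* e.g. refuse non-secure accesses */
            return MEMTX_ERROR;
        }
        *data = 0;              /* real device state would be read here */
        return MEMTX_OK;
    }

    static const MemoryRegionOps mydev_ops = {
        .read_with_attrs = mydev_read,
        .endianness = DEVICE_NATIVE_ENDIAN,
    };
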
index 085c0fae054f5542749484f8f57d93f6641df3a6..ddb59ccf877ba6d383fbf323d0e6eb19f4a29472 100644 (file)
@@ -304,7 +304,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                 nr_sectors, blk_mig_read_cb, blk);
 
-    bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
+    bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector, nr_sectors);
     qemu_mutex_unlock_iothread();
 
     bmds->cur_sector = cur_sector + nr_sectors;
@@ -320,7 +320,7 @@ static int set_dirty_tracking(void)
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
         bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
-                                                      NULL);
+                                                      NULL, NULL);
         if (!bmds->dirty_bitmap) {
             ret = -errno;
             goto fail;
@@ -497,8 +497,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                 g_free(blk);
             }
 
-            bdrv_reset_dirty_bitmap(bmds->bs, bmds->dirty_bitmap, sector,
-                                    nr_sectors);
+            bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, sector, nr_sectors);
             break;
         }
         sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
@@ -584,7 +583,7 @@ static int64_t get_remaining_dirty(void)
     int64_t dirty = 0;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
+        dirty += bdrv_get_dirty_count(bmds->dirty_bitmap);
     }
 
     return dirty << BDRV_SECTOR_BITS;
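
The extra NULL in the bdrv_create_dirty_bitmap() call reflects the named-dirty-bitmap API added elsewhere in this merge: the function gains a name parameter, and block migration keeps its bitmap anonymous. Sketched from the call sites in this diff (the exact prototype is an inference, not shown here):

    /* name == NULL keeps the bitmap anonymous; the trailing NULL is the
     * Error **errp argument, so creation errors fall back to errno above */
    bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
                                                  NULL /* name */,
                                                  NULL /* errp */);
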
index 68873ec09cd6d39e7d907eeca24f779adff5b131..31369d3b0934972048b45d75fc226c7aa5aeebd4 100644 (file)
--- a/monitor.c
+++ b/monitor.c
@@ -1086,7 +1086,7 @@ static void hmp_info_trace_events(Monitor *mon, const QDict *qdict)
 }
 
 static int client_migrate_info(Monitor *mon, const QDict *qdict,
-                               MonitorCompletion cb, void *opaque)
+                               QObject **ret_data)
 {
     const char *protocol = qdict_get_str(qdict, "protocol");
     const char *hostname = qdict_get_str(qdict, "hostname");
@@ -1108,8 +1108,7 @@ static int client_migrate_info(Monitor *mon, const QDict *qdict,
             return -1;
         }
 
-        ret = qemu_spice_migrate_info(hostname, port, tls_port, subject,
-                                      cb, opaque);
+        ret = qemu_spice_migrate_info(hostname, port, tls_port, subject);
         if (ret != 0) {
             qerror_report(QERR_UNDEFINED_ERROR);
             return -1;
@@ -1385,7 +1384,8 @@ static void hmp_sum(Monitor *mon, const QDict *qdict)
 
     sum = 0;
     for(addr = start; addr < (start + size); addr++) {
-        uint8_t val = ldub_phys(&address_space_memory, addr);
+        uint8_t val = address_space_ldub(&address_space_memory, addr,
+                                         MEMTXATTRS_UNSPECIFIED, NULL);
         /* BSD sum algorithm ('sum' Unix command) */
         sum = (sum >> 1) | (sum << 15);
         sum += val;
@@ -4783,10 +4783,22 @@ static int monitor_can_read(void *opaque)
     return (mon->suspend_cnt == 0) ? 1 : 0;
 }
 
-static int invalid_qmp_mode(const Monitor *mon, const mon_cmd_t *cmd)
+static bool invalid_qmp_mode(const Monitor *mon, const mon_cmd_t *cmd)
 {
-    int is_cap = cmd->mhandler.cmd_new == do_qmp_capabilities;
-    return (qmp_cmd_mode(mon) ? is_cap : !is_cap);
+    bool is_cap = cmd->mhandler.cmd_new == do_qmp_capabilities;
+    if (is_cap && qmp_cmd_mode(mon)) {
+        qerror_report(ERROR_CLASS_COMMAND_NOT_FOUND,
+                      "Capabilities negotiation is already complete, command "
+                      "'%s' ignored", cmd->name);
+        return true;
+    }
+    if (!is_cap && !qmp_cmd_mode(mon)) {
+        qerror_report(ERROR_CLASS_COMMAND_NOT_FOUND,
+                      "Expecting capabilities negotiation with "
+                      "'qmp_capabilities' before command '%s'", cmd->name);
+        return true;
+    }
+    return false;
 }
 
 /*
@@ -5080,11 +5092,14 @@ static void handle_qmp_command(JSONMessageParser *parser, QList *tokens)
     cmd_name = qdict_get_str(input, "execute");
     trace_handle_qmp_command(mon, cmd_name);
     cmd = qmp_find_cmd(cmd_name);
-    if (!cmd || invalid_qmp_mode(mon, cmd)) {
+    if (!cmd) {
         qerror_report(ERROR_CLASS_COMMAND_NOT_FOUND,
                       "The command %s has not been found", cmd_name);
         goto err_out;
     }
+    if (invalid_qmp_mode(mon, cmd)) {
+        goto err_out;
+    }
 
     obj = qdict_get(input, "arguments");
     if (!obj) {
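
The reworked invalid_qmp_mode() turns a generic "command not found" into a specific diagnostic. An illustrative exchange on a fresh QMP connection before qmp_capabilities (the command name is arbitrary):

-> { "execute": "query-status" }
<- { "error": { "class": "CommandNotFound",
      "desc": "Expecting capabilities negotiation with 'qmp_capabilities' before command 'query-status'" } }
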
index 78730846c24b4e88c3b9306d44e4bcd2e2193ef7..1c17224c77211b76e19d4664a95ce91f8773325f 100644 (file)
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
 #
 # Block dirty bitmap information.
 #
+# @name: #optional the name of the dirty bitmap (Since 2.4)
+#
 # @count: number of dirty bytes according to the dirty bitmap
 #
 # @granularity: granularity of the dirty bitmap in bytes (since 1.4)
 #
+# @frozen: whether the dirty bitmap is frozen (Since 2.4)
+#
 # Since: 1.3
 ##
 { 'type': 'BlockDirtyInfo',
-  'data': {'count': 'int', 'granularity': 'int'} }
+  'data': {'*name': 'str', 'count': 'int', 'granularity': 'uint32',
+           'frozen': 'bool'} }
 
 ##
 # @BlockInfo:
 #
 # @none: only copy data written from now on
 #
+# @dirty-bitmap: only copy data described by the dirty bitmap. Since: 2.4
+#
 # Since: 1.3
 ##
 { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
+  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
 
 ##
 # @BlockJobType:
 #          probe if @mode is 'existing', else the format of the source
 #
 # @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
+#        (all the disk, only the sectors allocated in the topmost image, from a
+#        dirty bitmap, or only new I/O).
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
 # @speed: #optional the maximum speed, in bytes per second
 #
+# @bitmap: #optional the name of the dirty bitmap if sync is "dirty-bitmap".
+#          Must be present if sync is "dirty-bitmap", must NOT be present
+#          otherwise. (Since 2.4)
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #                   default 'report'.  'stop' and 'enospc' can only be used
 #                   if the block device supports io-status (see BlockInfo).
 { 'type': 'DriveBackup',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
+            '*speed': 'int', '*bitmap': 'str',
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
             '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
+##
+# @BlockDirtyBitmap:
+#
+# @node: name of the device/node which the bitmap is tracking
+#
+# @name: name of the dirty bitmap
+#
+# Since: 2.4
+##
+{ 'type': 'BlockDirtyBitmap',
+  'data': { 'node': 'str', 'name': 'str' } }
+
+##
+# @BlockDirtyBitmapAdd:
+#
+# @node: name of the device/node which the bitmap is tracking
+#
+# @name: name of the dirty bitmap
+#
+# @granularity: #optional the bitmap granularity, default is 64k for
+#               block-dirty-bitmap-add
+#
+# Since: 2.4
+##
+{ 'type': 'BlockDirtyBitmapAdd',
+  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32' } }
+
+##
+# @block-dirty-bitmap-add:
+#
+# Create a dirty bitmap with a name on the node
+#
+# Returns: nothing on success
+#          If @node is not a valid block device or node, DeviceNotFound
+#          If @name is already taken, GenericError with an explanation
+#
+# Since: 2.4
+##
+{ 'command': 'block-dirty-bitmap-add',
+  'data': 'BlockDirtyBitmapAdd' }
+
+##
+# @block-dirty-bitmap-remove:
+#
+# Remove a dirty bitmap on the node
+#
+# Returns: nothing on success
+#          If @node is not a valid block device or node, DeviceNotFound
+#          If @name is not found, GenericError with an explanation
+#          If @name is frozen by an operation, GenericError
+#
+# Since: 2.4
+##
+{ 'command': 'block-dirty-bitmap-remove',
+  'data': 'BlockDirtyBitmap' }
+
+##
+# @block-dirty-bitmap-clear:
+#
+# Clear (reset) a dirty bitmap on the node
+#
+# Returns: nothing on success
+#          If @node is not a valid block device or node, DeviceNotFound
+#          If @name is not found, GenericError with an explanation
+#
+# Since: 2.4
+##
+{ 'command': 'block-dirty-bitmap-clear',
+  'data': 'BlockDirtyBitmap' }
+
 ##
 # @block_set_io_throttle:
 #
 # Driver specific block device options for the null backend.
 #
 # @size:    #optional size of the device in bytes.
+# @latency-ns: #optional emulated latency (in nanoseconds) in processing
+#              requests. Defaults to zero, which completes requests immediately.
+#              (Since 2.4)
 #
 # Since: 2.2
 ##
 { 'type': 'BlockdevOptionsNull',
-  'data': { '*size': 'int' } }
+  'data': { '*size': 'int', '*latency-ns': 'uint64' } }
 
 ##
 # @BlockdevOptionsVVFAT
 #
 # Emitted when a corruption has been detected in a disk image
 #
-# @device: device name
+# @device: device name. This is always present for compatibility
+#          reasons, but it can be empty ("") if the image does not
+#          have a device name associated.
+#
+# @node-name: #optional node name (Since: 2.4)
 #
 # @msg: informative message for human consumption, such as the kind of
 #       corruption being detected. It should not be parsed by machine as it is
 # Since: 1.7
 ##
 { 'event': 'BLOCK_IMAGE_CORRUPTED',
-  'data': { 'device' : 'str',
-            'msg'    : 'str',
-            '*offset': 'int',
-            '*size'  : 'int',
-            'fatal'  : 'bool' } }
+  'data': { 'device'     : 'str',
+            '*node-name' : 'str',
+            'msg'        : 'str',
+            '*offset'    : 'int',
+            '*size'      : 'int',
+            'fatal'      : 'bool' } }
 
 ##
 # @BLOCK_IO_ERROR
index 9dddfbefce6bb79c1d00ec5e4064c9cbcf496b9c..8d30e43b5318363a7e3820422ce95805e3fdd0b8 100644 (file)
@@ -1305,20 +1305,312 @@ out3:
     return ret;
 }
 
+enum ImgConvertBlockStatus {
+    BLK_DATA,
+    BLK_ZERO,
+    BLK_BACKING_FILE,
+};
+
+typedef struct ImgConvertState {
+    BlockBackend **src;
+    int64_t *src_sectors;
+    int src_cur, src_num;
+    int64_t src_cur_offset;
+    int64_t total_sectors;
+    int64_t allocated_sectors;
+    enum ImgConvertBlockStatus status;
+    int64_t sector_next_status;
+    BlockBackend *target;
+    bool has_zero_init;
+    bool compressed;
+    bool target_has_backing;
+    int min_sparse;
+    size_t cluster_sectors;
+    size_t buf_sectors;
+} ImgConvertState;
+
+static void convert_select_part(ImgConvertState *s, int64_t sector_num)
+{
+    assert(sector_num >= s->src_cur_offset);
+    while (sector_num - s->src_cur_offset >= s->src_sectors[s->src_cur]) {
+        s->src_cur_offset += s->src_sectors[s->src_cur];
+        s->src_cur++;
+        assert(s->src_cur < s->src_num);
+    }
+}
+
+static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
+{
+    int64_t ret;
+    int n;
+
+    convert_select_part(s, sector_num);
+
+    assert(s->total_sectors > sector_num);
+    n = MIN(s->total_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+
+    if (s->sector_next_status <= sector_num) {
+        ret = bdrv_get_block_status(blk_bs(s->src[s->src_cur]),
+                                    sector_num - s->src_cur_offset,
+                                    n, &n);
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (ret & BDRV_BLOCK_ZERO) {
+            s->status = BLK_ZERO;
+        } else if (ret & BDRV_BLOCK_DATA) {
+            s->status = BLK_DATA;
+        } else if (!s->target_has_backing) {
+            /* Without a target backing file we must copy over the contents of
+             * the backing file as well. */
+            /* TODO Check block status of the backing file chain to avoid
+             * needlessly reading zeroes and limiting the iteration to the
+             * buffer size */
+            s->status = BLK_DATA;
+        } else {
+            s->status = BLK_BACKING_FILE;
+        }
+
+        s->sector_next_status = sector_num + n;
+    }
+
+    n = MIN(n, s->sector_next_status - sector_num);
+    if (s->status == BLK_DATA) {
+        n = MIN(n, s->buf_sectors);
+    }
+
+    /* We need to write complete clusters for compressed images, so if an
+     * unallocated area is shorter than that, we must consider the whole
+     * cluster allocated. */
+    if (s->compressed) {
+        if (n < s->cluster_sectors) {
+            n = MIN(s->cluster_sectors, s->total_sectors - sector_num);
+            s->status = BLK_DATA;
+        } else {
+            n = QEMU_ALIGN_DOWN(n, s->cluster_sectors);
+        }
+    }
+
+    return n;
+}
+
+static int convert_read(ImgConvertState *s, int64_t sector_num, int nb_sectors,
+                        uint8_t *buf)
+{
+    int n;
+    int ret;
+
+    if (s->status == BLK_ZERO || s->status == BLK_BACKING_FILE) {
+        return 0;
+    }
+
+    assert(nb_sectors <= s->buf_sectors);
+    while (nb_sectors > 0) {
+        BlockBackend *blk;
+        int64_t bs_sectors;
+
+        /* In the case of compression with multiple source files, we can get a
+         * nb_sectors that spreads into the next part. So we must be able to
+         * read across multiple BDSes for one convert_read() call. */
+        convert_select_part(s, sector_num);
+        blk = s->src[s->src_cur];
+        bs_sectors = s->src_sectors[s->src_cur];
+
+        n = MIN(nb_sectors, bs_sectors - (sector_num - s->src_cur_offset));
+        ret = blk_read(blk, sector_num - s->src_cur_offset, buf, n);
+        if (ret < 0) {
+            return ret;
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+        buf += n * BDRV_SECTOR_SIZE;
+    }
+
+    return 0;
+}
+
+static int convert_write(ImgConvertState *s, int64_t sector_num, int nb_sectors,
+                         const uint8_t *buf)
+{
+    int ret;
+
+    while (nb_sectors > 0) {
+        int n = nb_sectors;
+
+        switch (s->status) {
+        case BLK_BACKING_FILE:
+            /* If we have a backing file, leave clusters unallocated that are
+             * unallocated in the source image, so that the backing file is
+             * visible at the respective offset. */
+            assert(s->target_has_backing);
+            break;
+
+        case BLK_DATA:
+            /* We must always write compressed clusters as a whole, so don't
+             * try to find zeroed parts in the buffer. We can only save the
+             * write if the buffer is completely zeroed and we're allowed to
+             * keep the target sparse. */
+            if (s->compressed) {
+                if (s->has_zero_init && s->min_sparse &&
+                    buffer_is_zero(buf, n * BDRV_SECTOR_SIZE))
+                {
+                    assert(!s->target_has_backing);
+                    break;
+                }
+
+                ret = blk_write_compressed(s->target, sector_num, buf, n);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+            }
+
+            /* If there is real non-zero data or we're told to keep the target
+             * fully allocated (-S 0), we must write it. Otherwise we can treat
+             * it as zero sectors. */
+            if (!s->min_sparse ||
+                is_allocated_sectors_min(buf, n, &n, s->min_sparse))
+            {
+                ret = blk_write(s->target, sector_num, buf, n);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+            }
+            /* fall-through */
+
+        case BLK_ZERO:
+            if (s->has_zero_init) {
+                break;
+            }
+            ret = blk_write_zeroes(s->target, sector_num, n, 0);
+            if (ret < 0) {
+                return ret;
+            }
+            break;
+        }
+
+        sector_num += n;
+        nb_sectors -= n;
+        buf += n * BDRV_SECTOR_SIZE;
+    }
+
+    return 0;
+}
+
+static int convert_do_copy(ImgConvertState *s)
+{
+    uint8_t *buf = NULL;
+    int64_t sector_num, allocated_done;
+    int ret;
+    int n;
+
+    /* Check whether we have zero initialisation or can get it efficiently */
+    s->has_zero_init = s->min_sparse && !s->target_has_backing
+                     ? bdrv_has_zero_init(blk_bs(s->target))
+                     : false;
+
+    if (!s->has_zero_init && !s->target_has_backing &&
+        bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
+    {
+        ret = bdrv_make_zero(blk_bs(s->target), BDRV_REQ_MAY_UNMAP);
+        if (ret == 0) {
+            s->has_zero_init = true;
+        }
+    }
+
+    /* Allocate buffer for copied data. For compressed images, only one cluster
+     * can be copied at a time. */
+    if (s->compressed) {
+        if (s->cluster_sectors <= 0 || s->cluster_sectors > s->buf_sectors) {
+            error_report("invalid cluster size");
+            ret = -EINVAL;
+            goto fail;
+        }
+        s->buf_sectors = s->cluster_sectors;
+    }
+    buf = blk_blockalign(s->target, s->buf_sectors * BDRV_SECTOR_SIZE);
+
+    /* Calculate allocated sectors for progress */
+    s->allocated_sectors = 0;
+    sector_num = 0;
+    while (sector_num < s->total_sectors) {
+        n = convert_iteration_sectors(s, sector_num);
+        if (n < 0) {
+            ret = n;
+            goto fail;
+        }
+        if (s->status == BLK_DATA) {
+            s->allocated_sectors += n;
+        }
+        sector_num += n;
+    }
+
+    /* Do the copy */
+    s->src_cur = 0;
+    s->src_cur_offset = 0;
+    s->sector_next_status = 0;
+
+    sector_num = 0;
+    allocated_done = 0;
+
+    while (sector_num < s->total_sectors) {
+        n = convert_iteration_sectors(s, sector_num);
+        if (n < 0) {
+            ret = n;
+            goto fail;
+        }
+        if (s->status == BLK_DATA) {
+            allocated_done += n;
+            qemu_progress_print(100.0 * allocated_done / s->allocated_sectors,
+                                0);
+        }
+
+        ret = convert_read(s, sector_num, n, buf);
+        if (ret < 0) {
+            error_report("error while reading sector %" PRId64
+                         ": %s", sector_num, strerror(-ret));
+            goto fail;
+        }
+
+        ret = convert_write(s, sector_num, n, buf);
+        if (ret < 0) {
+            error_report("error while writing sector %" PRId64
+                         ": %s", sector_num, strerror(-ret));
+            goto fail;
+        }
+
+        sector_num += n;
+    }
+
+    if (s->compressed) {
+        /* signal EOF to align */
+        ret = blk_write_compressed(s->target, 0, NULL, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    ret = 0;
+fail:
+    qemu_vfree(buf);
+    return ret;
+}
+
 static int img_convert(int argc, char **argv)
 {
-    int c, n, n1, bs_n, bs_i, compress, cluster_sectors, skip_create;
+    int c, bs_n, bs_i, compress, cluster_sectors, skip_create;
     int64_t ret = 0;
     int progress = 0, flags, src_flags;
     const char *fmt, *out_fmt, *cache, *src_cache, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
     BlockBackend **blk = NULL, *out_blk = NULL;
     BlockDriverState **bs = NULL, *out_bs = NULL;
-    int64_t total_sectors, nb_sectors, sector_num, bs_offset;
+    int64_t total_sectors;
     int64_t *bs_sectors = NULL;
-    uint8_t * buf = NULL;
     size_t bufsectors = IO_BUF_SIZE / BDRV_SECTOR_SIZE;
-    const uint8_t *buf1;
     BlockDriverInfo bdi;
     QemuOpts *opts = NULL;
     QemuOptsList *create_opts = NULL;
@@ -1329,6 +1621,7 @@ static int img_convert(int argc, char **argv)
     bool quiet = false;
     Error *local_err = NULL;
     QemuOpts *sn_opts = NULL;
+    ImgConvertState state;
 
     fmt = NULL;
     out_fmt = "raw";
@@ -1627,9 +1920,6 @@ static int img_convert(int argc, char **argv)
     }
     out_bs = blk_bs(out_blk);
 
-    bs_i = 0;
-    bs_offset = 0;
-
     /* increase bufsectors from the default 4096 (2M) if opt_transfer_length
      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
      * as maximum. */
@@ -1638,8 +1928,6 @@ static int img_convert(int argc, char **argv)
                                          out_bs->bl.discard_alignment))
                     );
 
-    buf = blk_blockalign(out_blk, bufsectors * BDRV_SECTOR_SIZE);
-
     if (skip_create) {
         int64_t output_sectors = blk_nb_sectors(out_blk);
         if (output_sectors < 0) {
@@ -1666,203 +1954,20 @@ static int img_convert(int argc, char **argv)
         cluster_sectors = bdi.cluster_size / BDRV_SECTOR_SIZE;
     }
 
-    if (compress) {
-        if (cluster_sectors <= 0 || cluster_sectors > bufsectors) {
-            error_report("invalid cluster size");
-            ret = -1;
-            goto out;
-        }
-        sector_num = 0;
-
-        nb_sectors = total_sectors;
-
-        for(;;) {
-            int64_t bs_num;
-            int remainder;
-            uint8_t *buf2;
-
-            nb_sectors = total_sectors - sector_num;
-            if (nb_sectors <= 0)
-                break;
-            if (nb_sectors >= cluster_sectors)
-                n = cluster_sectors;
-            else
-                n = nb_sectors;
-
-            bs_num = sector_num - bs_offset;
-            assert (bs_num >= 0);
-            remainder = n;
-            buf2 = buf;
-            while (remainder > 0) {
-                int nlow;
-                while (bs_num == bs_sectors[bs_i]) {
-                    bs_offset += bs_sectors[bs_i];
-                    bs_i++;
-                    assert (bs_i < bs_n);
-                    bs_num = 0;
-                    /* printf("changing part: sector_num=%" PRId64 ", "
-                       "bs_i=%d, bs_offset=%" PRId64 ", bs_sectors=%" PRId64
-                       "\n", sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
-                }
-                assert (bs_num < bs_sectors[bs_i]);
-
-                nlow = remainder > bs_sectors[bs_i] - bs_num
-                    ? bs_sectors[bs_i] - bs_num : remainder;
-
-                ret = blk_read(blk[bs_i], bs_num, buf2, nlow);
-                if (ret < 0) {
-                    error_report("error while reading sector %" PRId64 ": %s",
-                                 bs_num, strerror(-ret));
-                    goto out;
-                }
-
-                buf2 += nlow * 512;
-                bs_num += nlow;
-
-                remainder -= nlow;
-            }
-            assert (remainder == 0);
-
-            if (!buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)) {
-                ret = blk_write_compressed(out_blk, sector_num, buf, n);
-                if (ret != 0) {
-                    error_report("error while compressing sector %" PRId64
-                                 ": %s", sector_num, strerror(-ret));
-                    goto out;
-                }
-            }
-            sector_num += n;
-            qemu_progress_print(100.0 * sector_num / total_sectors, 0);
-        }
-        /* signal EOF to align */
-        blk_write_compressed(out_blk, 0, NULL, 0);
-    } else {
-        int64_t sectors_to_read, sectors_read, sector_num_next_status;
-        bool count_allocated_sectors;
-        int has_zero_init = min_sparse ? bdrv_has_zero_init(out_bs) : 0;
-
-        if (!has_zero_init && bdrv_can_write_zeroes_with_unmap(out_bs)) {
-            ret = bdrv_make_zero(out_bs, BDRV_REQ_MAY_UNMAP);
-            if (ret < 0) {
-                goto out;
-            }
-            has_zero_init = 1;
-        }
-
-        sectors_to_read = total_sectors;
-        count_allocated_sectors = progress && (out_baseimg || has_zero_init);
-restart:
-        sector_num = 0; // total number of sectors converted so far
-        sectors_read = 0;
-        sector_num_next_status = 0;
-
-        for(;;) {
-            nb_sectors = total_sectors - sector_num;
-            if (nb_sectors <= 0) {
-                if (count_allocated_sectors) {
-                    sectors_to_read = sectors_read;
-                    count_allocated_sectors = false;
-                    goto restart;
-                }
-                ret = 0;
-                break;
-            }
-
-            while (sector_num - bs_offset >= bs_sectors[bs_i]) {
-                bs_offset += bs_sectors[bs_i];
-                bs_i ++;
-                assert (bs_i < bs_n);
-                /* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
-                  "bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
-                   sector_num, bs_i, bs_offset, bs_sectors[bs_i]); */
-            }
-
-            if ((out_baseimg || has_zero_init) &&
-                sector_num >= sector_num_next_status) {
-                n = nb_sectors > INT_MAX ? INT_MAX : nb_sectors;
-                ret = bdrv_get_block_status(bs[bs_i], sector_num - bs_offset,
-                                            n, &n1);
-                if (ret < 0) {
-                    error_report("error while reading block status of sector %"
-                                 PRId64 ": %s", sector_num - bs_offset,
-                                 strerror(-ret));
-                    goto out;
-                }
-                /* If the output image is zero initialized, we are not working
-                 * on a shared base and the input is zero we can skip the next
-                 * n1 sectors */
-                if (has_zero_init && !out_baseimg && (ret & BDRV_BLOCK_ZERO)) {
-                    sector_num += n1;
-                    continue;
-                }
-                /* If the output image is being created as a copy on write
-                 * image, assume that sectors which are unallocated in the
-                 * input image are present in both the output's and input's
-                 * base images (no need to copy them). */
-                if (out_baseimg) {
-                    if (!(ret & BDRV_BLOCK_DATA)) {
-                        sector_num += n1;
-                        continue;
-                    }
-                    /* The next 'n1' sectors are allocated in the input image.
-                     * Copy only those as they may be followed by unallocated
-                     * sectors. */
-                    nb_sectors = n1;
-                }
-                /* avoid redundant callouts to get_block_status */
-                sector_num_next_status = sector_num + n1;
-            }
-
-            n = MIN(nb_sectors, bufsectors);
-
-            /* round down request length to an aligned sector, but
-             * do not bother doing this on short requests. They happen
-             * when we found an all-zero area, and the next sector to
-             * write will not be sector_num + n. */
-            if (cluster_sectors > 0 && n >= cluster_sectors) {
-                int64_t next_aligned_sector = (sector_num + n);
-                next_aligned_sector -= next_aligned_sector % cluster_sectors;
-                if (sector_num + n > next_aligned_sector) {
-                    n = next_aligned_sector - sector_num;
-                }
-            }
-
-            n = MIN(n, bs_sectors[bs_i] - (sector_num - bs_offset));
-
-            sectors_read += n;
-            if (count_allocated_sectors) {
-                sector_num += n;
-                continue;
-            }
+    state = (ImgConvertState) {
+        .src                = blk,
+        .src_sectors        = bs_sectors,
+        .src_num            = bs_n,
+        .total_sectors      = total_sectors,
+        .target             = out_blk,
+        .compressed         = compress,
+        .target_has_backing = (bool) out_baseimg,
+        .min_sparse         = min_sparse,
+        .cluster_sectors    = cluster_sectors,
+        .buf_sectors        = bufsectors,
+    };
+    ret = convert_do_copy(&state);
 
-            n1 = n;
-            ret = blk_read(blk[bs_i], sector_num - bs_offset, buf, n);
-            if (ret < 0) {
-                error_report("error while reading sector %" PRId64 ": %s",
-                             sector_num - bs_offset, strerror(-ret));
-                goto out;
-            }
-            /* NOTE: at the same time we convert, we do not write zero
-               sectors to have a chance to compress the image. Ideally, we
-               should add a specific call to have the info to go faster */
-            buf1 = buf;
-            while (n > 0) {
-                if (!has_zero_init ||
-                    is_allocated_sectors_min(buf1, n, &n1, min_sparse)) {
-                    ret = blk_write(out_blk, sector_num, buf1, n1);
-                    if (ret < 0) {
-                        error_report("error while writing sector %" PRId64
-                                     ": %s", sector_num, strerror(-ret));
-                        goto out;
-                    }
-                }
-                sector_num += n1;
-                n -= n1;
-                buf1 += n1 * 512;
-            }
-            qemu_progress_print(100.0 * sectors_read / sectors_to_read, 0);
-        }
-    }
 out:
     if (!ret) {
         qemu_progress_print(100, 0);
@@ -1870,7 +1975,6 @@ out:
     qemu_progress_end();
     qemu_opts_del(opts);
     qemu_opts_free(create_opts);
-    qemu_vfree(buf);
     qemu_opts_del(sn_opts);
     blk_unref(out_blk);
     g_free(bs);
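
One subtlety of convert_iteration_sectors() worth spelling out is the compressed-cluster rounding: compressed output can only be written in whole clusters, so a short unallocated run is promoted to data while a longer one is trimmed down to a cluster boundary. A worked example with invented numbers:

    size_t cluster_sectors = 128;  /* 64 KiB clusters of 512-byte sectors */
    /* run of 100 unallocated sectors: 100 < 128, so the whole cluster is
     * treated as BLK_DATA and n becomes MIN(128, sectors remaining)     */
    /* run of 300 unallocated sectors: QEMU_ALIGN_DOWN(300, 128) == 256,
     * so only complete clusters stay unallocated and the 44-sector tail
     * is handled together with the following cluster                    */
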
index 3a42ad0bffeb23778f877410f6e2038943da46c0..213508fe5d2e6ada854437aafdbac7946e7055b3 100644 (file)
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -332,7 +332,7 @@ EQMP
 
     {
         .name       = "send-key",
-        .args_type  = "keys:O,hold-time:i?",
+        .args_type  = "keys:q,hold-time:i?",
         .mhandler.cmd_new = qmp_marshal_input_send_key,
     },
 
@@ -785,8 +785,7 @@ EQMP
         .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
         .params     = "protocol hostname port tls-port cert-subject",
         .help       = "send migration info to spice/vnc client",
-        .mhandler.cmd_async = client_migrate_info,
-        .flags      = MONITOR_CMD_ASYNC,
+        .mhandler.cmd_new = client_migrate_info,
     },
 
 SQMP
@@ -1008,6 +1007,43 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_stream,
     },
 
+SQMP
+block-stream
+------------
+
+Copy data from a backing file into a block device.
+
+Arguments:
+
+- "device": The device's ID, must be unique (json-string)
+- "base": The file name of the backing image above which copying starts
+          (json-string, optional)
+- "backing-file": The backing file string to write into the active layer. This
+                  filename is not validated.
+
+                  If a pathname string is such that it cannot be resolved by
+                  QEMU, that means that subsequent QMP or HMP commands must use
+                  node-names for the image in question, as filename lookup
+                  methods will fail.
+
+                  If not specified, QEMU will automatically determine the
+                  backing file string to use, or error out if there is no
+                  obvious choice.  Care should be taken when specifying the
+                  string, to specify a valid filename or protocol.
+                  (json-string, optional) (Since 2.1)
+- "speed":  the maximum speed, in bytes per second (json-int, optional)
+- "on-error": the action to take on an error (default 'report').  'stop' and
+              'enospc' can only be used if the block device supports io-status.
+              (json-string, optional) (Since 2.1)
+
+Example:
+
+-> { "execute": "block-stream", "arguments": { "device": "virtio0",
+                                               "base": "/tmp/master.qcow2" } }
+<- { "return": {} }
+
+EQMP
+
     {
         .name       = "block-commit",
         .args_type  = "device:B,base:s?,top:s?,backing-file:s?,speed:o?",
@@ -1074,7 +1110,7 @@ EQMP
     {
         .name       = "drive-backup",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "on-source-error:s?,on-target-error:s?",
+                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_drive_backup,
     },
 
@@ -1101,8 +1137,10 @@ Arguments:
             (json-string, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
-  allocated in the topmost image, or "none" to only replicate new I/O
-  (MirrorSyncMode).
+  allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in
+  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
+- "bitmap": dirty bitmap name for sync==dirty-bitmap. Must be present if sync
+            is "dirty-bitmap", must NOT be present otherwise.
 - "mode": whether and how QEMU should create a new image
           (NewImageMode, optional, default 'absolute-paths')
 - "speed": the maximum speed, in bytes per second (json-int, optional)
@@ -1267,6 +1305,91 @@ Example:
                                          "name": "snapshot0" } } ] } }
 <- { "return": {} }
 
+EQMP
+
+    {
+        .name       = "block-dirty-bitmap-add",
+        .args_type  = "node:B,name:s,granularity:i?",
+        .mhandler.cmd_new = qmp_marshal_input_block_dirty_bitmap_add,
+    },
+
+SQMP
+
+block-dirty-bitmap-add
+----------------------
+Since 2.4
+
+Create a dirty bitmap with a name on the node, and start tracking writes.
+
+Arguments:
+
+- "node": device/node on which to create dirty bitmap (json-string)
+- "name": name of the new dirty bitmap (json-string)
+- "granularity": granularity to track writes with (int, optional)
+
+Example:
+
+-> { "execute": "block-dirty-bitmap-add", "arguments": { "node": "drive0",
+                                                   "name": "bitmap0" } }
+<- { "return": {} }
+
+EQMP
+
+    {
+        .name       = "block-dirty-bitmap-remove",
+        .args_type  = "node:B,name:s",
+        .mhandler.cmd_new = qmp_marshal_input_block_dirty_bitmap_remove,
+    },
+
+SQMP
+
+block-dirty-bitmap-remove
+-------------------------
+Since 2.4
+
+Stop write tracking and remove the dirty bitmap that was created with
+block-dirty-bitmap-add.
+
+Arguments:
+
+- "node": device/node on which to remove dirty bitmap (json-string)
+- "name": name of the dirty bitmap to remove (json-string)
+
+Example:
+
+-> { "execute": "block-dirty-bitmap-remove", "arguments": { "node": "drive0",
+                                                      "name": "bitmap0" } }
+<- { "return": {} }
+
+EQMP
+
+    {
+        .name       = "block-dirty-bitmap-clear",
+        .args_type  = "node:B,name:s",
+        .mhandler.cmd_new = qmp_marshal_input_block_dirty_bitmap_clear,
+    },
+
+SQMP
+
+block-dirty-bitmap-clear
+------------------------
+Since 2.4
+
+Reset the dirty bitmap associated with a node so that an incremental backup
+from this point in time forward will only back up clusters modified after this
+clear operation.
+
+Arguments:
+
+- "node": device/node on which to remove dirty bitmap (json-string)
+- "name": name of the dirty bitmap to remove (json-string)
+
+Example:
+
+-> { "execute": "block-dirty-bitmap-clear", "arguments": { "node": "drive0",
+                                                           "name": "bitmap0" } }
+<- { "return": {} }
+
 EQMP
 
     {
@@ -3288,7 +3411,7 @@ EQMP
 
     {
         .name       = "migrate-set-capabilities",
-        .args_type  = "capabilities:O",
+        .args_type  = "capabilities:q",
         .params     = "capability:s,state:b",
        .mhandler.cmd_new = qmp_marshal_input_migrate_set_capabilities,
     },
index 5df61f9aa97b8e0a05a33199ccd80dfe473a21ba..7f0aae977d9dd980e58be380efca0dd00e5e719d 100755 (executable)
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2911,6 +2911,17 @@ sub process {
                if ($rawline =~ /\b(?:Qemu|QEmu)\b/) {
                        WARN("use QEMU instead of Qemu or QEmu\n" . $herecurr);
                }
+
+# check for non-portable ffs() calls that have portable alternatives in QEMU
+               if ($line =~ /\bffs\(/) {
+                       ERROR("use ctz32() instead of ffs()\n" . $herecurr);
+               }
+               if ($line =~ /\bffsl\(/) {
+                       ERROR("use ctz32() or ctz64() instead of ffsl()\n" . $herecurr);
+               }
+               if ($line =~ /\bffsll\(/) {
+                       ERROR("use ctz64() instead of ffsll()\n" . $herecurr);
+               }
        }
 
        # If we have no input at all, then there is nothing to report on
index cdda2591d9469ce11a930550295697138cb5eacf..224d2d18738201ed9d0144a32c10e5bce0b96acf 100644 (file)
--- a/scripts/coverity-model.c
+++ b/scripts/coverity-model.c
@@ -46,6 +46,8 @@ typedef struct va_list_str *va_list;
 
 typedef struct AddressSpace AddressSpace;
 typedef uint64_t hwaddr;
+typedef uint32_t MemTxResult;
+typedef uint64_t MemTxAttrs;
 
 static void __write(uint8_t *buf, ssize_t len)
 {
@@ -65,10 +67,10 @@ static void __read(uint8_t *buf, ssize_t len)
     int last = buf[len-1];
 }
 
-bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
-                      int len, bool is_write)
+MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
+                             uint8_t *buf, int len, bool is_write)
 {
-    bool result;
+    MemTxResult result;
 
     // TODO: investigate impact of treating reads as producing
     // tainted data, with __coverity_tainted_data_argument__(buf).
index 8f845a2b2910963324310df4b52f880564d80483..1be4d67d8ab6722722388de6f74db5dc074db459 100644 (file)
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -2,7 +2,7 @@
 # QAPI visitor generator
 #
 # Copyright IBM, Corp. 2011
-# Copyright (C) 2014 Red Hat, Inc.
+# Copyright (C) 2014-2015 Red Hat, Inc.
 #
 # Authors:
 #  Anthony Liguori <aliguori@us.ibm.com>
@@ -401,34 +401,31 @@ out:
 
     return ret
 
-def generate_declaration(name, members, genlist=True, builtin_type=False):
+def generate_declaration(name, members, builtin_type=False):
     ret = ""
     if not builtin_type:
         ret += mcgen('''
 
 void visit_type_%(name)s(Visitor *m, %(name)s **obj, const char *name, Error **errp);
 ''',
-                    name=name)
+                     name=name)
 
-    if genlist:
-        ret += mcgen('''
+    ret += mcgen('''
 void visit_type_%(name)sList(Visitor *m, %(name)sList **obj, const char *name, Error **errp);
 ''',
                  name=name)
 
     return ret
 
-def generate_enum_declaration(name, members, genlist=True):
-    ret = ""
-    if genlist:
-        ret += mcgen('''
+def generate_enum_declaration(name, members):
+    ret = mcgen('''
 void visit_type_%(name)sList(Visitor *m, %(name)sList **obj, const char *name, Error **errp);
 ''',
-                     name=name)
+                name=name)
 
     return ret
 
-def generate_decl_enum(name, members, genlist=True):
+def generate_decl_enum(name, members):
     return mcgen('''
 
 void visit_type_%(name)s(Visitor *m, %(name)s *obj, const char *name, Error **errp);
@@ -542,8 +539,7 @@ exprs = parse_schema(input_file)
 # for built-in types in our header files and simply guard them
 fdecl.write(guardstart("QAPI_VISIT_BUILTIN_VISITOR_DECL"))
 for typename in builtin_types:
-    fdecl.write(generate_declaration(typename, None, genlist=True,
-                                     builtin_type=True))
+    fdecl.write(generate_declaration(typename, None, builtin_type=True))
 fdecl.write(guardend("QAPI_VISIT_BUILTIN_VISITOR_DECL"))
 
 # ...this doesn't work for cases where we link in multiple objects that
index 8a0f30534fac182689cc35930663583b0160042c..6c7f4fbe53629bb451016f1a5f13651f52e3be3a 100644 (file)
--- a/scripts/qemu-gdb.py
+++ b/scripts/qemu-gdb.py
@@ -22,12 +22,86 @@ def isnull(ptr):
 def int128(p):
     return long(p['lo']) + (long(p['hi']) << 64)
 
+def get_fs_base():
+    '''Fetch %fs base value using arch_prctl(ARCH_GET_FS)'''
+    # %rsp - 120 is scratch space according to the SystemV ABI
+    old = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
+    gdb.execute('call arch_prctl(0x1003, $rsp - 120)', False, True)
+    fs_base = gdb.parse_and_eval('*(uint64_t*)($rsp - 120)')
+    gdb.execute('set *(uint64_t*)($rsp - 120) = %s' % old, False, True)
+    return fs_base
+
+def get_glibc_pointer_guard():
+    '''Fetch glibc pointer guard value'''
+    fs_base = get_fs_base()
+    return gdb.parse_and_eval('*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base)
+
+def glibc_ptr_demangle(val, pointer_guard):
+    '''Undo effect of glibc's PTR_MANGLE()'''
+    return gdb.parse_and_eval('(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s' % (val, val, pointer_guard))
+
+def bt_jmpbuf(jmpbuf):
+    '''Backtrace a jmpbuf'''
+    JB_RBX  = 0
+    JB_RBP  = 1
+    JB_R12  = 2
+    JB_R13  = 3
+    JB_R14  = 4
+    JB_R15  = 5
+    JB_RSP  = 6
+    JB_PC   = 7
+
+    old_rbx = gdb.parse_and_eval('(uint64_t)$rbx')
+    old_rbp = gdb.parse_and_eval('(uint64_t)$rbp')
+    old_rsp = gdb.parse_and_eval('(uint64_t)$rsp')
+    old_r12 = gdb.parse_and_eval('(uint64_t)$r12')
+    old_r13 = gdb.parse_and_eval('(uint64_t)$r13')
+    old_r14 = gdb.parse_and_eval('(uint64_t)$r14')
+    old_r15 = gdb.parse_and_eval('(uint64_t)$r15')
+    old_rip = gdb.parse_and_eval('(uint64_t)$rip')
+
+    pointer_guard = get_glibc_pointer_guard()
+    gdb.execute('set $rbx = %s' % jmpbuf[JB_RBX])
+    gdb.execute('set $rbp = %s' % glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard))
+    gdb.execute('set $rsp = %s' % glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard))
+    gdb.execute('set $r12 = %s' % jmpbuf[JB_R12])
+    gdb.execute('set $r13 = %s' % jmpbuf[JB_R13])
+    gdb.execute('set $r14 = %s' % jmpbuf[JB_R14])
+    gdb.execute('set $r15 = %s' % jmpbuf[JB_R15])
+    gdb.execute('set $rip = %s' % glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard))
+
+    gdb.execute('bt')
+
+    gdb.execute('set $rbx = %s' % old_rbx)
+    gdb.execute('set $rbp = %s' % old_rbp)
+    gdb.execute('set $rsp = %s' % old_rsp)
+    gdb.execute('set $r12 = %s' % old_r12)
+    gdb.execute('set $r13 = %s' % old_r13)
+    gdb.execute('set $r14 = %s' % old_r14)
+    gdb.execute('set $r15 = %s' % old_r15)
+    gdb.execute('set $rip = %s' % old_rip)
+
 class QemuCommand(gdb.Command):
     '''Prefix for QEMU debug support commands'''
     def __init__(self):
         gdb.Command.__init__(self, 'qemu', gdb.COMMAND_DATA,
                              gdb.COMPLETE_NONE, True)
 
+class CoroutineCommand(gdb.Command):
+    '''Display coroutine backtrace'''
+    def __init__(self):
+        gdb.Command.__init__(self, 'qemu coroutine', gdb.COMMAND_DATA,
+                             gdb.COMPLETE_NONE)
+
+    def invoke(self, arg, from_tty):
+        argv = gdb.string_to_argv(arg)
+        if len(argv) != 1:
+            gdb.write('usage: qemu coroutine <coroutine-pointer>\n')
+            return
+
+        coroutine_pointer = gdb.parse_and_eval(argv[0]).cast(gdb.lookup_type('CoroutineUContext').pointer())
+        bt_jmpbuf(coroutine_pointer['env']['__jmpbuf'])
+
 class MtreeCommand(gdb.Command):
     '''Display the memory tree hierarchy'''
     def __init__(self):
@@ -86,4 +160,5 @@ class MtreeCommand(gdb.Command):
             subregion = subregion['subregions_link']['tqe_next']
 
 QemuCommand()
+CoroutineCommand()
 MtreeCommand()
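
The new "qemu coroutine" command works by temporarily loading the register state saved in a coroutine's jmpbuf and running "bt" there. Because glibc stores the saved RBP, RSP and PC fields mangled, bt_jmpbuf first undoes PTR_MANGLE via glibc_ptr_demangle. As a sanity check, here is a standalone sketch (plain Python, no gdb needed; the guard and pointer values are made up) showing that the demangling formula above is the exact inverse of glibc's x86-64 mangling, i.e. XOR with the per-thread guard followed by a rotate left of 0x11 bits:

    M64 = (1 << 64) - 1

    def ptr_mangle(val, guard):
        # glibc x86-64 PTR_MANGLE: XOR with the guard, then rotate left 0x11
        v = (val ^ guard) & M64
        return ((v << 0x11) | (v >> (64 - 0x11))) & M64

    def ptr_demangle(val, guard):
        # inverse, matching glibc_ptr_demangle above: rotate right 0x11, XOR
        v = ((val >> 0x11) | (val << (64 - 0x11))) & M64
        return (v ^ guard) & M64

    guard = 0xdeadbeefcafef00d        # made-up per-thread pointer guard
    rip = 0x00005555aaaa1234          # made-up saved program counter
    assert ptr_demangle(ptr_mangle(rip, guard), guard) == rip
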
index 20b6ec795e38daff24718c0976881863feba58fe..1d38e3e9e787dc59cf4972ad1efda88c7aaa6fd6 100644 (file)
@@ -21,6 +21,9 @@ class QMPConnectError(QMPError):
 class QMPCapabilitiesError(QMPError):
     pass
 
+class QMPTimeoutError(QMPError):
+    pass
+
 class QEMUMonitorProtocol:
     def __init__(self, address, server=False):
         """
@@ -72,6 +75,44 @@ class QEMUMonitorProtocol:
 
     error = socket.error
 
+    def __get_events(self, wait=False):
+        """
+        Check for new events in the stream and cache them in __events.
+
+        @param wait (bool): block until an event is available.
+        @param wait (float): If wait is a float, treat it as a timeout value.
+
+        @raise QMPTimeoutError: If a timeout float is provided and the timeout
+                                period elapses.
+        @raise QMPConnectError: If wait is True but no events could be retrieved
+                                or if some other error occurred.
+        """
+
+        # Check for new events regardless and pull them into the cache:
+        self.__sock.setblocking(0)
+        try:
+            self.__json_read()
+        except socket.error, err:
+            if err[0] == errno.EAGAIN:
+                # No data available
+                pass
+        self.__sock.setblocking(1)
+
+        # Wait for new events, if needed.
+        # if wait is 0.0, this means "no wait" and is also implicitly false.
+        if not self.__events and wait:
+            if isinstance(wait, float):
+                self.__sock.settimeout(wait)
+            try:
+                ret = self.__json_read(only_event=True)
+            except socket.timeout:
+                raise QMPTimeoutError("Timeout waiting for event")
+            except:
+                raise QMPConnectError("Error while reading from socket")
+            if ret is None:
+                raise QMPConnectError("Error while reading from socket")
+            self.__sock.settimeout(None)
+
     def connect(self, negotiate=True):
         """
         Connect to the QMP Monitor and perform capabilities negotiation.
@@ -140,43 +181,37 @@ class QEMUMonitorProtocol:
         """
         Get and delete the first available QMP event.
 
-        @param wait: block until an event is available (bool)
+        @param wait (bool): block until an event is available.
+        @param wait (float): If wait is a float, treat it as a timeout value.
+
+        @raise QMPTimeoutError: If a timeout float is provided and the timeout
+                                period elapses.
+        @raise QMPConnectError: If wait is True but no events could be retrieved
+                                or if some other error occurred.
+
+        @return The first available QMP event, or None.
         """
-        self.__sock.setblocking(0)
-        try:
-            self.__json_read()
-        except socket.error, err:
-            if err[0] == errno.EAGAIN:
-                # No data available
-                pass
-        self.__sock.setblocking(1)
-        if not self.__events and wait:
-            self.__json_read(only_event=True)
-        event = self.__events[0]
-        del self.__events[0]
-        return event
+        self.__get_events(wait)
+
+        if self.__events:
+            return self.__events.pop(0)
+        return None
 
     def get_events(self, wait=False):
         """
         Get a list of available QMP events.
 
-        @param wait: block until an event is available (bool)
-        """
-        self.__sock.setblocking(0)
-        try:
-            self.__json_read()
-        except socket.error, err:
-            if err[0] == errno.EAGAIN:
-                # No data available
-                pass
-        self.__sock.setblocking(1)
-        if not self.__events and wait:
-            ret = self.__json_read(only_event=True)
-            if ret == None:
-                # We are in blocking mode, if don't get anything, something
-                # went wrong
-                raise QMPConnectError("Error while reading from socket")
+        @param wait (bool): block until an event is available.
+        @param wait (float): If wait is a float, treat it as a timeout value.
 
+        @raise QMPTimeoutError: If a timeout float is provided and the timeout
+                                period elapses.
+        @raise QMPConnectError: If wait is True but no events could be retrieved
+                                or if some other error occurred.
+
+        @return The list of available QMP events.
+        """
+        self.__get_events(wait)
         return self.__events
 
     def clear_events(self):
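
With timeout support in place, callers can bound how long they wait for an asynchronous event instead of blocking forever. A minimal usage sketch (assuming this module is importable as qmp, and that a QMP server is listening on the made-up socket path below):

    from qmp import QEMUMonitorProtocol, QMPTimeoutError

    mon = QEMUMonitorProtocol('/tmp/qmp-test.sock')   # hypothetical path
    mon.connect()                     # performs capabilities negotiation
    try:
        # wait=False polls, wait=True blocks forever, a float bounds the wait
        event = mon.pull_event(wait=5.0)
        if event is not None:
            print('got event: %s' % event['event'])
    except QMPTimeoutError:
        print('no QMP event arrived within 5 seconds')
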
index 0e3dd35fe1e45d9e107da7d4496e1c5c37bb3930..16b08523e9808b57fc485b3817c46cfce2ad5606 100644 (file)
      * victim tlb. try to refill from the victim tlb before walking the       \
      * page table. */                                                         \
     int vidx;                                                                 \
-    hwaddr tmpiotlb;                                                          \
+    CPUIOTLBEntry tmpiotlb;                                                   \
     CPUTLBEntry tmptlb;                                                       \
     for (vidx = CPU_VTLB_SIZE-1; vidx >= 0; --vidx) {                         \
         if (env->tlb_v_table[mmu_idx][vidx].ty == (addr & TARGET_PAGE_MASK)) {\
 
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
-                                              hwaddr physaddr,
+                                              CPUIOTLBEntry *iotlbentry,
                                               target_ulong addr,
                                               uintptr_t retaddr)
 {
     uint64_t val;
     CPUState *cpu = ENV_GET_CPU(env);
+    hwaddr physaddr = iotlbentry->addr;
     MemoryRegion *mr = iotlb_to_region(cpu, physaddr);
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
@@ -158,7 +159,8 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
     }
 
     cpu->mem_io_vaddr = addr;
-    io_mem_read(mr, physaddr, &val, 1 << SHIFT);
+    memory_region_dispatch_read(mr, physaddr, &val, 1 << SHIFT,
+                                iotlbentry->attrs);
     return val;
 }
 #endif
@@ -195,15 +197,15 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
 
     /* Handle an IO access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        hwaddr ioaddr;
+        CPUIOTLBEntry *iotlbentry;
         if ((addr & (DATA_SIZE - 1)) != 0) {
             goto do_unaligned_access;
         }
-        ioaddr = env->iotlb[mmu_idx][index];
+        iotlbentry = &env->iotlb[mmu_idx][index];
 
         /* ??? Note that the io helpers always read data in the target
            byte ordering.  We should push the LE/BE request down into io.  */
-        res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+        res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr);
         res = TGT_LE(res);
         return res;
     }
@@ -283,15 +285,15 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
 
     /* Handle an IO access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        hwaddr ioaddr;
+        CPUIOTLBEntry *iotlbentry;
         if ((addr & (DATA_SIZE - 1)) != 0) {
             goto do_unaligned_access;
         }
-        ioaddr = env->iotlb[mmu_idx][index];
+        iotlbentry = &env->iotlb[mmu_idx][index];
 
         /* ??? Note that the io helpers always read data in the target
            byte ordering.  We should push the LE/BE request down into io.  */
-        res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
+        res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr);
         res = TGT_BE(res);
         return res;
     }
@@ -363,12 +365,13 @@ WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
 #endif
 
 static inline void glue(io_write, SUFFIX)(CPUArchState *env,
-                                          hwaddr physaddr,
+                                          CPUIOTLBEntry *iotlbentry,
                                           DATA_TYPE val,
                                           target_ulong addr,
                                           uintptr_t retaddr)
 {
     CPUState *cpu = ENV_GET_CPU(env);
+    hwaddr physaddr = iotlbentry->addr;
     MemoryRegion *mr = iotlb_to_region(cpu, physaddr);
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
@@ -378,7 +381,8 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
 
     cpu->mem_io_vaddr = addr;
     cpu->mem_io_pc = retaddr;
-    io_mem_write(mr, physaddr, val, 1 << SHIFT);
+    memory_region_dispatch_write(mr, physaddr, val, 1 << SHIFT,
+                                 iotlbentry->attrs);
 }
 
 void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
@@ -408,16 +412,16 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
 
     /* Handle an IO access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        hwaddr ioaddr;
+        CPUIOTLBEntry *iotlbentry;
         if ((addr & (DATA_SIZE - 1)) != 0) {
             goto do_unaligned_access;
         }
-        ioaddr = env->iotlb[mmu_idx][index];
+        iotlbentry = &env->iotlb[mmu_idx][index];
 
         /* ??? Note that the io helpers always read data in the target
            byte ordering.  We should push the LE/BE request down into io.  */
         val = TGT_LE(val);
-        glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
         return;
     }
 
@@ -489,16 +493,16 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
 
     /* Handle an IO access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        hwaddr ioaddr;
+        CPUIOTLBEntry *iotlbentry;
         if ((addr & (DATA_SIZE - 1)) != 0) {
             goto do_unaligned_access;
         }
-        ioaddr = env->iotlb[mmu_idx][index];
+        iotlbentry = &env->iotlb[mmu_idx][index];
 
         /* ??? Note that the io helpers always read data in the target
            byte ordering.  We should push the LE/BE request down into io.  */
         val = TGT_BE(val);
-        glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
+        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
         return;
     }
 
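
The point of this refactoring is that the IOTLB no longer hands the I/O helpers a bare physical address: each entry now carries the memory transaction attributes as well, and io_read/io_write forward them to the new memory_region_dispatch_read/write calls instead of dropping them at this boundary. A rough model of the data-flow change (the address arithmetic mirrors the C above; the entry layout and 4 KiB page size are a sketch, not QEMU's actual types):

    from collections import namedtuple

    TARGET_PAGE_MASK = ~0xfff              # assuming 4 KiB target pages

    # before: a bare hwaddr; after: address plus transaction attributes
    CPUIOTLBEntry = namedtuple('CPUIOTLBEntry', ['addr', 'attrs'])

    def io_read(iotlbentry, vaddr, dispatch_read):
        # same arithmetic as the C helper; attrs now travel with the access
        physaddr = (iotlbentry.addr & TARGET_PAGE_MASK) + vaddr
        return dispatch_read(physaddr, iotlbentry.attrs)
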
index 986f04cfd62f6bf05bcf8bf813861a0ded14c695..3ca3fa8d218db578a3bd4e81c0c6b772c6c71cf6 100644 (file)
@@ -111,7 +111,7 @@ static void arm_cpu_reset(CPUState *s)
         /* Userspace expects access to DC ZVA, CTL_EL0 and the cache ops */
         env->cp15.sctlr_el[1] |= SCTLR_UCT | SCTLR_UCI | SCTLR_DZE;
         /* and to the FP/Neon instructions */
-        env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 2, 3);
+        env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 2, 3);
 #else
         /* Reset into the highest available EL */
         if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -126,7 +126,7 @@ static void arm_cpu_reset(CPUState *s)
     } else {
 #if defined(CONFIG_USER_ONLY)
         /* Userspace expects access to cp10 and cp11 for FP/Neon */
-        env->cp15.c1_coproc = deposit64(env->cp15.c1_coproc, 20, 4, 0xf);
+        env->cp15.cpacr_el1 = deposit64(env->cp15.cpacr_el1, 20, 4, 0xf);
 #endif
     }
 
@@ -524,9 +524,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
         unset_feature(env, ARM_FEATURE_EL3);
 
         /* Disable the security extension feature bits in the processor feature
-         * register as well.  This is id_pfr1[7:4].
+         * registers as well. These are id_pfr1[7:4] and id_aa64pfr0[15:12].
          */
         cpu->id_pfr1 &= ~0xf0;
+        cpu->id_aa64pfr0 &= ~0xf000;
     }
 
     register_cp_regs_for_features(cpu);
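
The rename from c1_coproc to cpacr_el1 is mechanical, but the bit manipulation around it deserves a gloss: CPACR_EL1.FPEN lives in bits [21:20], and writing 0b11 there disables trapping of FP/Neon accesses, which is what user-mode emulation expects. A quick Python restatement of QEMU's deposit64/extract32 helpers shows what the reset code computes:

    def deposit64(value, start, length, fieldval):
        # replace 'length' bits of 'value' at bit 'start' with 'fieldval',
        # mirroring the semantics of QEMU's deposit64()
        mask = ((1 << length) - 1) << start
        return (value & ~mask) | ((fieldval << start) & mask)

    def extract32(value, start, length):
        return (value >> start) & ((1 << length) - 1)

    cpacr_el1 = deposit64(0, 20, 2, 3)    # FPEN = 0b11: FP/Neon untrapped
    assert extract32(cpacr_el1, 20, 2) == 3
    assert cpacr_el1 == 0x300000
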
index 083211ce3903cb3539e7d731b51f8ac640cf37bb..d63d9b20f1344cd9f7530874f7c0fb070a3151ee 100644 (file)
@@ -201,7 +201,7 @@ typedef struct CPUARMState {
             };
             uint64_t sctlr_el[4];
         };
-        uint64_t c1_coproc; /* Coprocessor access register.  */
+        uint64_t cpacr_el1; /* Architectural feature access control register */
         uint32_t c1_xscaleauxcr; /* XScale auxiliary control register.  */
         uint64_t sder; /* Secure debug enable register. */
         uint32_t nsacr; /* Non-secure access control register. */
@@ -1813,7 +1813,7 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
     int fpen;
 
     if (arm_feature(env, ARM_FEATURE_V6)) {
-        fpen = extract32(env->cp15.c1_coproc, 20, 2);
+        fpen = extract32(env->cp15.cpacr_el1, 20, 2);
     } else {
         /* CPACR doesn't exist before v6, so VFP is always accessible */
         fpen = 3;
index d77c6de40c51aed8211370b70bb9d499e4452837..f8f8d76fc0374c5d1fbc3bbb0c16dc394bbe9bd3 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef CONFIG_USER_ONLY
 static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, ARMMMUIdx mmu_idx,
-                                hwaddr *phys_ptr, int *prot,
+                                hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
                                 target_ulong *page_size);
 
 /* Definitions for the PMCCNTR and PMCR registers */
@@ -589,7 +589,7 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
         }
         value &= mask;
     }
-    env->cp15.c1_coproc = value;
+    env->cp15.cpacr_el1 = value;
 }
 
 static const ARMCPRegInfo v6_cp_reginfo[] = {
@@ -615,7 +615,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
       .access = PL1_RW, .type = ARM_CP_CONST, .resetvalue = 0, },
     { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
       .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.c1_coproc),
+      .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
       .resetvalue = 0, .writefn = cpacr_write },
     REGINFO_SENTINEL
 };
@@ -816,8 +816,10 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
          * supported if EL2 exists. The bit is UNK/SBZP when
          * EL2 is unavailable. In QEMU ARMv7, we force it to always zero
          * when EL2 is unavailable.
+         * On ARMv8, this bit is always available.
          */
-        if (arm_feature(env, ARM_FEATURE_V7)) {
+        if (arm_feature(env, ARM_FEATURE_V7) &&
+            !arm_feature(env, ARM_FEATURE_V8)) {
             valid_mask &= ~SCR_SMD;
         }
     }
@@ -1466,9 +1468,10 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
     int prot;
     int ret;
     uint64_t par64;
+    MemTxAttrs attrs = {};
 
     ret = get_phys_addr(env, value, access_type, mmu_idx,
-                        &phys_addr, &prot, &page_size);
+                        &phys_addr, &attrs, &prot, &page_size);
     if (extended_addresses_enabled(env)) {
         /* ret is a DFSR/IFSR value for the long descriptor
          * translation table format, but with WnR always clear.
@@ -1477,6 +1480,9 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
         par64 = (1 << 11); /* LPAE bit always set */
         if (ret == 0) {
             par64 |= phys_addr & ~0xfffULL;
+            if (!attrs.secure) {
+                par64 |= (1 << 9); /* NS */
+            }
             /* We don't set the ATTR or SH fields in the PAR. */
         } else {
             par64 |= 1; /* F */
@@ -1499,6 +1505,9 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
             } else {
                 par64 = phys_addr & 0xfffff000;
             }
+            if (!attrs.secure) {
+                par64 |= (1 << 9); /* NS */
+            }
         } else {
             par64 = ((ret & (1 << 10)) >> 5) | ((ret & (1 << 12)) >> 6) |
                     ((ret & 0xf) << 1) | 1;
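
Both PAR formats now report whether the translation ended up secure or non-secure. For the long-descriptor case, the bits assembled above can be restated as a small helper (a sketch covering only the bits this hunk touches):

    def make_par64_lpae(phys_addr, secure, fault):
        par = 1 << 11                  # LPAE-format bit, always set here
        if fault:
            return par | 1             # F bit: the translation aborted
        par |= phys_addr & ~0xfff      # page-aligned physical address
        if not secure:
            par |= 1 << 9              # NS: the result is non-secure
        return par

    assert make_par64_lpae(0x40001000, secure=False, fault=False) & (1 << 9)
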
@@ -4858,6 +4867,26 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
     }
 }
 
+/* Return true if this address translation regime is secure */
+static inline bool regime_is_secure(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+    switch (mmu_idx) {
+    case ARMMMUIdx_S12NSE0:
+    case ARMMMUIdx_S12NSE1:
+    case ARMMMUIdx_S1NSE0:
+    case ARMMMUIdx_S1NSE1:
+    case ARMMMUIdx_S1E2:
+    case ARMMMUIdx_S2NS:
+        return false;
+    case ARMMMUIdx_S1E3:
+    case ARMMMUIdx_S1SE0:
+    case ARMMMUIdx_S1SE1:
+        return true;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 /* Return the SCTLR value which controls this address translation regime */
 static inline uint32_t regime_sctlr(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
@@ -5102,6 +5131,29 @@ static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,
     return true;
 }
 
+/* All loads done in the course of a page table walk go through here.
+ * TODO: rather than ignoring errors from physical memory reads (which
+ * are external aborts in ARM terminology) we should propagate this
+ * error out so that we can turn it into a Data Abort if this walk
+ * was being done for a CPU load/store or an address translation instruction
+ * (but not if it was for a debug access).
+ */
+static uint32_t arm_ldl_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+{
+    MemTxAttrs attrs = {};
+
+    attrs.secure = is_secure;
+    return address_space_ldl(cs->as, addr, attrs, NULL);
+}
+
+static uint64_t arm_ldq_ptw(CPUState *cs, hwaddr addr, bool is_secure)
+{
+    MemTxAttrs attrs = {};
+
+    attrs.secure = is_secure;
+    return address_space_ldq(cs->as, addr, attrs, NULL);
+}
+
 static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
                             ARMMMUIdx mmu_idx, hwaddr *phys_ptr,
                             int *prot, target_ulong *page_size)
@@ -5124,7 +5176,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
         code = 5;
         goto do_fault;
     }
-    desc = ldl_phys(cs->as, table);
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
     type = (desc & 3);
     domain = (desc >> 5) & 0x0f;
     if (regime_el(env, mmu_idx) == 1) {
@@ -5160,7 +5212,7 @@ static int get_phys_addr_v5(CPUARMState *env, uint32_t address, int access_type,
             /* Fine pagetable.  */
             table = (desc & 0xfffff000) | ((address >> 8) & 0xffc);
         }
-        desc = ldl_phys(cs->as, table);
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
             code = 7;
@@ -5210,6 +5262,7 @@ do_fault:
 
 static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
                             ARMMMUIdx mmu_idx, hwaddr *phys_ptr,
+                            MemTxAttrs *attrs,
                             int *prot, target_ulong *page_size)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -5224,6 +5277,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
     int domain_prot;
     hwaddr phys_addr;
     uint32_t dacr;
+    bool ns;
 
     /* Pagetable walk.  */
     /* Lookup l1 descriptor.  */
@@ -5232,7 +5286,7 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
         code = 5;
         goto do_fault;
     }
-    desc = ldl_phys(cs->as, table);
+    desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
     type = (desc & 3);
     if (type == 0 || (type == 3 && !arm_feature(env, ARM_FEATURE_PXN))) {
         /* Section translation fault, or attempt to use the encoding
@@ -5273,13 +5327,15 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
         xn = desc & (1 << 4);
         pxn = desc & 1;
         code = 13;
+        ns = extract32(desc, 19, 1);
     } else {
         if (arm_feature(env, ARM_FEATURE_PXN)) {
             pxn = (desc >> 2) & 1;
         }
+        ns = extract32(desc, 3, 1);
         /* Lookup l2 entry.  */
         table = (desc & 0xfffffc00) | ((address >> 10) & 0x3fc);
-        desc = ldl_phys(cs->as, table);
+        desc = arm_ldl_ptw(cs, table, regime_is_secure(env, mmu_idx));
         ap = ((desc >> 4) & 3) | ((desc >> 7) & 4);
         switch (desc & 3) {
         case 0: /* Page translation fault.  */
@@ -5330,6 +5386,13 @@ static int get_phys_addr_v6(CPUARMState *env, uint32_t address, int access_type,
             goto do_fault;
         }
     }
+    if (ns) {
+        /* The NS bit will (as required by the architecture) have no effect if
+         * the CPU doesn't support TZ or this is a non-secure translation
+         * regime, because the attribute will already be non-secure.
+         */
+        attrs->secure = false;
+    }
     *phys_ptr = phys_addr;
     return 0;
 do_fault:
@@ -5347,7 +5410,7 @@ typedef enum {
 
 static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                               int access_type, ARMMMUIdx mmu_idx,
-                              hwaddr *phys_ptr, int *prot,
+                              hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
                               target_ulong *page_size_ptr)
 {
     CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -5487,13 +5550,20 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
     descaddr = extract64(ttbr, 0, 48);
     descaddr &= ~((1ULL << (va_size - tsz - (granule_sz * (4 - level)))) - 1);
 
-    tableattrs = 0;
+    /* Secure accesses start with the page table in secure memory and
+     * can be downgraded to non-secure at any step. Non-secure accesses
+     * remain non-secure. We implement this by just ORing in the NSTable/NS
+     * bits at each step.
+     */
+    tableattrs = regime_is_secure(env, mmu_idx) ? 0 : (1 << 4);
     for (;;) {
         uint64_t descriptor;
+        bool nstable;
 
         descaddr |= (address >> (granule_sz * (4 - level))) & descmask;
         descaddr &= ~7ULL;
-        descriptor = ldq_phys(cs->as, descaddr);
+        nstable = extract32(tableattrs, 4, 1);
+        descriptor = arm_ldq_ptw(cs, descaddr, !nstable);
         if (!(descriptor & 1) ||
             (!(descriptor & 2) && (level == 3))) {
             /* Invalid, or the Reserved level 3 encoding */
@@ -5528,7 +5598,7 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         if (extract32(tableattrs, 2, 1)) {
             attrs &= ~(1 << 4);
         }
-        attrs |= extract32(tableattrs, 4, 1) << 3; /* NS */
+        attrs |= nstable << 3; /* NS */
         break;
     }
     /* Here descaddr is the final physical address, and attributes
@@ -5552,6 +5622,13 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
         goto do_fault;
     }
 
+    if (ns) {
+        /* The NS bit will (as required by the architecture) have no effect if
+         * the CPU doesn't support TZ or this is a non-secure translation
+         * regime, because the attribute will already be non-secure.
+         */
+        txattrs->secure = false;
+    }
     *phys_ptr = descaddr;
     *page_size_ptr = page_size;
     return 0;
@@ -5635,8 +5712,8 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address,
  * by doing a translation table walk on MMU based systems or using the
  * MPU state on MPU based systems.
  *
- * Returns 0 if the translation was successful. Otherwise, phys_ptr,
- * prot and page_size are not filled in, and the return value provides
+ * Returns 0 if the translation was successful. Otherwise, phys_ptr, attrs,
+ * prot and page_size may not be filled in, and the return value provides
  * information on why the translation aborted, in the format of a
  * DFSR/IFSR fault register, with the following caveats:
  *  * we honour the short vs long DFSR format differences.
@@ -5649,24 +5726,33 @@ static int get_phys_addr_mpu(CPUARMState *env, uint32_t address,
  * @access_type: 0 for read, 1 for write, 2 for execute
  * @mmu_idx: MMU index indicating required translation regime
  * @phys_ptr: set to the physical address corresponding to the virtual address
+ * @attrs: set to the memory transaction attributes to use
  * @prot: set to the permissions for the page containing phys_ptr
  * @page_size: set to the size of the page containing phys_ptr
  */
 static inline int get_phys_addr(CPUARMState *env, target_ulong address,
                                 int access_type, ARMMMUIdx mmu_idx,
-                                hwaddr *phys_ptr, int *prot,
+                                hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
                                 target_ulong *page_size)
 {
     if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
         /* TODO: when we support EL2 we should here call ourselves recursively
-         * to do the stage 1 and then stage 2 translations. The ldl_phys
-         * calls for stage 1 will also need changing.
+         * to do the stage 1 and then stage 2 translations. The arm_ld*_ptw
+         * functions will also need changing to perform ARMMMUIdx_S2NS loads
+         * rather than direct physical memory loads when appropriate.
          * For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
          */
         assert(!arm_feature(env, ARM_FEATURE_EL2));
         mmu_idx += ARMMMUIdx_S1NSE0;
     }
 
+    /* The page table entries may downgrade secure to non-secure, but
+     * cannot upgrade a non-secure translation regime's attributes
+     * to secure.
+     */
+    attrs->secure = regime_is_secure(env, mmu_idx);
+    attrs->user = regime_is_user(env, mmu_idx);
+
     /* Fast Context Switch Extension. This doesn't exist at all in v8.
      * In v7 and earlier it affects all stage 1 translations.
      */
@@ -5695,10 +5781,10 @@ static inline int get_phys_addr(CPUARMState *env, target_ulong address,
 
     if (regime_using_lpae_format(env, mmu_idx)) {
         return get_phys_addr_lpae(env, address, access_type, mmu_idx, phys_ptr,
-                                  prot, page_size);
+                                  attrs, prot, page_size);
     } else if (regime_sctlr(env, mmu_idx) & SCTLR_XP) {
         return get_phys_addr_v6(env, address, access_type, mmu_idx, phys_ptr,
-                                prot, page_size);
+                                attrs, prot, page_size);
     } else {
         return get_phys_addr_v5(env, address, access_type, mmu_idx, phys_ptr,
                                 prot, page_size);
@@ -5716,14 +5802,16 @@ int arm_cpu_handle_mmu_fault(CPUState *cs, vaddr address,
     int ret;
     uint32_t syn;
     bool same_el = (arm_current_el(env) != 0);
+    MemTxAttrs attrs = {};
 
-    ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr, &prot,
-                        &page_size);
+    ret = get_phys_addr(env, address, access_type, mmu_idx, &phys_addr,
+                        &attrs, &prot, &page_size);
     if (ret == 0) {
         /* Map a single [sub]page.  */
         phys_addr &= TARGET_PAGE_MASK;
         address &= TARGET_PAGE_MASK;
-        tlb_set_page(cs, address, phys_addr, prot, mmu_idx, page_size);
+        tlb_set_page_with_attrs(cs, address, phys_addr, attrs,
+                                prot, mmu_idx, page_size);
         return 0;
     }
 
@@ -5758,9 +5846,10 @@ hwaddr arm_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
     target_ulong page_size;
     int prot;
     int ret;
+    MemTxAttrs attrs = {};
 
     ret = get_phys_addr(env, addr, 0, cpu_mmu_index(env), &phys_addr,
-                        &prot, &page_size);
+                        &attrs, &prot, &page_size);
 
     if (ret != 0) {
         return -1;
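
The key invariant in the long-descriptor walk is that security only ratchets downward: a walk that starts secure can be downgraded by an NSTable bit at any level, while a non-secure walk can never become secure, because the code seeds tableattrs with the NSTable bit already set and only ever ORs into it. A standalone sketch of that logic:

    def walk_is_secure(regime_secure, nstable_per_level):
        # mirrors the tableattrs handling in get_phys_addr_lpae: NSTable
        # starts clear for secure regimes, set for non-secure ones, and
        # each level can set it but never clear it
        tableattrs = 0 if regime_secure else (1 << 4)
        for nstable in nstable_per_level:
            tableattrs |= (nstable & 1) << 4
        return not ((tableattrs >> 4) & 1)

    assert walk_is_secure(True,  [0, 0, 0]) is True
    assert walk_is_secure(True,  [0, 1, 0]) is False  # downgraded mid-walk
    assert walk_is_secure(False, [0, 0, 0]) is False  # never upgraded
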
index 7713022752775a0ab799d61fd4a0d7a00725eb2f..3df9c57c91dad0c0901654578f8cb0fa03776e92 100644 (file)
@@ -600,15 +600,26 @@ static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp)
     CPUARMState *env = &cpu->env;
     uint64_t cr;
     int pac, hmc, ssc, wt, lbn;
-    /* TODO: check against CPU security state when we implement TrustZone */
-    bool is_secure = false;
+    /* Note that for watchpoints the check is against the CPU security
+     * state, not the S/NS attribute on the offending data access.
+     */
+    bool is_secure = arm_is_secure(env);
+    int access_el = arm_current_el(env);
 
     if (is_wp) {
-        if (!env->cpu_watchpoint[n]
-            || !(env->cpu_watchpoint[n]->flags & BP_WATCHPOINT_HIT)) {
+        CPUWatchpoint *wp = env->cpu_watchpoint[n];
+
+        if (!wp || !(wp->flags & BP_WATCHPOINT_HIT)) {
             return false;
         }
         cr = env->cp15.dbgwcr[n];
+        if (wp->hitattrs.user) {
+            /* The LDRT/STRT/LDT/STT "unprivileged access" instructions should
+             * match watchpoints as if they were accesses done at EL0, even if
+             * the CPU is at EL1 or higher.
+             */
+            access_el = 0;
+        }
     } else {
         uint64_t pc = is_a64(env) ? env->pc : env->regs[15];
 
@@ -649,15 +660,7 @@ static bool bp_wp_matches(ARMCPU *cpu, int n, bool is_wp)
         break;
     }
 
-    /* TODO: this is not strictly correct because the LDRT/STRT/LDT/STT
-     * "unprivileged access" instructions should match watchpoints as if
-     * they were accesses done at EL0, even if the CPU is at EL1 or higher.
-     * Implementing this would require reworking the core watchpoint code
-     * to plumb the mmu_idx through to this point. Luckily Linux does not
-     * rely on this behaviour currently.
-     * For breakpoints we do want to use the current CPU state.
-     */
-    switch (arm_current_el(env)) {
+    switch (access_el) {
     case 3:
     case 2:
         if (!hmc) {
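
The removed TODO becomes implementable because a watchpoint hit now records the transaction attributes of the offending access in wp->hitattrs, so the unprivileged load/store forms can be matched as if they ran at EL0. The rule reduces to (sketch):

    def watchpoint_match_el(current_el, hit_was_unprivileged):
        # LDRT/STRT/LDT/STT match watchpoints as EL0 accesses even when
        # the CPU itself is running at EL1 or higher
        return 0 if hit_was_unprivileged else current_el

    assert watchpoint_match_el(1, True) == 0
    assert watchpoint_match_el(1, False) == 1
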
index 2d35f63e1e35e880ea3c92a6d68b8181027049cf..01563fecceb7cf4d929753a419d8a006f6b7bc70 100644 (file)
@@ -27,7 +27,7 @@ static void walk_pte(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 512; i++) {
         pte_addr = (pte_start_addr + i * 8) & a20_mask;
-        pte = ldq_phys(as, pte_addr);
+        pte = address_space_ldq(as, pte_addr, MEMTXATTRS_UNSPECIFIED, NULL);
         if (!(pte & PG_PRESENT_MASK)) {
             /* not present */
             continue;
@@ -57,7 +57,7 @@ static void walk_pte2(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 1024; i++) {
         pte_addr = (pte_start_addr + i * 4) & a20_mask;
-        pte = ldl_phys(as, pte_addr);
+        pte = address_space_ldl(as, pte_addr, MEMTXATTRS_UNSPECIFIED, NULL);
         if (!(pte & PG_PRESENT_MASK)) {
             /* not present */
             continue;
@@ -89,7 +89,7 @@ static void walk_pde(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 512; i++) {
         pde_addr = (pde_start_addr + i * 8) & a20_mask;
-        pde = ldq_phys(as, pde_addr);
+        pde = address_space_ldq(as, pde_addr, MEMTXATTRS_UNSPECIFIED, NULL);
         if (!(pde & PG_PRESENT_MASK)) {
             /* not present */
             continue;
@@ -126,7 +126,7 @@ static void walk_pde2(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 1024; i++) {
         pde_addr = (pde_start_addr + i * 4) & a20_mask;
-        pde = ldl_phys(as, pde_addr);
+        pde = address_space_ldl(as, pde_addr, MEMTXATTRS_UNSPECIFIED, NULL);
         if (!(pde & PG_PRESENT_MASK)) {
             /* not present */
             continue;
@@ -167,7 +167,7 @@ static void walk_pdpe2(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 4; i++) {
         pdpe_addr = (pdpe_start_addr + i * 8) & a20_mask;
-        pdpe = ldq_phys(as, pdpe_addr);
+        pdpe = address_space_ldq(as, pdpe_addr, MEMTXATTRS_UNSPECIFIED, NULL);
         if (!(pdpe & PG_PRESENT_MASK)) {
             /* not present */
             continue;
@@ -192,7 +192,7 @@ static void walk_pdpe(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 512; i++) {
         pdpe_addr = (pdpe_start_addr + i * 8) & a20_mask;
-        pdpe = ldq_phys(as, pdpe_addr);
+        pdpe = address_space_ldq(as, pdpe_addr, MEMTXATTRS_UNSPECIFIED, NULL);
         if (!(pdpe & PG_PRESENT_MASK)) {
             /* not present */
             continue;
@@ -228,7 +228,8 @@ static void walk_pml4e(MemoryMappingList *list, AddressSpace *as,
 
     for (i = 0; i < 512; i++) {
         pml4e_addr = (pml4e_start_addr + i * 8) & a20_mask;
-        pml4e = ldq_phys(as, pml4e_addr);
+        pml4e = address_space_ldq(as, pml4e_addr, MEMTXATTRS_UNSPECIFIED,
+                                  NULL);
         if (!(pml4e & PG_PRESENT_MASK)) {
             /* not present */
             continue;
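
All of these walkers share the same entry-address arithmetic; the change only swaps the load primitive so that an explicit (here unspecified) set of transaction attributes accompanies each read. For reference, the indexing they all perform (a sketch; the A20 mask comes from the emulated CPU state):

    def entry_addr(table_base, index, entry_size, a20_mask):
        # walk_pte/walk_pde pattern: i-th entry of a table, clipped by A20
        return (table_base + index * entry_size) & a20_mask

    A20_PASSTHROUGH = (1 << 64) - 1        # assumption: A20 gate enabled
    assert entry_addr(0x1000, 3, 8, A20_PASSTHROUGH) == 0x1018  # 64-bit PTEs
    assert entry_addr(0x1000, 3, 4, A20_PASSTHROUGH) == 0x100c  # 32-bit PTEs
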
index 03b33cf3bd9d028a2734d49eb841d6962c4426d1..3305e094138a52cec66f54722f8e920ba3d65a8c 100644 (file)
@@ -688,7 +688,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
         .features[FEAT_1_ECX] =
             CPUID_EXT_SSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT,
         .features[FEAT_8000_0001_EDX] =
-            (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         .features[FEAT_8000_0001_ECX] =
             CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
@@ -711,7 +710,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_CX16 |
             CPUID_EXT_POPCNT,
         .features[FEAT_8000_0001_EDX] =
-            (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
             CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_MMXEXT |
             CPUID_EXT2_FFXSR | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP,
@@ -769,7 +767,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT_SSE3 | CPUID_EXT_CX16,
         /* Missing: CPUID_EXT2_PDPE1GB, CPUID_EXT2_RDTSCP */
         .features[FEAT_8000_0001_EDX] =
-            (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         /* Missing: CPUID_EXT3_LAHF_LM, CPUID_EXT3_CMP_LEG, CPUID_EXT3_EXTAPIC,
                     CPUID_EXT3_CR8LEG, CPUID_EXT3_ABM, CPUID_EXT3_SSE4A,
@@ -805,8 +802,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_PSE36,
         .features[FEAT_1_ECX] =
             CPUID_EXT_SSE3,
-        .features[FEAT_8000_0001_EDX] =
-            PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES,
         .features[FEAT_8000_0001_ECX] =
             0,
         .xlevel = 0x80000008,
@@ -888,7 +883,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
             PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR |
             CPUID_MCA,
         .features[FEAT_8000_0001_EDX] =
-            (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
         .xlevel = 0x80000008,
     },
@@ -912,7 +906,6 @@ static X86CPUDefinition builtin_x86_defs[] = {
             CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 |
             CPUID_EXT_MOVBE,
         .features[FEAT_8000_0001_EDX] =
-            (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_NX,
         .features[FEAT_8000_0001_ECX] =
             CPUID_EXT3_LAHF_LM,
@@ -1618,38 +1611,6 @@ static void x86_cpuid_version_set_stepping(Object *obj, Visitor *v,
     env->cpuid_version |= value & 0xf;
 }
 
-static void x86_cpuid_get_level(Object *obj, Visitor *v, void *opaque,
-                                const char *name, Error **errp)
-{
-    X86CPU *cpu = X86_CPU(obj);
-
-    visit_type_uint32(v, &cpu->env.cpuid_level, name, errp);
-}
-
-static void x86_cpuid_set_level(Object *obj, Visitor *v, void *opaque,
-                                const char *name, Error **errp)
-{
-    X86CPU *cpu = X86_CPU(obj);
-
-    visit_type_uint32(v, &cpu->env.cpuid_level, name, errp);
-}
-
-static void x86_cpuid_get_xlevel(Object *obj, Visitor *v, void *opaque,
-                                 const char *name, Error **errp)
-{
-    X86CPU *cpu = X86_CPU(obj);
-
-    visit_type_uint32(v, &cpu->env.cpuid_xlevel, name, errp);
-}
-
-static void x86_cpuid_set_xlevel(Object *obj, Visitor *v, void *opaque,
-                                 const char *name, Error **errp)
-{
-    X86CPU *cpu = X86_CPU(obj);
-
-    visit_type_uint32(v, &cpu->env.cpuid_xlevel, name, errp);
-}
-
 static char *x86_cpuid_get_vendor(Object *obj, Error **errp)
 {
     X86CPU *cpu = X86_CPU(obj);
@@ -2109,7 +2070,7 @@ static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp)
     object_property_set_int(OBJECT(cpu), def->model, "model", errp);
     object_property_set_int(OBJECT(cpu), def->stepping, "stepping", errp);
     object_property_set_int(OBJECT(cpu), def->xlevel, "xlevel", errp);
-    env->cpuid_xlevel2 = def->xlevel2;
+    object_property_set_int(OBJECT(cpu), def->xlevel2, "xlevel2", errp);
     cpu->cache_info_passthrough = def->cache_info_passthrough;
     object_property_set_str(OBJECT(cpu), def->model_id, "model-id", errp);
     for (w = 0; w < FEATURE_WORDS; w++) {
@@ -2900,12 +2861,6 @@ static void x86_cpu_initfn(Object *obj)
     object_property_add(obj, "stepping", "int",
                         x86_cpuid_version_get_stepping,
                         x86_cpuid_version_set_stepping, NULL, NULL, NULL);
-    object_property_add(obj, "level", "int",
-                        x86_cpuid_get_level,
-                        x86_cpuid_set_level, NULL, NULL, NULL);
-    object_property_add(obj, "xlevel", "int",
-                        x86_cpuid_get_xlevel,
-                        x86_cpuid_set_xlevel, NULL, NULL, NULL);
     object_property_add_str(obj, "vendor",
                             x86_cpuid_get_vendor,
                             x86_cpuid_set_vendor, NULL);
@@ -2998,6 +2953,9 @@ static Property x86_cpu_properties[] = {
     DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, false),
     DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
     DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true),
+    DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, 0),
+    DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, 0),
+    DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, 0),
     DEFINE_PROP_END_OF_LIST()
 };
 
index f15815f11b1f9fc529ec1b712f63487e70bb7a52..c05c5033052295b59e1bbcf6ac138dbd2219e84b 100644 (file)
@@ -2251,8 +2251,8 @@ static inline ppcmas_tlb_t *booke206_get_tlbm(CPUPPCState *env, const int tlbn,
 {
     int r;
     uint32_t ways = booke206_tlb_ways(env, tlbn);
-    int ways_bits = ffs(ways) - 1;
-    int tlb_bits = ffs(booke206_tlb_size(env, tlbn)) - 1;
+    int ways_bits = ctz32(ways);
+    int tlb_bits = ctz32(booke206_tlb_size(env, tlbn));
     int i;
 
     way &= ways - 1;
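
The substitution is behavior-preserving here: the way count and TLB size are powers of two, and for any non-zero x, ffs(x) - 1 equals ctz32(x); ffs(0) - 1 would give -1 where ctz32(0) gives 32, but neither operand can be zero in this path. In Python terms:

    def ctz32(x):
        # count trailing zeros, 32 for x == 0, like QEMU's ctz32()
        return 32 if x == 0 else (x & -x).bit_length() - 1

    def ffs(x):
        # POSIX ffs(): 1-based index of the least significant set bit
        return 0 if x == 0 else (x & -x).bit_length()

    for ways in (1, 2, 4, 8, 16, 512):
        assert ffs(ways) - 1 == ctz32(ways)
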
index 55aa7452b41a23058bd1322642e9377ed61d9e68..309e8697fdf0080cdba613f5dda9b7001f33ebe6 100644 (file)
@@ -415,6 +415,7 @@ GCOV_OPTIONS = -n $(if $(V),-f,)
 $(patsubst %, check-qtest-%, $(QTEST_TARGETS)): check-qtest-%: $(check-qtest-y)
        $(if $(CONFIG_GCOV),@rm -f *.gcda */*.gcda */*/*.gcda */*/*/*.gcda,)
        $(call quiet-command,QTEST_QEMU_BINARY=$*-softmmu/qemu-system-$* \
+               QTEST_QEMU_IMG=qemu-img$(EXESUF) \
                MALLOC_PERTURB_=$${MALLOC_PERTURB_:-$$((RANDOM % 255 + 1))} \
                gtester $(GTESTER_OPTIONS) -m=$(SPEED) $(check-qtest-$*-y),"GTESTER $@")
        $(if $(CONFIG_GCOV),@for f in $(gcov-files-$*-y); do \
index ea62e249f5b6bdb399be73779150514c9acde4c6..7c23bb2180aac41fb0bf64bc68572be0f3029586 100644 (file)
 #include "hw/pci/pci_ids.h"
 #include "hw/pci/pci_regs.h"
 
-/* Test-specific defines. */
-#define TEST_IMAGE_SIZE    (64 * 1024 * 1024)
+/* Test-specific defines -- in MiB */
+#define TEST_IMAGE_SIZE_MB (200 * 1024)
+#define TEST_IMAGE_SECTORS ((TEST_IMAGE_SIZE_MB / AHCI_SECTOR_SIZE)     \
+                            * 1024 * 1024)
 
 /*** Globals ***/
 static char tmp_path[] = "/tmp/qtest.XXXXXX";
+static char debug_path[] = "/tmp/qtest-blkdebug.XXXXXX";
 static bool ahci_pedantic;
 
 /*** Function Declarations ***/
@@ -99,19 +102,12 @@ static void generate_pattern(void *buffer, size_t len, size_t cycle_len)
 /**
  * Start a Q35 machine and bookmark a handle to the AHCI device.
  */
-static AHCIQState *ahci_boot(void)
+static AHCIQState *ahci_vboot(const char *cli, va_list ap)
 {
     AHCIQState *s;
-    const char *cli;
 
     s = g_malloc0(sizeof(AHCIQState));
-
-    cli = "-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s"
-        ",format=raw"
-        " -M q35 "
-        "-device ide-hd,drive=drive0 "
-        "-global ide-hd.ver=%s";
-    s->parent = qtest_pc_boot(cli, tmp_path, "testdisk", "version");
+    s->parent = qtest_pc_vboot(cli, ap);
     alloc_set_flags(s->parent->alloc, ALLOC_LEAK_ASSERT);
 
     /* Verify that we have an AHCI device present. */
@@ -120,13 +116,36 @@ static AHCIQState *ahci_boot(void)
     return s;
 }
 
+/**
+ * Start a Q35 machine and bookmark a handle to the AHCI device.
+ */
+static AHCIQState *ahci_boot(const char *cli, ...)
+{
+    AHCIQState *s;
+    va_list ap;
+
+    if (cli) {
+        va_start(ap, cli);
+        s = ahci_vboot(cli, ap);
+        va_end(ap);
+    } else {
+        cli = "-drive if=none,id=drive0,file=%s,cache=writeback,serial=%s"
+            ",format=qcow2"
+            " -M q35 "
+            "-device ide-hd,drive=drive0 "
+            "-global ide-hd.ver=%s";
+        s = ahci_boot(cli, tmp_path, "testdisk", "version");
+    }
+
+    return s;
+}
+
 /**
  * Clean up the PCI device, then terminate the QEMU instance.
  */
 static void ahci_shutdown(AHCIQState *ahci)
 {
     QOSState *qs = ahci->parent;
-
     ahci_clean_mem(ahci);
     free_ahci_device(ahci->dev);
     g_free(ahci);
@@ -137,10 +156,18 @@ static void ahci_shutdown(AHCIQState *ahci)
  * Boot and fully enable the HBA device.
  * @see ahci_boot, ahci_pci_enable and ahci_hba_enable.
  */
-static AHCIQState *ahci_boot_and_enable(void)
+static AHCIQState *ahci_boot_and_enable(const char *cli, ...)
 {
     AHCIQState *ahci;
-    ahci = ahci_boot();
+    va_list ap;
+
+    if (cli) {
+        va_start(ap, cli);
+        ahci = ahci_vboot(cli, ap);
+        va_end(ap);
+    } else {
+        ahci = ahci_boot(NULL);
+    }
 
     ahci_pci_enable(ahci);
     ahci_hba_enable(ahci);
@@ -738,7 +765,7 @@ static void ahci_test_identify(AHCIQState *ahci)
     ahci_port_clear(ahci, px);
 
     /* "Read" 512 bytes using CMD_IDENTIFY into the host buffer. */
-    ahci_io(ahci, px, CMD_IDENTIFY, &buff, buffsize);
+    ahci_io(ahci, px, CMD_IDENTIFY, &buff, buffsize, 0);
 
     /* Check serial number/version in the buffer */
     /* NB: IDENTIFY strings are packed in 16bit little endian chunks.
@@ -754,11 +781,12 @@ static void ahci_test_identify(AHCIQState *ahci)
     g_assert_cmphex(rc, ==, 0);
 
     sect_size = le16_to_cpu(*((uint16_t *)(&buff[5])));
-    g_assert_cmphex(sect_size, ==, 0x200);
+    g_assert_cmphex(sect_size, ==, AHCI_SECTOR_SIZE);
 }
 
 static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize,
-                                   uint8_t read_cmd, uint8_t write_cmd)
+                                   uint64_t sector, uint8_t read_cmd,
+                                   uint8_t write_cmd)
 {
     uint64_t ptr;
     uint8_t port;
@@ -781,9 +809,9 @@ static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize,
     memwrite(ptr, tx, bufsize);
 
     /* Write this buffer to disk, then read it back to the DMA buffer. */
-    ahci_guest_io(ahci, port, write_cmd, ptr, bufsize);
+    ahci_guest_io(ahci, port, write_cmd, ptr, bufsize, sector);
     qmemset(ptr, 0x00, bufsize);
-    ahci_guest_io(ahci, port, read_cmd, ptr, bufsize);
+    ahci_guest_io(ahci, port, read_cmd, ptr, bufsize, sector);
 
     /*** Read back the Data ***/
     memread(ptr, rx, bufsize);
@@ -794,6 +822,29 @@ static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize,
     g_free(rx);
 }
 
+static void ahci_test_nondata(AHCIQState *ahci, uint8_t ide_cmd)
+{
+    uint8_t px;
+    AHCICommand *cmd;
+
+    /* Sanitize */
+    px = ahci_port_select(ahci);
+    ahci_port_clear(ahci, px);
+
+    /* Issue Command */
+    cmd = ahci_command_create(ide_cmd);
+    ahci_command_commit(ahci, cmd, px);
+    ahci_command_issue(ahci, cmd);
+    ahci_command_verify(ahci, cmd);
+    ahci_command_free(cmd);
+}
+
+static void ahci_test_flush(AHCIQState *ahci)
+{
+    ahci_test_nondata(ahci, CMD_FLUSH_CACHE);
+}
+
 /******************************************************************************/
 /* Test Interfaces                                                            */
 /******************************************************************************/
@@ -804,7 +855,7 @@ static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize,
 static void test_sanity(void)
 {
     AHCIQState *ahci;
-    ahci = ahci_boot();
+    ahci = ahci_boot(NULL);
     ahci_shutdown(ahci);
 }
 
@@ -815,7 +866,7 @@ static void test_sanity(void)
 static void test_pci_spec(void)
 {
     AHCIQState *ahci;
-    ahci = ahci_boot();
+    ahci = ahci_boot(NULL);
     ahci_test_pci_spec(ahci);
     ahci_shutdown(ahci);
 }
@@ -827,8 +878,7 @@ static void test_pci_spec(void)
 static void test_pci_enable(void)
 {
     AHCIQState *ahci;
-
-    ahci = ahci_boot();
+    ahci = ahci_boot(NULL);
     ahci_pci_enable(ahci);
     ahci_shutdown(ahci);
 }
@@ -841,7 +891,7 @@ static void test_hba_spec(void)
 {
     AHCIQState *ahci;
 
-    ahci = ahci_boot();
+    ahci = ahci_boot(NULL);
     ahci_pci_enable(ahci);
     ahci_test_hba_spec(ahci);
     ahci_shutdown(ahci);
@@ -855,7 +905,7 @@ static void test_hba_enable(void)
 {
     AHCIQState *ahci;
 
-    ahci = ahci_boot();
+    ahci = ahci_boot(NULL);
     ahci_pci_enable(ahci);
     ahci_hba_enable(ahci);
     ahci_shutdown(ahci);
@@ -869,7 +919,7 @@ static void test_identify(void)
 {
     AHCIQState *ahci;
 
-    ahci = ahci_boot_and_enable();
+    ahci = ahci_boot_and_enable(NULL);
     ahci_test_identify(ahci);
     ahci_shutdown(ahci);
 }
@@ -890,7 +940,7 @@ static void test_dma_fragmented(void)
     unsigned char *rx = g_malloc0(bufsize);
     uint64_t ptr;
 
-    ahci = ahci_boot_and_enable();
+    ahci = ahci_boot_and_enable(NULL);
     px = ahci_port_select(ahci);
     ahci_port_clear(ahci, px);
 
@@ -928,6 +978,50 @@ static void test_dma_fragmented(void)
     g_free(tx);
 }
 
+static void test_flush(void)
+{
+    AHCIQState *ahci;
+
+    ahci = ahci_boot_and_enable(NULL);
+    ahci_test_flush(ahci);
+    ahci_shutdown(ahci);
+}
+
+static void test_flush_retry(void)
+{
+    AHCIQState *ahci;
+    AHCICommand *cmd;
+    uint8_t port;
+    const char *s;
+
+    prepare_blkdebug_script(debug_path, "flush_to_disk");
+    ahci = ahci_boot_and_enable("-drive file=blkdebug:%s:%s,if=none,id=drive0,"
+                                "format=qcow2,cache=writeback,"
+                                "rerror=stop,werror=stop "
+                                "-M q35 "
+                                "-device ide-hd,drive=drive0 ",
+                                debug_path,
+                                tmp_path);
+
+    /* Issue Flush Command */
+    port = ahci_port_select(ahci);
+    ahci_port_clear(ahci, port);
+    cmd = ahci_command_create(CMD_FLUSH_CACHE);
+    ahci_command_commit(ahci, cmd, port);
+    ahci_command_issue_async(ahci, cmd);
+    qmp_eventwait("STOP");
+
+    /* Complete the command */
+    s = "{'execute':'cont' }";
+    qmp_async(s);
+    qmp_eventwait("RESUME");
+    ahci_command_wait(ahci, cmd);
+    ahci_command_verify(ahci, cmd);
+
+    ahci_command_free(cmd);
+    ahci_shutdown(ahci);
+}
+
 /******************************************************************************/
 /* AHCI I/O Test Matrix Definitions                                           */
 
@@ -968,12 +1062,45 @@ enum IOOps {
     NUM_IO_OPS
 };
 
+enum OffsetType {
+    OFFSET_BEGIN = 0,
+    OFFSET_ZERO = OFFSET_BEGIN,
+    OFFSET_LOW,
+    OFFSET_HIGH,
+    NUM_OFFSETS
+};
+
+static const char *offset_str[NUM_OFFSETS] = { "zero", "low", "high" };
+
 typedef struct AHCIIOTestOptions {
     enum BuffLen length;
     enum AddrMode address_type;
     enum IOMode io_type;
+    enum OffsetType offset;
 } AHCIIOTestOptions;
 
+static uint64_t offset_sector(enum OffsetType ofst,
+                              enum AddrMode addr_type,
+                              uint64_t buffsize)
+{
+    uint64_t ceil;
+    uint64_t nsectors;
+
+    switch (ofst) {
+    case OFFSET_ZERO:
+        return 0;
+    case OFFSET_LOW:
+        return 1;
+    case OFFSET_HIGH:
+        ceil = (addr_type == ADDR_MODE_LBA28) ? 0xfffffff : 0xffffffffffff;
+        ceil = MIN(ceil, TEST_IMAGE_SECTORS - 1);
+        nsectors = buffsize / AHCI_SECTOR_SIZE;
+        return ceil - nsectors + 1;
+    default:
+        g_assert_not_reached();
+    }
+}
+
 /**
  * Table of possible I/O ATA commands given a set of enumerations.
  */
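
The new offset axis exercises I/O at sector zero, at sector one, and at the highest run that still fits both the addressing mode and the disk. With the 200 GiB sparse qcow2 image, the LBA28 ceiling of 0xfffffff sectors is the binding limit in 28-bit mode, while the image size binds in 48-bit mode. A worked restatement of offset_sector's OFFSET_HIGH case:

    AHCI_SECTOR_SIZE = 512
    TEST_IMAGE_SIZE_MB = 200 * 1024
    TEST_IMAGE_SECTORS = (TEST_IMAGE_SIZE_MB // AHCI_SECTOR_SIZE) * 1024 * 1024

    def high_offset(lba48, bufsize):
        # OFFSET_HIGH: last addressable run of bufsize bytes
        ceil = 0xffffffffffff if lba48 else 0xfffffff
        ceil = min(ceil, TEST_IMAGE_SECTORS - 1)
        return ceil - bufsize // AHCI_SECTOR_SIZE + 1

    assert high_offset(False, 4096) == 0xfffffff - 7          # LBA28 binds
    assert high_offset(True, 4096) == TEST_IMAGE_SECTORS - 8  # image binds
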
@@ -1001,12 +1128,12 @@ static const uint8_t io_cmds[NUM_MODES][NUM_ADDR_MODES][NUM_IO_OPS] = {
  * transfer modes, and buffer sizes.
  */
 static void test_io_rw_interface(enum AddrMode lba48, enum IOMode dma,
-                                 unsigned bufsize)
+                                 unsigned bufsize, uint64_t sector)
 {
     AHCIQState *ahci;
 
-    ahci = ahci_boot_and_enable();
-    ahci_test_io_rw_simple(ahci, bufsize,
+    ahci = ahci_boot_and_enable(NULL);
+    ahci_test_io_rw_simple(ahci, bufsize, sector,
                            io_cmds[dma][lba48][IO_READ],
                            io_cmds[dma][lba48][IO_WRITE]);
     ahci_shutdown(ahci);
@@ -1019,6 +1146,7 @@ static void test_io_interface(gconstpointer opaque)
 {
     AHCIIOTestOptions *opts = (AHCIIOTestOptions *)opaque;
     unsigned bufsize;
+    uint64_t sector;
 
     switch (opts->length) {
     case LEN_SIMPLE:
@@ -1037,13 +1165,14 @@ static void test_io_interface(gconstpointer opaque)
         g_assert_not_reached();
     }
 
-    test_io_rw_interface(opts->address_type, opts->io_type, bufsize);
+    sector = offset_sector(opts->offset, opts->address_type, bufsize);
+    test_io_rw_interface(opts->address_type, opts->io_type, bufsize, sector);
     g_free(opts);
     return;
 }
 
 static void create_ahci_io_test(enum IOMode type, enum AddrMode addr,
-                                enum BuffLen len)
+                                enum BuffLen len, enum OffsetType offset)
 {
     static const char *arch;
     char *name;
@@ -1052,15 +1181,17 @@ static void create_ahci_io_test(enum IOMode type, enum AddrMode addr,
     opts->length = len;
     opts->address_type = addr;
     opts->io_type = type;
+    opts->offset = offset;
 
     if (!arch) {
         arch = qtest_get_arch();
     }
 
-    name = g_strdup_printf("/%s/ahci/io/%s/%s/%s", arch,
+    name = g_strdup_printf("/%s/ahci/io/%s/%s/%s/%s", arch,
                            io_mode_str[type],
                            addr_mode_str[addr],
-                           buff_len_str[len]);
+                           buff_len_str[len],
+                           offset_str[offset]);
 
     g_test_add_data_func(name, opts, test_io_interface);
     g_free(name);
@@ -1071,10 +1202,10 @@ static void create_ahci_io_test(enum IOMode type, enum AddrMode addr,
 int main(int argc, char **argv)
 {
     const char *arch;
-    int fd;
     int ret;
+    int fd;
     int c;
-    int i, j, k;
+    int i, j, k, m;
 
     static struct option long_options[] = {
         {"pedantic", no_argument, 0, 'p' },
@@ -1108,11 +1239,13 @@ int main(int argc, char **argv)
         return 0;
     }
 
-    /* Create a temporary raw image */
-    fd = mkstemp(tmp_path);
+    /* Create a temporary qcow2 image */
+    close(mkstemp(tmp_path));
+    mkqcow2(tmp_path, TEST_IMAGE_SIZE_MB);
+
+    /* Create temporary blkdebug instructions */
+    fd = mkstemp(debug_path);
     g_assert(fd >= 0);
-    ret = ftruncate(fd, TEST_IMAGE_SIZE);
-    g_assert(ret == 0);
     close(fd);
 
     /* Run the tests */
@@ -1126,17 +1259,23 @@ int main(int argc, char **argv)
     for (i = MODE_BEGIN; i < NUM_MODES; i++) {
         for (j = ADDR_MODE_BEGIN; j < NUM_ADDR_MODES; j++) {
             for (k = LEN_BEGIN; k < NUM_LENGTHS; k++) {
-                create_ahci_io_test(i, j, k);
+                for (m = OFFSET_BEGIN; m < NUM_OFFSETS; m++) {
+                    create_ahci_io_test(i, j, k, m);
+                }
             }
         }
     }
 
     qtest_add_func("/ahci/io/dma/lba28/fragmented", test_dma_fragmented);
 
+    qtest_add_func("/ahci/flush/simple", test_flush);
+    qtest_add_func("/ahci/flush/retry", test_flush_retry);
+
     ret = g_test_run();
 
     /* Cleanup */
     unlink(tmp_path);
+    unlink(debug_path);
 
     return ret;
 }
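
The retry test depends on blkdebug injecting exactly one EIO on the first flush_to_disk event and then stepping to a state where the rule no longer matches; combined with rerror=stop,werror=stop this parks the VM until 'cont' retries the flush. The configuration emitted by prepare_blkdebug_script (now shared through libqos, further down in this diff) comes out as:

    [inject-error]
    event = "flush_to_disk"
    errno = "5"
    state = "1"
    immediately = "off"
    once = "on"

    [set-state]
    event = "flush_to_disk"
    new_state = "2"
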
index b28a3023c2e6c80a21477a44ac706e7f6b5af1a1..78382e9c75cdcf3cfcd2599ed92293420be4d37e 100644 (file)
@@ -29,6 +29,7 @@
 #include <glib.h>
 
 #include "libqtest.h"
+#include "libqos/libqos.h"
 #include "libqos/pci-pc.h"
 #include "libqos/malloc-pc.h"
 
@@ -494,33 +495,10 @@ static void test_flush(void)
     ide_test_quit();
 }
 
-static void prepare_blkdebug_script(const char *debug_fn, const char *event)
-{
-    FILE *debug_file = fopen(debug_fn, "w");
-    int ret;
-
-    fprintf(debug_file, "[inject-error]\n");
-    fprintf(debug_file, "event = \"%s\"\n", event);
-    fprintf(debug_file, "errno = \"5\"\n");
-    fprintf(debug_file, "state = \"1\"\n");
-    fprintf(debug_file, "immediately = \"off\"\n");
-    fprintf(debug_file, "once = \"on\"\n");
-
-    fprintf(debug_file, "[set-state]\n");
-    fprintf(debug_file, "event = \"%s\"\n", event);
-    fprintf(debug_file, "new_state = \"2\"\n");
-    fflush(debug_file);
-    g_assert(!ferror(debug_file));
-
-    ret = fclose(debug_file);
-    g_assert(ret == 0);
-}
-
 static void test_retry_flush(const char *machine)
 {
     uint8_t data;
     const char *s;
-    QDict *response;
 
     prepare_blkdebug_script(debug_path, "flush_to_disk");
 
@@ -539,15 +517,7 @@ static void test_retry_flush(const char *machine)
     assert_bit_set(data, BSY | DRDY);
     assert_bit_clear(data, DF | ERR | DRQ);
 
-    for (;; response = NULL) {
-        response = qmp_receive();
-        if ((qdict_haskey(response, "event")) &&
-            (strcmp(qdict_get_str(response, "event"), "STOP") == 0)) {
-            QDECREF(response);
-            break;
-        }
-        QDECREF(response);
-    }
+    qmp_eventwait("STOP");
 
     /* Complete the command */
     s = "{'execute':'cont' }";
index b0f39a5e328b478afda5d888c03a2719cd1ad19d..a18c12bcc13a99f7aab6bf07496b9a9b16a7b0ef 100644 (file)
@@ -568,13 +568,15 @@ inline unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
 
 /* Given a guest buffer address, perform an IO operation */
 void ahci_guest_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
-                   uint64_t buffer, size_t bufsize)
+                   uint64_t buffer, size_t bufsize, uint64_t sector)
 {
     AHCICommand *cmd;
-
     cmd = ahci_command_create(ide_cmd);
     ahci_command_set_buffer(cmd, buffer);
     ahci_command_set_size(cmd, bufsize);
+    if (sector) {
+        ahci_command_set_offset(cmd, sector);
+    }
     ahci_command_commit(ahci, cmd, port);
     ahci_command_issue(ahci, cmd);
     ahci_command_verify(ahci, cmd);
@@ -612,7 +614,7 @@ static AHCICommandProp *ahci_command_find(uint8_t command_name)
 
 /* Given a HOST buffer, create a buffer address and perform an IO operation. */
 void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
-             void *buffer, size_t bufsize)
+             void *buffer, size_t bufsize, uint64_t sector)
 {
     uint64_t ptr;
     AHCICommandProp *props;
@@ -626,7 +628,7 @@ void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
         memwrite(ptr, buffer, bufsize);
     }
 
-    ahci_guest_io(ahci, port, ide_cmd, ptr, bufsize);
+    ahci_guest_io(ahci, port, ide_cmd, ptr, bufsize, sector);
 
     if (props->read) {
         memread(ptr, buffer, bufsize);
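
With the new trailing argument, callers can start the transfer at an explicit
sector (LBA) instead of always at offset zero. A minimal round-trip sketch at
LBA 8 (the CMD_WRITE_DMA/CMD_READ_DMA identifiers are assumed here for
illustration; only the extended signatures are from this patch):

    unsigned char buf[4096];
    memset(buf, 0xA5, sizeof(buf));
    ahci_io(ahci, port, CMD_WRITE_DMA, buf, sizeof(buf), 8);
    memset(buf, 0x00, sizeof(buf));
    ahci_io(ahci, port, CMD_READ_DMA, buf, sizeof(buf), 8);
    /* buf holds the 0xA5 pattern again if the round trip succeeded */
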
index 888545d5a2a29d1c7f5bb5fe91ced7abfc607c54..40e8ca48ba858b1ef439d14b3c484d691bd77b25 100644 (file)
@@ -523,9 +523,9 @@ void ahci_write_fis(AHCIQState *ahci, RegH2DFIS *fis, uint64_t addr);
 unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port);
 unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd);
 void ahci_guest_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
-                   uint64_t gbuffer, size_t size);
+                   uint64_t gbuffer, size_t size, uint64_t sector);
 void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
-             void *buffer, size_t bufsize);
+             void *buffer, size_t bufsize, uint64_t sector);
 
 /* Command Lifecycle */
 AHCICommand *ahci_command_create(uint8_t command_name);
index bbace893fb1a3b89c37691f1ecaa439b5ce6463e..14036993778037466205a16c664fba54408c2362 100644 (file)
@@ -6,6 +6,11 @@ static QOSOps qos_ops = {
     .uninit_allocator = pc_alloc_uninit
 };
 
+QOSState *qtest_pc_vboot(const char *cmdline_fmt, va_list ap)
+{
+    return qtest_vboot(&qos_ops, cmdline_fmt, ap);
+}
+
 QOSState *qtest_pc_boot(const char *cmdline_fmt, ...)
 {
     QOSState *qs;
index 316857d32fcce779ad2867a44fef9f7a7e5cf397..b1820c57398038a8c6a1dad19619d8539ac3c51d 100644 (file)
@@ -3,6 +3,7 @@
 
 #include "libqos/libqos.h"
 
+QOSState *qtest_pc_vboot(const char *cmdline_fmt, va_list ap);
 QOSState *qtest_pc_boot(const char *cmdline_fmt, ...);
 void qtest_pc_shutdown(QOSState *qs);
 
index bc8beb281f350dd71bcc81bc62738b41c6a06f86..7e7207856ef6c053f3a8a19340bb7c694bd78e7d 100644 (file)
@@ -61,3 +61,69 @@ void qtest_shutdown(QOSState *qs)
     qtest_quit(qs->qts);
     g_free(qs);
 }
+
+void mkimg(const char *file, const char *fmt, unsigned size_mb)
+{
+    gchar *cli;
+    bool ret;
+    int rc;
+    GError *err = NULL;
+    char *qemu_img_path;
+    gchar *out, *out2;
+    char *abs_path;
+
+    qemu_img_path = getenv("QTEST_QEMU_IMG");
+    g_assert(qemu_img_path);
+    abs_path = realpath(qemu_img_path, NULL);
+
+    cli = g_strdup_printf("%s create -f %s %s %uM", abs_path,
+                          fmt, file, size_mb);
+    ret = g_spawn_command_line_sync(cli, &out, &out2, &rc, &err);
+    if (err) {
+        fprintf(stderr, "%s\n", err->message);
+        g_error_free(err);
+    }
+    g_assert(ret && !err);
+
+    /* glib 2.34 provides g_spawn_check_exit_status(); glib 2.12 does not.
+     * The glib 2.43.91 implementation treats any non-zero status as an
+     * error on Windows, but takes extra precautions on Linux. Either way,
+     * a status of 0 is only possible if the program exited normally, so
+     * that is sufficient for our purposes on all platforms here. */
+    if (rc) {
+        fprintf(stderr, "qemu-img returned status code %d\n", rc);
+    }
+    g_assert(!rc);
+
+    g_free(out);
+    g_free(out2);
+    g_free(cli);
+    free(abs_path);
+}
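+
+/*
+ * Sketch of intended use from a test's setup code (the mkstemp() call is
+ * illustrative, not part of this patch):
+ *
+ *     char img[] = "/tmp/qtest.XXXXXX";
+ *     close(mkstemp(img));
+ *     mkimg(img, "qcow2", 64);    (64 MB image via $QTEST_QEMU_IMG)
+ */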
+
+void mkqcow2(const char *file, unsigned size_mb)
+{
+    return mkimg(file, "qcow2", size_mb);
+}
+
+void prepare_blkdebug_script(const char *debug_fn, const char *event)
+{
+    FILE *debug_file = fopen(debug_fn, "w");
+    int ret;
+
+    fprintf(debug_file, "[inject-error]\n");
+    fprintf(debug_file, "event = \"%s\"\n", event);
+    fprintf(debug_file, "errno = \"5\"\n");
+    fprintf(debug_file, "state = \"1\"\n");
+    fprintf(debug_file, "immediately = \"off\"\n");
+    fprintf(debug_file, "once = \"on\"\n");
+
+    fprintf(debug_file, "[set-state]\n");
+    fprintf(debug_file, "event = \"%s\"\n", event);
+    fprintf(debug_file, "new_state = \"2\"\n");
+    fflush(debug_file);
+    g_assert(!ferror(debug_file));
+
+    ret = fclose(debug_file);
+    g_assert(ret == 0);
+}
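
For reference, with event "flush_to_disk" the helper above produces a blkdebug
configuration equivalent to the following (reconstructed from the fprintf()
calls):

    [inject-error]
    event = "flush_to_disk"
    errno = "5"
    state = "1"
    immediately = "off"
    once = "on"

    [set-state]
    event = "flush_to_disk"
    new_state = "2"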
index 612d41e5e92ab041310ca76c116c409b6ae40686..f57362b688196bfcd59f90fe55d89f84874c7ec1 100644 (file)
@@ -19,6 +19,9 @@ typedef struct QOSState {
 QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, va_list ap);
 QOSState *qtest_boot(QOSOps *ops, const char *cmdline_fmt, ...);
 void qtest_shutdown(QOSState *qs);
+void mkimg(const char *file, const char *fmt, unsigned size_mb);
+void mkqcow2(const char *file, unsigned size_mb);
+void prepare_blkdebug_script(const char *debug_fn, const char *event);
 
 static inline uint64_t qmalloc(QOSState *q, size_t bytes)
 {
index 12d65bd1e6ca1fc85cb6d23468b42a3054895496..a525dc532c48c9ab2a63e68ed8ec89f030784167 100644 (file)
@@ -388,7 +388,12 @@ QDict *qtest_qmp_receive(QTestState *s)
     return qmp.response;
 }
 
-QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
+/**
+ * Send a QMP message without waiting for the reply; useful for callers
+ * that intend to discard all replies up until a particular EVENT is
+ * received.
+ */
+void qtest_async_qmpv(QTestState *s, const char *fmt, va_list ap)
 {
     va_list ap_copy;
     QObject *qobj;
@@ -417,6 +422,11 @@ QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
         QDECREF(qstr);
         qobject_decref(qobj);
     }
+}
+
+QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap)
+{
+    qtest_async_qmpv(s, fmt, ap);
 
     /* Receive reply */
     return qtest_qmp_receive(s);
@@ -433,6 +443,15 @@ QDict *qtest_qmp(QTestState *s, const char *fmt, ...)
     return response;
 }
 
+void qtest_async_qmp(QTestState *s, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    qtest_async_qmpv(s, fmt, ap);
+    va_end(ap);
+}
+
 void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap)
 {
     QDict *response = qtest_qmpv(s, fmt, ap);
@@ -450,9 +469,26 @@ void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...)
     QDECREF(response);
 }
 
+void qtest_qmp_eventwait(QTestState *s, const char *event)
+{
+    QDict *response;
+
+    for (;;) {
+        response = qtest_qmp_receive(s);
+        if ((qdict_haskey(response, "event")) &&
+            (strcmp(qdict_get_str(response, "event"), event) == 0)) {
+            QDECREF(response);
+            break;
+        }
+        QDECREF(response);
+    }
+}
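+
+/*
+ * Typical pairing with the async helpers added in this patch
+ * (illustrative sketch, not code from the patch itself):
+ *
+ *     qmp_async("{'execute': 'stop'}");
+ *     qmp_eventwait("STOP");
+ *
+ * Every reply on the stream, including the one for 'stop' itself, is
+ * consumed and discarded until the requested event arrives.
+ */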
+
 const char *qtest_get_arch(void)
 {
     const char *qemu = getenv("QTEST_QEMU_BINARY");
+    g_assert(qemu != NULL);
     const char *end = strrchr(qemu, '/');
 
     return end + strlen("/qemu-system-");
@@ -695,6 +731,15 @@ QDict *qmp(const char *fmt, ...)
     return response;
 }
 
+void qmp_async(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    qtest_async_qmpv(global_qtest, fmt, ap);
+    va_end(ap);
+}
+
 void qmp_discard_response(const char *fmt, ...)
 {
     va_list ap;
index 03469b87817e76e1df87cce3ea5f4ddb5ab4607e..4b54b5da9eac93960856fb55bfed6b343e581b5b 100644 (file)
@@ -63,6 +63,15 @@ void qtest_qmp_discard_response(QTestState *s, const char *fmt, ...);
  */
 QDict *qtest_qmp(QTestState *s, const char *fmt, ...);
 
+/**
+ * qtest_async_qmp:
+ * @s: #QTestState instance to operate on.
+ * @fmt...: QMP message to send to qemu
+ *
+ * Sends a QMP message to QEMU and leaves the response in the stream.
+ */
+void qtest_async_qmp(QTestState *s, const char *fmt, ...);
+
 /**
  * qtest_qmpv_discard_response:
  * @s: #QTestState instance to operate on.
@@ -83,6 +92,16 @@ void qtest_qmpv_discard_response(QTestState *s, const char *fmt, va_list ap);
  */
 QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap);
 
+/**
+ * qtest_async_qmpv:
+ * @s: #QTestState instance to operate on.
+ * @fmt: QMP message to send to QEMU
+ * @ap: QMP message arguments
+ *
+ * Sends a QMP message to QEMU and leaves the response in the stream.
+ */
+void qtest_async_qmpv(QTestState *s, const char *fmt, va_list ap);
+
 /**
  * qtest_receive:
  * @s: #QTestState instance to operate on.
@@ -91,6 +110,15 @@ QDict *qtest_qmpv(QTestState *s, const char *fmt, va_list ap);
  */
 QDict *qtest_qmp_receive(QTestState *s);
 
+/**
+ * qtest_qmp_eventwait:
+ * @s: #QTestState instance to operate on.
+ * @event: event to wait for.
+ *
+ * Continuously polls for QMP responses until it receives the desired event.
+ */
+void qtest_qmp_eventwait(QTestState *s, const char *event);
+
 /**
  * qtest_get_irq:
  * @s: #QTestState instance to operate on.
@@ -410,6 +438,14 @@ static inline void qtest_end(void)
  */
 QDict *qmp(const char *fmt, ...);
 
+/**
+ * qmp_async:
+ * @fmt...: QMP message to send to qemu
+ *
+ * Sends a QMP message to QEMU and leaves the response in the stream.
+ */
+void qmp_async(const char *fmt, ...);
+
 /**
  * qmp_discard_response:
  * @fmt...: QMP message to send to qemu
@@ -428,6 +464,17 @@ static inline QDict *qmp_receive(void)
     return qtest_qmp_receive(global_qtest);
 }
 
+/**
+ * qmp_eventwait:
+ * @event: event to wait for.
+ *
+ * Continuously polls for QMP responses until it receives the desired event.
+ */
+static inline void qmp_eventwait(const char *event)
+{
+    qtest_qmp_eventwait(global_qtest, event);
+}
+
 /**
  * get_irq:
  * @num: Interrupt to observe.
diff --git a/tests/qemu-iotests/122 b/tests/qemu-iotests/122
new file mode 100755 (executable)
index 0000000..350ca9c
--- /dev/null
@@ -0,0 +1,223 @@
+#!/bin/bash
+#
+# Test some qemu-img convert cases
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+tmp=/tmp/$$
+status=1       # failure is the default!
+
+_cleanup()
+{
+    rm -f "$TEST_IMG".[123]
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+
+TEST_IMG="$TEST_IMG".base _make_test_img 64M
+$QEMU_IO -c "write -P 0x11 0 64M" "$TEST_IMG".base 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo
+echo "=== Check allocation status regression with -B ==="
+echo
+
+_make_test_img -b "$TEST_IMG".base
+$QEMU_IO -c "write -P 0x22 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IMG map "$TEST_IMG".orig | _filter_qemu_img_map
+
+
+echo
+echo "=== Check that zero clusters are kept in overlay ==="
+echo
+
+_make_test_img -b "$TEST_IMG".base
+
+$QEMU_IO -c "write -P 0 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IO -c "write -z 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo
+echo "=== Concatenate multiple source images ==="
+echo
+
+TEST_IMG="$TEST_IMG".1 _make_test_img 4M
+TEST_IMG="$TEST_IMG".2 _make_test_img 4M
+TEST_IMG="$TEST_IMG".3 _make_test_img 4M
+
+$QEMU_IO -c "write -P 0x11 0 64k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x22 0 64k" "$TEST_IMG".2 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x33 0 64k" "$TEST_IMG".3 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IMG convert -O $IMGFMT "$TEST_IMG".[123] "$TEST_IMG"
+$QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22 4M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x33 8M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IMG convert -c -O $IMGFMT "$TEST_IMG".[123] "$TEST_IMG"
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22 4M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x33 8M 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+# -B can't be combined with concatenation
+$QEMU_IMG convert -O $IMGFMT -B "$TEST_IMG".base "$TEST_IMG".[123] "$TEST_IMG"
+$QEMU_IMG convert -O $IMGFMT -c -B "$TEST_IMG".base "$TEST_IMG".[123] "$TEST_IMG"
+
+
+echo
+echo "=== Compression with misaligned allocations and image sizes ==="
+echo
+
+TEST_IMG="$TEST_IMG".1 _make_test_img 1023k -o cluster_size=1024
+TEST_IMG="$TEST_IMG".2 _make_test_img 1023k -o cluster_size=1024
+
+$QEMU_IO -c "write -P 0x11   16k  16k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x22  130k 130k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x33 1022k   1k" "$TEST_IMG".1 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0x44    0k   1k" "$TEST_IMG".2 2>&1 | _filter_qemu_io | _filter_testdir
+
+$QEMU_IMG convert -c -O $IMGFMT "$TEST_IMG".[12] "$TEST_IMG"
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+$QEMU_IO -c "read -P 0       0k   16k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11   16k   16k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0      32k   98k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x22  130k  130k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0     260k  762k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x33 1022k    1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x44 1023k    1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0    1024k 1022k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+
+echo
+echo "=== Full allocation with -S 0 ==="
+echo
+
+# Standalone image
+_make_test_img 64M
+$QEMU_IO -c "write -P 0x22 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write -P 0 3M 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo convert -S 0:
+$QEMU_IMG convert -O $IMGFMT -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 3M 61M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+echo
+echo convert -c -S 0:
+$QEMU_IMG convert -O $IMGFMT -c -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 3M 61M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+# With backing file
+TEST_IMG="$TEST_IMG".base _make_test_img 64M
+$QEMU_IO -c "write -P 0x11 0 32M" "$TEST_IMG".base 2>&1 | _filter_qemu_io | _filter_testdir
+
+_make_test_img -b "$TEST_IMG".base 64M
+$QEMU_IO -c "write -P 0x22 0 3M" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+echo
+echo convert -S 0 with source backing file:
+$QEMU_IMG convert -O $IMGFMT -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+echo
+echo convert -c -S 0 with source backing file:
+$QEMU_IMG convert -O $IMGFMT -c -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+# With keeping the backing file
+echo
+echo convert -S 0 -B ...
+$QEMU_IMG convert -O $IMGFMT -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+echo
+echo convert -c -S 0 -B ...
+$QEMU_IMG convert -O $IMGFMT -c -S 0 "$TEST_IMG" "$TEST_IMG".orig
+$QEMU_IO -c "read -P 0x22 0 3M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0x11 3M 29M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "read -P 0 32M 32M" "$TEST_IMG".orig 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+
+echo
+echo "=== Non-zero -S ==="
+echo
+
+_make_test_img 64M -o cluster_size=1k
+$QEMU_IO -c "write -P 0 0 64k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write 0 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write 8k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IO -c "write 17k 1k" "$TEST_IMG" 2>&1 | _filter_qemu_io | _filter_testdir
+
+for min_sparse in 4k 8k; do
+    echo
+    echo convert -S $min_sparse
+    $QEMU_IMG convert -O $IMGFMT -o cluster_size=1k -S $min_sparse "$TEST_IMG" "$TEST_IMG".orig
+    $QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+
+    echo
+    echo convert -c -S $min_sparse
+    # For compressed images, -S values other than 0 are ignored
+    $QEMU_IMG convert -O $IMGFMT -o cluster_size=1k -c -S $min_sparse "$TEST_IMG" "$TEST_IMG".orig
+    $QEMU_IMG map --output=json "$TEST_IMG".orig | _filter_qemu_img_map
+done
+
+# success, all done
+echo '*** done'
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
new file mode 100644 (file)
index 0000000..1f853b9
--- /dev/null
@@ -0,0 +1,209 @@
+QA output created by 122
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+wrote 67108864/67108864 bytes at offset 0
+64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Check allocation status regression with -B ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0               0x300000        TEST_DIR/t.IMGFMT.orig
+0x300000        0x3d00000       TEST_DIR/t.IMGFMT.base
+
+=== Check that zero clusters are kept in overlay ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Concatenate multiple source images ===
+
+Formatting 'TEST_DIR/t.IMGFMT.1', fmt=IMGFMT size=4194304
+Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=4194304
+Formatting 'TEST_DIR/t.IMGFMT.3', fmt=IMGFMT size=4194304
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Offset          Length          File
+0               0x10000         TEST_DIR/t.IMGFMT
+0x400000        0x10000         TEST_DIR/t.IMGFMT
+0x800000        0x10000         TEST_DIR/t.IMGFMT
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 4194304
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 8388608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 65536, "length": 4128768, "depth": 0, "zero": true, "data": false},
+{ "start": 4194304, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 4259840, "length": 4128768, "depth": 0, "zero": true, "data": false},
+{ "start": 8388608, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 8454144, "length": 4128768, "depth": 0, "zero": true, "data": false}]
+read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 4194304
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 8388608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-img: -B makes no sense when concatenating multiple input images
+qemu-img: -B makes no sense when concatenating multiple input images
+
+=== Compression with misaligned allocations and image sizes ===
+
+Formatting 'TEST_DIR/t.IMGFMT.1', fmt=IMGFMT size=1047552
+Formatting 'TEST_DIR/t.IMGFMT.2', fmt=IMGFMT size=1047552
+wrote 16384/16384 bytes at offset 16384
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 133120/133120 bytes at offset 133120
+130 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 1046528
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 65536, "length": 65536, "depth": 0, "zero": true, "data": false},
+{ "start": 131072, "length": 196608, "depth": 0, "zero": false, "data": true},
+{ "start": 327680, "length": 655360, "depth": 0, "zero": true, "data": false},
+{ "start": 983040, "length": 65536, "depth": 0, "zero": false, "data": true},
+{ "start": 1048576, "length": 1046528, "depth": 0, "zero": true, "data": false}]
+read 16384/16384 bytes at offset 0
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 16384/16384 bytes at offset 16384
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 100352/100352 bytes at offset 32768
+98 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 133120/133120 bytes at offset 133120
+130 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 780288/780288 bytes at offset 266240
+762 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 1046528
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1024/1024 bytes at offset 1047552
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1046528/1046528 bytes at offset 1048576
+1022 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Full allocation with -S 0 ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 3145728/3145728 bytes at offset 3145728
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+convert -S 0:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 63963136/63963136 bytes at offset 3145728
+61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 6291456, "depth": 0, "zero": false, "data": true, "offset": 327680},
+{ "start": 6291456, "length": 60817408, "depth": 0, "zero": true, "data": false}]
+
+convert -c -S 0:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 63963136/63963136 bytes at offset 3145728
+61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 6291456, "depth": 0, "zero": false, "data": true},
+{ "start": 6291456, "length": 60817408, "depth": 0, "zero": true, "data": false}]
+Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864
+wrote 33554432/33554432 bytes at offset 0
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 backing_file='TEST_DIR/t.IMGFMT.base'
+wrote 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+convert -S 0 with source backing file:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+
+convert -c -S 0 with source backing file:
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true}]
+
+convert -S 0 -B ...
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, "offset": 327680}]
+
+convert -c -S 0 -B ...
+read 3145728/3145728 bytes at offset 0
+3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 30408704/30408704 bytes at offset 3145728
+29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 33554432/33554432 bytes at offset 33554432
+32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true}]
+
+=== Non-zero -S ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 0
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 8192
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1024/1024 bytes at offset 17408
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+convert -S 4k
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 8192},
+{ "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 9216},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 10240},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+
+convert -c -S 4k
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+
+convert -S 8k
+[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, "offset": 8192},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, "offset": 17408},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+
+convert -c -S 8k
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true},
+{ "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]
+*** done
diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
new file mode 100644 (file)
index 0000000..3ee78cd
--- /dev/null
@@ -0,0 +1,363 @@
+#!/usr/bin/env python
+#
+# Tests for incremental drive-backup
+#
+# Copyright (C) 2015 John Snow for Red Hat, Inc.
+#
+# Based on 056.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+
+
+def io_write_patterns(img, patterns):
+    for pattern in patterns:
+        iotests.qemu_io('-c', 'write -P%s %s %s' % pattern, img)
+
+
+def try_remove(img):
+    try:
+        os.remove(img)
+    except OSError:
+        pass
+
+
+class Bitmap:
+    def __init__(self, name, drive):
+        self.name = name
+        self.drive = drive
+        self.num = 0
+        self.backups = list()
+
+    def base_target(self):
+        return (self.drive['backup'], None)
+
+    def new_target(self, num=None):
+        if num is None:
+            num = self.num
+        self.num = num + 1
+        base = os.path.join(iotests.test_dir,
+                            "%s.%s." % (self.drive['id'], self.name))
+        suff = "%i.%s" % (num, self.drive['fmt'])
+        target = base + "inc" + suff
+        reference = base + "ref" + suff
+        self.backups.append((target, reference))
+        return (target, reference)
+
+    def last_target(self):
+        if self.backups:
+            return self.backups[-1]
+        return self.base_target()
+
+    def del_target(self):
+        for image in self.backups.pop():
+            try_remove(image)
+        self.num -= 1
+
+    def cleanup(self):
+        for backup in self.backups:
+            for image in backup:
+                try_remove(image)
+
+
+class TestIncrementalBackup(iotests.QMPTestCase):
+    def setUp(self):
+        self.bitmaps = list()
+        self.files = list()
+        self.drives = list()
+        self.vm = iotests.VM()
+        self.err_img = os.path.join(iotests.test_dir, 'err.%s' % iotests.imgfmt)
+
+        # Create a base image with a distinctive patterning
+        drive0 = self.add_node('drive0')
+        self.img_create(drive0['file'], drive0['fmt'])
+        self.vm.add_drive(drive0['file'])
+        io_write_patterns(drive0['file'], (('0x41', 0, 512),
+                                           ('0xd5', '1M', '32k'),
+                                           ('0xdc', '32M', '124k')))
+        self.vm.launch()
+
+
+    def add_node(self, node_id, fmt=iotests.imgfmt, path=None, backup=None):
+        if path is None:
+            path = os.path.join(iotests.test_dir, '%s.%s' % (node_id, fmt))
+        if backup is None:
+            backup = os.path.join(iotests.test_dir,
+                                  '%s.full.backup.%s' % (node_id, fmt))
+
+        self.drives.append({
+            'id': node_id,
+            'file': path,
+            'backup': backup,
+            'fmt': fmt })
+        return self.drives[-1]
+
+
+    def img_create(self, img, fmt=iotests.imgfmt, size='64M',
+                   parent=None, parentFormat=None):
+        if parent:
+            if parentFormat is None:
+                parentFormat = fmt
+            iotests.qemu_img('create', '-f', fmt, img, size,
+                             '-b', parent, '-F', parentFormat)
+        else:
+            iotests.qemu_img('create', '-f', fmt, img, size)
+        self.files.append(img)
+
+
+    def do_qmp_backup(self, error='Input/output error', **kwargs):
+        res = self.vm.qmp('drive-backup', **kwargs)
+        self.assert_qmp(res, 'return', {})
+
+        event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED",
+                                   match={'data': {'device': kwargs['device']}})
+        self.assertIsNotNone(event)
+
+        try:
+            failure = self.dictpath(event, 'data/error')
+        except AssertionError:
+            # Backup succeeded.
+            self.assert_qmp(event, 'data/offset', event['data']['len'])
+            return True
+        else:
+            # Backup failed.
+            self.assert_qmp(event, 'data/error', error)
+            return False
+
+
+    def create_anchor_backup(self, drive=None):
+        if drive is None:
+            drive = self.drives[-1]
+        res = self.do_qmp_backup(device=drive['id'], sync='full',
+                                 format=drive['fmt'], target=drive['backup'])
+        self.assertTrue(res)
+        self.files.append(drive['backup'])
+        return drive['backup']
+
+
+    def make_reference_backup(self, bitmap=None):
+        if bitmap is None:
+            bitmap = self.bitmaps[-1]
+        _, reference = bitmap.last_target()
+        res = self.do_qmp_backup(device=bitmap.drive['id'], sync='full',
+                                 format=bitmap.drive['fmt'], target=reference)
+        self.assertTrue(res)
+
+
+    def add_bitmap(self, name, drive, **kwargs):
+        bitmap = Bitmap(name, drive)
+        self.bitmaps.append(bitmap)
+        result = self.vm.qmp('block-dirty-bitmap-add', node=drive['id'],
+                             name=bitmap.name, **kwargs)
+        self.assert_qmp(result, 'return', {})
+        return bitmap
+
+
+    def prepare_backup(self, bitmap=None, parent=None):
+        if bitmap is None:
+            bitmap = self.bitmaps[-1]
+        if parent is None:
+            parent, _ = bitmap.last_target()
+
+        target, _ = bitmap.new_target()
+        self.img_create(target, bitmap.drive['fmt'], parent=parent)
+        return target
+
+
+    def create_incremental(self, bitmap=None, parent=None,
+                           parentFormat=None, validate=True):
+        if bitmap is None:
+            bitmap = self.bitmaps[-1]
+        if parent is None:
+            parent, _ = bitmap.last_target()
+
+        target = self.prepare_backup(bitmap, parent)
+        res = self.do_qmp_backup(device=bitmap.drive['id'],
+                                 sync='dirty-bitmap', bitmap=bitmap.name,
+                                 format=bitmap.drive['fmt'], target=target,
+                                 mode='existing')
+        if not res:
+            bitmap.del_target()
+            self.assertFalse(validate)
+        else:
+            self.make_reference_backup(bitmap)
+        return res
+
+
+    def check_backups(self):
+        for bitmap in self.bitmaps:
+            for incremental, reference in bitmap.backups:
+                self.assertTrue(iotests.compare_images(incremental, reference))
+            last = bitmap.last_target()[0]
+            self.assertTrue(iotests.compare_images(last, bitmap.drive['file']))
+
+
+    def hmp_io_writes(self, drive, patterns):
+        for pattern in patterns:
+            self.vm.hmp_qemu_io(drive, 'write -P%s %s %s' % pattern)
+        self.vm.hmp_qemu_io(drive, 'flush')
+
+
+    def do_incremental_simple(self, **kwargs):
+        self.create_anchor_backup()
+        self.add_bitmap('bitmap0', self.drives[0], **kwargs)
+
+        # Sanity: Create a "hollow" incremental backup
+        self.create_incremental()
+        # Three writes: One complete overwrite, one new segment,
+        # and one partial overlap.
+        self.hmp_io_writes(self.drives[0]['id'], (('0xab', 0, 512),
+                                                  ('0xfe', '16M', '256k'),
+                                                  ('0x64', '32736k', '64k')))
+        self.create_incremental()
+        # Three more writes, one of each kind, like above
+        self.hmp_io_writes(self.drives[0]['id'], (('0x9a', 0, 512),
+                                                  ('0x55', '8M', '352k'),
+                                                  ('0x78', '15872k', '1M')))
+        self.create_incremental()
+        self.vm.shutdown()
+        self.check_backups()
+
+
+    def test_incremental_simple(self):
+        '''
+        Test: Create and verify three incremental backups.
+
+        Create a bitmap and a full backup before VM execution begins,
+        then create a series of three incremental backups "during execution,"
+        i.e., after IO requests begin modifying the drive.
+        '''
+        return self.do_incremental_simple()
+
+
+    def test_small_granularity(self):
+        '''
+        Test: Create and verify backups made with a small granularity bitmap.
+
+        Perform the same test as test_incremental_simple, but with a granularity
+        of only 32KiB instead of the present default of 64KiB.
+        '''
+        return self.do_incremental_simple(granularity=32768)
+
+
+    def test_large_granularity(self):
+        '''
+        Test: Create and verify backups made with a large granularity bitmap.
+
+        Perform the same test as test_incremental_simple, but with a granularity
+        of 128KiB instead of the present default of 64KiB.
+        '''
+        return self.do_incremental_simple(granularity=131072)
+
+
+    def test_incremental_failure(self):
+        '''Test: Verify backups made after a failure are correct.
+
+        Simulate a failure during an incremental backup block job,
+        emulate additional writes, then create another incremental backup
+        afterwards and verify that the backup created is correct.
+        '''
+
+        # Create a blkdebug interface to this img as 'drive1',
+        # but don't actually create a new image.
+        drive1 = self.add_node('drive1', self.drives[0]['fmt'],
+                               path=self.drives[0]['file'],
+                               backup=self.drives[0]['backup'])
+        result = self.vm.qmp('blockdev-add', options={
+            'id': drive1['id'],
+            'driver': drive1['fmt'],
+            'file': {
+                'driver': 'blkdebug',
+                'image': {
+                    'driver': 'file',
+                    'filename': drive1['file']
+                },
+                'set-state': [{
+                    'event': 'flush_to_disk',
+                    'state': 1,
+                    'new_state': 2
+                }],
+                'inject-error': [{
+                    'event': 'read_aio',
+                    'errno': 5,
+                    'state': 2,
+                    'immediately': False,
+                    'once': True
+                }],
+            }
+        })
+        self.assert_qmp(result, 'return', {})
+
+        self.create_anchor_backup(self.drives[0])
+        self.add_bitmap('bitmap0', drive1)
+        # Note: in a normal execution, the VM would resume and begin
+        # issuing IO requests at this point.
+
+        self.hmp_io_writes(drive1['id'], (('0xab', 0, 512),
+                                          ('0xfe', '16M', '256k'),
+                                          ('0x64', '32736k', '64k')))
+
+        result = self.create_incremental(validate=False)
+        self.assertFalse(result)
+        self.hmp_io_writes(drive1['id'], (('0x9a', 0, 512),
+                                          ('0x55', '8M', '352k'),
+                                          ('0x78', '15872k', '1M')))
+        self.create_incremental()
+        self.vm.shutdown()
+        self.check_backups()
+
+
+    def test_sync_dirty_bitmap_missing(self):
+        self.assert_no_active_block_jobs()
+        self.files.append(self.err_img)
+        result = self.vm.qmp('drive-backup', device=self.drives[0]['id'],
+                             sync='dirty-bitmap', format=self.drives[0]['fmt'],
+                             target=self.err_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+
+    def test_sync_dirty_bitmap_not_found(self):
+        self.assert_no_active_block_jobs()
+        self.files.append(self.err_img)
+        result = self.vm.qmp('drive-backup', device=self.drives[0]['id'],
+                             sync='dirty-bitmap', bitmap='unknown',
+                             format=self.drives[0]['fmt'], target=self.err_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+
+    def test_sync_dirty_bitmap_bad_granularity(self):
+        '''
+        Test: Attempt to create a bitmap with an improper granularity.
+
+        The granularity must always be a power of 2.
+        '''
+        self.assert_no_active_block_jobs()
+        self.assertRaises(AssertionError, self.add_bitmap,
+                          'bitmap0', self.drives[0],
+                          granularity=64000)
+
+
+    def tearDown(self):
+        self.vm.shutdown()
+        for bitmap in self.bitmaps:
+            bitmap.cleanup()
+        for filename in self.files:
+            try_remove(filename)
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/124.out b/tests/qemu-iotests/124.out
new file mode 100644 (file)
index 0000000..2f7d390
--- /dev/null
@@ -0,0 +1,5 @@
+.......
+----------------------------------------------------------------------
+Ran 7 tests
+
+OK
diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
new file mode 100644 (file)
index 0000000..9e87e1c
--- /dev/null
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Tests that "bdrv_drain_all" doesn't drain block jobs
+#
+# Copyright (C) 2015 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+import time
+
+class TestStopWithBlockJob(iotests.QMPTestCase):
+    test_img = os.path.join(iotests.test_dir, 'test.img')
+    target_img = os.path.join(iotests.test_dir, 'target.img')
+    base_img = os.path.join(iotests.test_dir, 'base.img')
+
+    def setUp(self):
+        iotests.qemu_img('create', '-f', iotests.imgfmt, self.base_img, "1G")
+        iotests.qemu_img('create', '-f', iotests.imgfmt, self.test_img, "-b", self.base_img)
+        iotests.qemu_io('-f', iotests.imgfmt, '-c', 'write -P0x5d 1M 128M', self.test_img)
+        self.vm = iotests.VM().add_drive(self.test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        params = {"device": "drive0",
+                  "bps": 0,
+                  "bps_rd": 0,
+                  "bps_wr": 0,
+                  "iops": 0,
+                  "iops_rd": 0,
+                  "iops_wr": 0,
+                 }
+        result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
+                             **params)
+        self.vm.shutdown()
+
+    def do_test_stop(self, cmd, **args):
+        """Test 'stop' while block job is running on a throttled drive.
+        The 'stop' command shouldn't drain the job"""
+        params = {"device": "drive0",
+                  "bps": 1024,
+                  "bps_rd": 0,
+                  "bps_wr": 0,
+                  "iops": 0,
+                  "iops_rd": 0,
+                  "iops_wr": 0,
+                 }
+        result = self.vm.qmp("block_set_io_throttle", conv_keys=False,
+                             **params)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp(cmd, **args)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp("stop")
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp("query-block-jobs")
+        self.assert_qmp(result, 'return[0]/busy', True)
+        self.assert_qmp(result, 'return[0]/ready', False)
+
+    def test_drive_mirror(self):
+        self.do_test_stop("drive-mirror", device="drive0",
+                          target=self.target_img,
+                          sync="full")
+
+    def test_drive_backup(self):
+        self.do_test_stop("drive-backup", device="drive0",
+                          target=self.target_img,
+                          sync="full")
+
+    def test_block_commit(self):
+        self.do_test_stop("block-commit", device="drive0")
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=["qcow2"])
diff --git a/tests/qemu-iotests/129.out b/tests/qemu-iotests/129.out
new file mode 100644 (file)
index 0000000..8d7e996
--- /dev/null
@@ -0,0 +1,5 @@
+...
+----------------------------------------------------------------------
+Ran 3 tests
+
+OK
index bcf25786ab4f69938b73cd48a8accc6b79817033..6ca3466ec52b011461a8b9eec1199ddb896cd735 100644 (file)
 115 rw auto
 116 rw auto quick
 121 rw auto
+122 rw auto
 123 rw auto quick
+124 rw auto backing
 128 rw auto quick
+129 rw auto quick
 130 rw auto quick
index 14028540b3dda364800f2ef686c3978f7d7ada1a..e93e62387b78ae8574e38685ebf9a6bcada87c6d 100644 (file)
@@ -78,6 +78,23 @@ def create_image(name, size):
         i = i + 512
     file.close()
 
+# Test if 'match' is a recursive subset of 'event'
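+# e.g. match={'data': {'device': 'drive0'}} accepts any event whose 'data'
+# dict contains 'device': 'drive0', regardless of any other keys present.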
+def event_match(event, match=None):
+    if match is None:
+        return True
+
+    for key in match:
+        if key in event:
+            if isinstance(event[key], dict):
+                if not event_match(event[key], match[key]):
+                    return False
+            elif event[key] != match[key]:
+                return False
+        else:
+            return False
+
+    return True
+
 class VM(object):
     '''A QEMU VM'''
 
@@ -92,6 +109,7 @@ class VM(object):
                      '-machine', 'accel=qtest',
                      '-display', 'none', '-vga', 'none']
         self._num_drives = 0
+        self._events = []
 
     # This can be used to add an unused monitor instance.
     def add_monitor_telnet(self, ip, port):
@@ -202,14 +220,34 @@ class VM(object):
 
     def get_qmp_event(self, wait=False):
         '''Poll for one queued QMP event and return it'''
+        if len(self._events) > 0:
+            return self._events.pop(0)
         return self._qmp.pull_event(wait=wait)
 
     def get_qmp_events(self, wait=False):
         '''Poll for queued QMP events and return a list of dicts'''
         events = self._qmp.get_events(wait=wait)
+        events.extend(self._events)
+        del self._events[:]
         self._qmp.clear_events()
         return events
 
+    def event_wait(self, name='BLOCK_JOB_COMPLETED', timeout=60.0, match=None):
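+        # Wait up to 'timeout' seconds for the named event; 'match' filters
+        # candidates by recursive subset (see event_match above), e.g.:
+        #   vm.event_wait('BLOCK_JOB_COMPLETED',
+        #                 match={'data': {'device': 'drive0'}})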
+        # Search cached events
+        for event in self._events:
+            if (event['event'] == name) and event_match(event, match):
+                self._events.remove(event)
+                return event
+
+        # Poll for new events
+        while True:
+            event = self._qmp.pull_event(wait=timeout)
+            if (event['event'] == name) and event_match(event, match):
+                return event
+            self._events.append(event)
+
 index_re = re.compile(r'([^\[]+)\[([^\]]+)\]')
 
 class QMPTestCase(unittest.TestCase):
index a7cb5c99152233b1d94480083f874faf8554eb68..4b0cb45d31b56399d8592cc97a30d2c4b1a0f7eb 100644 (file)
@@ -107,6 +107,7 @@ static void test_notify(void)
 
 typedef struct {
     QemuMutex start_lock;
+    EventNotifier notifier;
     bool thread_acquired;
 } AcquireTestData;
 
@@ -118,6 +119,8 @@ static void *test_acquire_thread(void *opaque)
     qemu_mutex_lock(&data->start_lock);
     qemu_mutex_unlock(&data->start_lock);
 
+    g_usleep(500000);
+    event_notifier_set(&data->notifier);
     aio_context_acquire(ctx);
     aio_context_release(ctx);
 
@@ -126,20 +129,19 @@ static void *test_acquire_thread(void *opaque)
     return NULL;
 }
 
-static void dummy_notifier_read(EventNotifier *unused)
+static void dummy_notifier_read(EventNotifier *n)
 {
-    g_assert(false); /* should never be invoked */
+    event_notifier_test_and_clear(n);
 }
 
 static void test_acquire(void)
 {
     QemuThread thread;
-    EventNotifier notifier;
     AcquireTestData data;
 
     /* Dummy event notifier ensures aio_poll() will block */
-    event_notifier_init(&notifier, false);
-    aio_set_event_notifier(ctx, &notifier, dummy_notifier_read);
+    event_notifier_init(&data.notifier, false);
+    aio_set_event_notifier(ctx, &data.notifier, dummy_notifier_read);
     g_assert(!aio_poll(ctx, false)); /* consume aio_notify() */
 
     qemu_mutex_init(&data.start_lock);
@@ -153,12 +155,13 @@ static void test_acquire(void)
     /* Block in aio_poll(), let other thread kick us and acquire context */
     aio_context_acquire(ctx);
     qemu_mutex_unlock(&data.start_lock); /* let the thread run */
-    g_assert(!aio_poll(ctx, true));
+    g_assert(aio_poll(ctx, true));
+    g_assert(!data.thread_acquired);
     aio_context_release(ctx);
 
     qemu_thread_join(&thread);
-    aio_set_event_notifier(ctx, &notifier, NULL);
-    event_notifier_cleanup(&notifier);
+    aio_set_event_notifier(ctx, &data.notifier, NULL);
+    event_notifier_cleanup(&data.notifier);
 
     g_assert(data.thread_acquired);
 }
index 8c902f2055fb45359d352f0cec183ac671e3550f..9f41b5fd2ebfbc039efb6cc2fa8313dc39b1d25b 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <glib.h>
 #include <stdarg.h>
+#include <string.h>
+#include <sys/types.h>
 #include "qemu/hbitmap.h"
 
 #define LOG_BITS_PER_LONG          (BITS_PER_LONG == 32 ? 5 : 6)
@@ -23,6 +25,7 @@ typedef struct TestHBitmapData {
     HBitmap       *hb;
     unsigned long *bits;
     size_t         size;
+    size_t         old_size;
     int            granularity;
 } TestHBitmapData;
 
@@ -91,6 +94,44 @@ static void hbitmap_test_init(TestHBitmapData *data,
     }
 }
 
+static inline size_t hbitmap_test_array_size(size_t bits)
+{
+    size_t n = (bits + BITS_PER_LONG - 1) / BITS_PER_LONG;
+    return n ? n : 1;
+}
+
+static void hbitmap_test_truncate_impl(TestHBitmapData *data,
+                                       size_t size)
+{
+    size_t n;
+    size_t m;
+    data->old_size = data->size;
+    data->size = size;
+
+    if (data->size == data->old_size) {
+        return;
+    }
+
+    n = hbitmap_test_array_size(size);
+    m = hbitmap_test_array_size(data->old_size);
+    data->bits = g_realloc(data->bits, sizeof(unsigned long) * n);
+    if (n > m) {
+        memset(&data->bits[m], 0x00, sizeof(unsigned long) * (n - m));
+    }
+
+    /* If we shrink to a size that is not a whole multiple of the bits in
+     * an unsigned long, scrub the now-unused bits in the last word. */
+    if (data->size < data->old_size) {
+        m = size % BITS_PER_LONG;
+        if (m) {
+            unsigned long mask = (1ULL << m) - 1;
+            data->bits[n-1] &= mask;
+        }
+    }
+
+    hbitmap_truncate(data->hb, size);
+}
+
 static void hbitmap_test_teardown(TestHBitmapData *data,
                                   const void *unused)
 {
@@ -369,6 +410,198 @@ static void test_hbitmap_iter_granularity(TestHBitmapData *data,
     g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
 }
 
+static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t diff)
+{
+    size_t size = data->size;
+
+    /* First bit */
+    hbitmap_test_set(data, 0, 1);
+    if (diff < 0) {
+        /* Last bit in new, shortened map */
+        hbitmap_test_set(data, size + diff - 1, 1);
+
+        /* First bit to be truncated away */
+        hbitmap_test_set(data, size + diff, 1);
+    }
+    /* Last bit */
+    hbitmap_test_set(data, size - 1, 1);
+    if (data->granularity == 0) {
+        hbitmap_test_check_get(data);
+    }
+}
+
+static void hbitmap_test_check_boundary_bits(TestHBitmapData *data)
+{
+    size_t size = MIN(data->size, data->old_size);
+
+    if (data->granularity == 0) {
+        hbitmap_test_check_get(data);
+        hbitmap_test_check(data, 0);
+    } else {
+        /* If a granularity was set, note that every distinct
+         * (bit >> granularity) value that was set will increase
+         * the bit pop count by 2^granularity, not just 1.
+         *
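+         * For example, with granularity 1 the two boundary bits set by
+         * hbitmap_test_set_boundary_bits() land in distinct granules, so
+         * hbitmap_count() reports 2 * 2^1 == (2 << 1) bits.
+         *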
+         * The hbitmap_test_check facility does not currently tolerate
+         * non-zero granularities, so test the boundaries and the population
+         * count manually.
+         */
+        g_assert(hbitmap_get(data->hb, 0));
+        g_assert(hbitmap_get(data->hb, size - 1));
+        g_assert_cmpint(2 << data->granularity, ==, hbitmap_count(data->hb));
+    }
+}
+
+/* Generic truncate test. */
+static void hbitmap_test_truncate(TestHBitmapData *data,
+                                  size_t size,
+                                  ssize_t diff,
+                                  int granularity)
+{
+    hbitmap_test_init(data, size, granularity);
+    hbitmap_test_set_boundary_bits(data, diff);
+    hbitmap_test_truncate_impl(data, size + diff);
+    hbitmap_test_check_boundary_bits(data);
+}
+
+static void test_hbitmap_truncate_nop(TestHBitmapData *data,
+                                      const void *unused)
+{
+    hbitmap_test_truncate(data, L2, 0, 0);
+}
+
+/**
+ * Grow by an amount smaller than the granularity, without crossing
+ * a granularity alignment boundary. Effectively a NOP.
+ */
+static void test_hbitmap_truncate_grow_negligible(TestHBitmapData *data,
+                                                  const void *unused)
+{
+    size_t size = L2 - 1;
+    size_t diff = 1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Shrink by an amount smaller than the granularity, without crossing
+ * a granularity alignment boundary. Effectively a NOP.
+ */
+static void test_hbitmap_truncate_shrink_negligible(TestHBitmapData *data,
+                                                    const void *unused)
+{
+    size_t size = L2;
+    ssize_t diff = -1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Grow by an amount smaller than the granularity, but crossing over
+ * a granularity alignment boundary.
+ */
+static void test_hbitmap_truncate_grow_tiny(TestHBitmapData *data,
+                                            const void *unused)
+{
+    size_t size = L2 - 2;
+    ssize_t diff = 1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Shrink by an amount smaller than the granularity, but crossing over
+ * a granularity alignment boundary.
+ */
+static void test_hbitmap_truncate_shrink_tiny(TestHBitmapData *data,
+                                              const void *unused)
+{
+    size_t size = L2 - 1;
+    ssize_t diff = -1;
+    int granularity = 1;
+
+    hbitmap_test_truncate(data, size, diff, granularity);
+}
+
+/**
+ * Grow by an amount smaller than sizeof(long), and not crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_grow_small(TestHBitmapData *data,
+                                             const void *unused)
+{
+    size_t size = L2 + 1;
+    size_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, diff, 0);
+}
+
+/**
+ * Shrink by an amount smaller than sizeof(long), and not crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_shrink_small(TestHBitmapData *data,
+                                               const void *unused)
+{
+    size_t size = L2;
+    ssize_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, -diff, 0);
+}
+
+/**
+ * Grow by an amount smaller than sizeof(long), while crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_grow_medium(TestHBitmapData *data,
+                                              const void *unused)
+{
+    size_t size = L2 - 1;
+    size_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, diff, 0);
+}
+
+/**
+ * Shrink by an amount smaller than sizeof(long), while crossing over
+ * a sizeof(long) alignment boundary.
+ */
+static void test_hbitmap_truncate_shrink_medium(TestHBitmapData *data,
+                                                const void *unused)
+{
+    size_t size = L2 + 1;
+    ssize_t diff = sizeof(long) / 2;
+
+    hbitmap_test_truncate(data, size, -diff, 0);
+}
+
+/**
+ * Grow by an amount larger than sizeof(long).
+ */
+static void test_hbitmap_truncate_grow_large(TestHBitmapData *data,
+                                             const void *unused)
+{
+    size_t size = L2;
+    size_t diff = 8 * sizeof(long);
+
+    hbitmap_test_truncate(data, size, diff, 0);
+}
+
+/**
+ * Shrink by an amount larger than sizeof(long).
+ */
+static void test_hbitmap_truncate_shrink_large(TestHBitmapData *data,
+                                               const void *unused)
+{
+    size_t size = L2;
+    ssize_t diff = 8 * sizeof(long);
+
+    hbitmap_test_truncate(data, size, -diff, 0);
+}
+
 static void hbitmap_test_add(const char *testpath,
                                    void (*test_func)(TestHBitmapData *data, const void *user_data))
 {
@@ -395,6 +628,28 @@ int main(int argc, char **argv)
     hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty);
     hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset);
     hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity);
+
+    hbitmap_test_add("/hbitmap/truncate/nop", test_hbitmap_truncate_nop);
+    hbitmap_test_add("/hbitmap/truncate/grow/negligible",
+                     test_hbitmap_truncate_grow_negligible);
+    hbitmap_test_add("/hbitmap/truncate/shrink/negligible",
+                     test_hbitmap_truncate_shrink_negligible);
+    hbitmap_test_add("/hbitmap/truncate/grow/tiny",
+                     test_hbitmap_truncate_grow_tiny);
+    hbitmap_test_add("/hbitmap/truncate/shrink/tiny",
+                     test_hbitmap_truncate_shrink_tiny);
+    hbitmap_test_add("/hbitmap/truncate/grow/small",
+                     test_hbitmap_truncate_grow_small);
+    hbitmap_test_add("/hbitmap/truncate/shrink/small",
+                     test_hbitmap_truncate_shrink_small);
+    hbitmap_test_add("/hbitmap/truncate/grow/medium",
+                     test_hbitmap_truncate_grow_medium);
+    hbitmap_test_add("/hbitmap/truncate/shrink/medium",
+                     test_hbitmap_truncate_shrink_medium);
+    hbitmap_test_add("/hbitmap/truncate/grow/large",
+                     test_hbitmap_truncate_grow_large);
+    hbitmap_test_add("/hbitmap/truncate/shrink/large",
+                     test_hbitmap_truncate_shrink_large);
     g_test_run();
 
     return 0;
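
Note on the granularity arithmetic used by hbitmap_test_check_boundary_bits() above: with granularity g, every set request is rounded out to its 2^g-aligned group, so the reported population count moves in steps of 1 << g, and the two distinct boundary groups set by the tests yield the asserted 2 << g. A minimal sketch against the public hbitmap API (hbitmap_set(), hbitmap_count() and hbitmap_free() are the existing helpers declared alongside hbitmap_alloc(); the standalone main() is illustrative only):

    #include <assert.h>
    #include "qemu/hbitmap.h"

    int main(void)
    {
        HBitmap *hb = hbitmap_alloc(1024, 2);   /* granularity g = 2 */

        hbitmap_set(hb, 5, 1);                  /* one logical bit...        */
        assert(hbitmap_count(hb) == 1 << 2);    /* ...counts as a full group */

        hbitmap_set(hb, 4, 1);                  /* same 2^g group: no change */
        assert(hbitmap_count(hb) == 1 << 2);

        hbitmap_free(hb);
        return 0;
    }
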
index e2cac8e4ffda23495e5c430e994f64ae26dde2b3..ac909f498675325f689747b636300146bdb251f6 100644 (file)
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -170,12 +170,12 @@ restart:
         if (elem->state != THREAD_DONE) {
             continue;
         }
-        if (elem->state == THREAD_DONE) {
-            trace_thread_pool_complete(pool, elem, elem->common.opaque,
-                                       elem->ret);
-        }
-        if (elem->state == THREAD_DONE && elem->common.cb) {
-            QLIST_REMOVE(elem, all);
+
+        trace_thread_pool_complete(pool, elem, elem->common.opaque,
+                                   elem->ret);
+        QLIST_REMOVE(elem, all);
+
+        if (elem->common.cb) {
             /* Read state before ret.  */
             smp_rmb();
 
@@ -188,8 +188,6 @@ restart:
             qemu_aio_unref(elem);
             goto restart;
         } else {
-            /* remove the request */
-            QLIST_REMOVE(elem, all);
             qemu_aio_unref(elem);
         }
     }
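
The smp_rmb() retained above pairs with a write barrier on the worker side of the pool: a worker must publish ret before flipping state to THREAD_DONE, and the completion handler must read state before ret. A condensed sketch of the pairing (the worker half is not part of this hunk; func/arg stand for the request's stored callback fields):

    /* worker thread (sketch) */
    elem->ret = elem->func(elem->arg);
    smp_wmb();                        /* write ret before state */
    elem->state = THREAD_DONE;

    /* completion path, as in the hunk above */
    if (elem->state == THREAD_DONE) {
        smp_rmb();                    /* read state before ret */
        elem->common.cb(elem->common.opaque, elem->ret);
    }
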
index b15ca87f0f27f631d3efdc00b2d424e4438181ce..29275136c78321f1bbb1a8edf0041c2a3f28d020 100644 (file)
--- a/ui/console.c
+++ b/ui/console.c
@@ -1788,6 +1788,21 @@ bool qemu_console_is_fixedsize(QemuConsole *con)
     return con && (con->console_type != TEXT_CONSOLE);
 }
 
+char *qemu_console_get_label(QemuConsole *con)
+{
+    if (con->console_type == GRAPHIC_CONSOLE) {
+        if (con->device) {
+            return g_strdup(object_get_typename(con->device));
+        }
+        return g_strdup("VGA");
+    } else {
+        if (con->chr && con->chr->label) {
+            return g_strdup(con->chr->label);
+        }
+        return g_strdup_printf("vc%d", con->index);
+    }
+}
+
 int qemu_console_get_index(QemuConsole *con)
 {
     if (con == NULL) {
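
One note for users of the new helper: every branch of qemu_console_get_label() returns freshly allocated memory (g_strdup()/g_strdup_printf()), so the caller owns the string and must release it with g_free(); also, unlike qemu_console_get_index() below, it does not tolerate a NULL console. Minimal usage sketch:

    char *label = qemu_console_get_label(con);  /* never NULL for a valid con */
    /* ... use it, e.g. as a window or tab title ... */
    g_free(label);
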
index 51abac9f47bbf98ce1e714900d3c2443720f0876..51ea1b95ee998cf5641e5f5b4ad962cb7b2a07ba 100644 (file)
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -170,6 +170,7 @@ typedef struct VirtualConsole {
     GtkWidget *window;
     GtkWidget *menu_item;
     GtkWidget *tab_item;
+    GtkWidget *focus;
     VirtualConsoleType type;
     union {
         VirtualGfxConsole gfx;
@@ -230,6 +231,7 @@ struct GtkDisplayState {
 
     bool modifier_pressed[ARRAY_SIZE(modifier_keycode)];
     bool has_evdev;
+    bool ignore_keys;
 };
 
 static void gd_grab_pointer(VirtualConsole *vc);
@@ -290,7 +292,8 @@ static void gd_update_cursor(VirtualConsole *vc)
     GtkDisplayState *s = vc->s;
     GdkWindow *window;
 
-    if (vc->type != GD_VC_GFX) {
+    if (vc->type != GD_VC_GFX ||
+        !qemu_console_is_graphic(vc->gfx.dcl.con)) {
         return;
     }
 
@@ -363,6 +366,9 @@ static void gd_update_geometry_hints(VirtualConsole *vc)
     GtkWindow *geo_window;
 
     if (vc->type == GD_VC_GFX) {
+        if (!vc->gfx.ds) {
+            return;
+        }
         if (s->free_scale) {
             geo.min_width  = surface_width(vc->gfx.ds) * VC_SCALE_MIN;
             geo.min_height = surface_height(vc->gfx.ds) * VC_SCALE_MIN;
@@ -427,7 +433,8 @@ static void gtk_release_modifiers(GtkDisplayState *s)
     VirtualConsole *vc = gd_vc_find_current(s);
     int i, keycode;
 
-    if (vc->type != GD_VC_GFX) {
+    if (vc->type != GD_VC_GFX ||
+        !qemu_console_is_graphic(vc->gfx.dcl.con)) {
         return;
     }
     for (i = 0; i < ARRAY_SIZE(modifier_keycode); i++) {
@@ -455,6 +462,7 @@ static void gd_update(DisplayChangeListener *dcl,
                       int x, int y, int w, int h)
 {
     VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
+    GdkWindow *win;
     int x1, x2, y1, y2;
     int mx, my;
     int fbw, fbh;
@@ -481,8 +489,11 @@ static void gd_update(DisplayChangeListener *dcl,
     fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x;
     fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y;
 
-    gdk_drawable_get_size(gtk_widget_get_window(vc->gfx.drawing_area),
-                          &ww, &wh);
+    win = gtk_widget_get_window(vc->gfx.drawing_area);
+    if (!win) {
+        return;
+    }
+    gdk_drawable_get_size(win, &ww, &wh);
 
     mx = my = 0;
     if (ww > fbw) {
@@ -574,22 +585,28 @@ static void gd_switch(DisplayChangeListener *dcl,
     VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
     bool resized = true;
 
-    trace_gd_switch(vc->label, surface_width(surface), surface_height(surface));
+    trace_gd_switch(vc->label,
+                    surface ? surface_width(surface)  : 0,
+                    surface ? surface_height(surface) : 0);
 
     if (vc->gfx.surface) {
         cairo_surface_destroy(vc->gfx.surface);
+        vc->gfx.surface = NULL;
+    }
+    if (vc->gfx.convert) {
+        pixman_image_unref(vc->gfx.convert);
+        vc->gfx.convert = NULL;
     }
 
-    if (vc->gfx.ds &&
+    if (vc->gfx.ds && surface &&
         surface_width(vc->gfx.ds) == surface_width(surface) &&
         surface_height(vc->gfx.ds) == surface_height(surface)) {
         resized = false;
     }
     vc->gfx.ds = surface;
 
-    if (vc->gfx.convert) {
-        pixman_image_unref(vc->gfx.convert);
-        vc->gfx.convert = NULL;
+    if (!surface) {
+        return;
     }
 
     if (surface->format == PIXMAN_x8r8g8b8) {
@@ -690,6 +707,9 @@ static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque)
     if (!gtk_widget_get_realized(widget)) {
         return FALSE;
     }
+    if (!vc->gfx.ds) {
+        return FALSE;
+    }
 
     fbw = surface_width(vc->gfx.ds);
     fbh = surface_height(vc->gfx.ds);
@@ -771,6 +791,10 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion,
     int fbh, fbw;
     int ww, wh;
 
+    if (!vc->gfx.ds) {
+        return TRUE;
+    }
+
     fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x;
     fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y;
 
@@ -945,6 +969,23 @@ static int gd_map_keycode(GtkDisplayState *s, GdkDisplay *dpy, int gdk_keycode)
     return qemu_keycode;
 }
 
+static gboolean gd_text_key_down(GtkWidget *widget,
+                                 GdkEventKey *key, void *opaque)
+{
+    VirtualConsole *vc = opaque;
+    QemuConsole *con = vc->gfx.dcl.con;
+
+    if (key->length) {
+        kbd_put_string_console(con, key->string, key->length);
+    } else {
+        int num = gd_map_keycode(vc->s, gtk_widget_get_display(widget),
+                                 key->hardware_keycode);
+        int qcode = qemu_input_key_number_to_qcode(num);
+        kbd_put_qcode_console(con, qcode);
+    }
+    return TRUE;
+}
+
 static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque)
 {
     VirtualConsole *vc = opaque;
@@ -953,6 +994,11 @@ static gboolean gd_key_event(GtkWidget *widget, GdkEventKey *key, void *opaque)
     int qemu_keycode;
     int i;
 
+    if (s->ignore_keys) {
+        s->ignore_keys = (key->type == GDK_KEY_PRESS);
+        return TRUE;
+    }
+
     if (key->keyval == GDK_KEY_Pause) {
         qemu_input_event_send_key_qcode(vc->gfx.dcl.con, Q_KEY_CODE_PAUSE,
                                         key->type == GDK_KEY_PRESS);
@@ -1021,22 +1067,26 @@ static void gd_menu_switch_vc(GtkMenuItem *item, void *opaque)
     GtkDisplayState *s = opaque;
     VirtualConsole *vc = gd_vc_find_by_menu(s);
     GtkNotebook *nb = GTK_NOTEBOOK(s->notebook);
-    GtkWidget *child;
     gint page;
 
     gtk_release_modifiers(s);
     if (vc) {
         page = gtk_notebook_page_num(nb, vc->tab_item);
         gtk_notebook_set_current_page(nb, page);
-        child = gtk_notebook_get_nth_page(nb, page);
-        gtk_widget_grab_focus(child);
+        gtk_widget_grab_focus(vc->focus);
     }
+    s->ignore_keys = false;
 }
 
 static void gd_accel_switch_vc(void *opaque)
 {
     VirtualConsole *vc = opaque;
+
     gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(vc->menu_item), TRUE);
+#if !GTK_CHECK_VERSION(3, 0, 0)
+    /* GTK2 sends the accel key to the target console - ignore it until the key release clears ignore_keys */
+    vc->s->ignore_keys = true;
+#endif
 }
 
 static void gd_menu_show_tabs(GtkMenuItem *item, void *opaque)
@@ -1086,7 +1136,8 @@ static void gd_menu_untabify(GtkMenuItem *item, void *opaque)
     GtkDisplayState *s = opaque;
     VirtualConsole *vc = gd_vc_find_current(s);
 
-    if (vc->type == GD_VC_GFX) {
+    if (vc->type == GD_VC_GFX &&
+        qemu_console_is_graphic(vc->gfx.dcl.con)) {
         gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->grab_item),
                                        FALSE);
     }
@@ -1099,11 +1150,14 @@ static void gd_menu_untabify(GtkMenuItem *item, void *opaque)
                          G_CALLBACK(gd_tab_window_close), vc);
         gtk_widget_show_all(vc->window);
 
-        GtkAccelGroup *ag = gtk_accel_group_new();
-        gtk_window_add_accel_group(GTK_WINDOW(vc->window), ag);
+        if (qemu_console_is_graphic(vc->gfx.dcl.con)) {
+            GtkAccelGroup *ag = gtk_accel_group_new();
+            gtk_window_add_accel_group(GTK_WINDOW(vc->window), ag);
 
-        GClosure *cb = g_cclosure_new_swap(G_CALLBACK(gd_win_grab), vc, NULL);
-        gtk_accel_group_connect(ag, GDK_KEY_g, HOTKEY_MODIFIERS, 0, cb);
+            GClosure *cb = g_cclosure_new_swap(G_CALLBACK(gd_win_grab),
+                                               vc, NULL);
+            gtk_accel_group_connect(ag, GDK_KEY_g, HOTKEY_MODIFIERS, 0, cb);
+        }
 
         gd_update_geometry_hints(vc);
         gd_update_caption(s);
@@ -1120,8 +1174,10 @@ static void gd_menu_full_screen(GtkMenuItem *item, void *opaque)
         gtk_widget_hide(s->menu_bar);
         if (vc->type == GD_VC_GFX) {
             gtk_widget_set_size_request(vc->gfx.drawing_area, -1, -1);
-            gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->grab_item),
-                                           TRUE);
+            if (qemu_console_is_graphic(vc->gfx.dcl.con)) {
+                gtk_check_menu_item_set_active
+                    (GTK_CHECK_MENU_ITEM(s->grab_item), TRUE);
+            }
         }
         gtk_window_fullscreen(GTK_WINDOW(s->window));
         s->full_screen = TRUE;
@@ -1370,7 +1426,8 @@ static void gd_change_page(GtkNotebook *nb, gpointer arg1, guint arg2,
 #endif
     gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(vc->menu_item),
                                    TRUE);
-    on_vga = (vc->type == GD_VC_GFX);
+    on_vga = (vc->type == GD_VC_GFX &&
+              qemu_console_is_graphic(vc->gfx.dcl.con));
     if (!on_vga) {
         gtk_check_menu_item_set_active(GTK_CHECK_MENU_ITEM(s->grab_item),
                                        FALSE);
@@ -1542,6 +1599,7 @@ static GSList *gd_vc_vte_init(GtkDisplayState *s, VirtualConsole *vc,
 
     vc->type = GD_VC_VTE;
     vc->tab_item = box;
+    vc->focus = vc->vte.terminal;
     gtk_notebook_append_page(GTK_NOTEBOOK(s->notebook), vc->tab_item,
                              gtk_label_new(vc->label));
 
@@ -1577,25 +1635,30 @@ static void gd_connect_vc_gfx_signals(VirtualConsole *vc)
     g_signal_connect(vc->gfx.drawing_area, "expose-event",
                      G_CALLBACK(gd_expose_event), vc);
 #endif
-    g_signal_connect(vc->gfx.drawing_area, "event",
-                     G_CALLBACK(gd_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "button-press-event",
-                     G_CALLBACK(gd_button_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "button-release-event",
-                     G_CALLBACK(gd_button_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "scroll-event",
-                     G_CALLBACK(gd_scroll_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "key-press-event",
-                     G_CALLBACK(gd_key_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "key-release-event",
-                     G_CALLBACK(gd_key_event), vc);
-
-    g_signal_connect(vc->gfx.drawing_area, "enter-notify-event",
-                     G_CALLBACK(gd_enter_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "leave-notify-event",
-                     G_CALLBACK(gd_leave_event), vc);
-    g_signal_connect(vc->gfx.drawing_area, "focus-out-event",
-                     G_CALLBACK(gd_focus_out_event), vc);
+    if (qemu_console_is_graphic(vc->gfx.dcl.con)) {
+        g_signal_connect(vc->gfx.drawing_area, "event",
+                         G_CALLBACK(gd_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "button-press-event",
+                         G_CALLBACK(gd_button_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "button-release-event",
+                         G_CALLBACK(gd_button_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "scroll-event",
+                         G_CALLBACK(gd_scroll_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "key-press-event",
+                         G_CALLBACK(gd_key_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "key-release-event",
+                         G_CALLBACK(gd_key_event), vc);
+
+        g_signal_connect(vc->gfx.drawing_area, "enter-notify-event",
+                         G_CALLBACK(gd_enter_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "leave-notify-event",
+                         G_CALLBACK(gd_leave_event), vc);
+        g_signal_connect(vc->gfx.drawing_area, "focus-out-event",
+                         G_CALLBACK(gd_focus_out_event), vc);
+    } else {
+        g_signal_connect(vc->gfx.drawing_area, "key-press-event",
+                         G_CALLBACK(gd_text_key_down), vc);
+    }
 }
 
 static void gd_connect_signals(GtkDisplayState *s)
@@ -1679,15 +1742,7 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, VirtualConsole *vc,
                               QemuConsole *con, int idx,
                               GSList *group, GtkWidget *view_menu)
 {
-    Object *obj;
-
-    obj = object_property_get_link(OBJECT(con), "device", NULL);
-    if (obj) {
-        vc->label = g_strdup_printf("%s", object_get_typename(obj));
-    } else {
-        vc->label = g_strdup_printf("VGA");
-    }
-
+    vc->label = qemu_console_get_label(con);
     vc->s = s;
     vc->gfx.scale_x = 1.0;
     vc->gfx.scale_y = 1.0;
@@ -1706,16 +1761,17 @@ static GSList *gd_vc_gfx_init(GtkDisplayState *s, VirtualConsole *vc,
 
     vc->type = GD_VC_GFX;
     vc->tab_item = vc->gfx.drawing_area;
+    vc->focus = vc->gfx.drawing_area;
     gtk_notebook_append_page(GTK_NOTEBOOK(s->notebook),
                              vc->tab_item, gtk_label_new(vc->label));
-    gd_connect_vc_gfx_signals(vc);
-
-    group = gd_vc_menu_init(s, vc, idx, group, view_menu);
 
     vc->gfx.dcl.ops = &dcl_ops;
     vc->gfx.dcl.con = con;
     register_displaychangelistener(&vc->gfx.dcl);
 
+    gd_connect_vc_gfx_signals(vc);
+    group = gd_vc_menu_init(s, vc, idx, group, view_menu);
+
     return group;
 }
 
@@ -1787,7 +1843,7 @@ static GtkWidget *gd_create_menu_view(GtkDisplayState *s)
     /* gfx */
     for (vc = 0;; vc++) {
         con = qemu_console_lookup_by_index(vc);
-        if (!con || !qemu_console_is_graphic(con)) {
+        if (!con) {
             break;
         }
         group = gd_vc_gfx_init(s, &s->vc[vc], con,
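
How the new ignore_keys flag plays out: gd_accel_switch_vc() arms it on a GTK2 accelerator switch, and gd_key_event() then swallows events for as long as press events keep arriving, disarming itself on the first release. The stray accelerator keystroke, including auto-repeat, therefore never leaks into the newly selected console. Condensed restatement of the two hunks above:

    /* armed when the accelerator switches consoles (GTK2 only) */
    s->ignore_keys = true;

    /* first check in gd_key_event() */
    if (s->ignore_keys) {
        /* stay armed across repeated presses, disarm on release */
        s->ignore_keys = (key->type == GDK_KEY_PRESS);
        return TRUE;    /* event consumed */
    }
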
index c8f7f183c6f26aa4dedb8569eb92c3efddb5920c..f00e0742b4adac2e08bb22349c0e55d39ab5e98e 100644 (file)
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -273,14 +273,6 @@ static SpiceCoreInterface core_interface = {
     .channel_event      = channel_event,
 };
 
-typedef struct SpiceMigration {
-    SpiceMigrateInstance sin;
-    struct {
-        MonitorCompletion *cb;
-        void *opaque;
-    } connect_complete;
-} SpiceMigration;
-
 static void migrate_connect_complete_cb(SpiceMigrateInstance *sin);
 static void migrate_end_complete_cb(SpiceMigrateInstance *sin);
 
@@ -293,15 +285,11 @@ static const SpiceMigrateInterface migrate_interface = {
     .migrate_end_complete = migrate_end_complete_cb,
 };
 
-static SpiceMigration spice_migrate;
+static SpiceMigrateInstance spice_migrate;
 
 static void migrate_connect_complete_cb(SpiceMigrateInstance *sin)
 {
-    SpiceMigration *sm = container_of(sin, SpiceMigration, sin);
-    if (sm->connect_complete.cb) {
-        sm->connect_complete.cb(sm->connect_complete.opaque, NULL);
-    }
-    sm->connect_complete.cb = NULL;
+    /* nothing, but libspice-server expects this callback to be present. */
 }
 
 static void migrate_end_complete_cb(SpiceMigrateInstance *sin)
@@ -585,13 +573,10 @@ static void migration_state_notifier(Notifier *notifier, void *data)
 }
 
 int qemu_spice_migrate_info(const char *hostname, int port, int tls_port,
-                            const char *subject,
-                            MonitorCompletion *cb, void *opaque)
+                            const char *subject)
 {
     int ret;
 
-    spice_migrate.connect_complete.cb = cb;
-    spice_migrate.connect_complete.opaque = opaque;
     ret = spice_server_migrate_connect(spice_server, hostname,
                                        port, tls_port, subject);
     spice_have_target_host = true;
@@ -812,9 +797,8 @@ void qemu_spice_init(void)
 
     migration_state.notify = migration_state_notifier;
     add_migration_state_change_notifier(&migration_state);
-    spice_migrate.sin.base.sif = &migrate_interface.base;
-    spice_migrate.connect_complete.cb = NULL;
-    qemu_spice_add_interface(&spice_migrate.sin.base);
+    spice_migrate.base.sif = &migrate_interface.base;
+    qemu_spice_add_interface(&spice_migrate.base);
 
     qemu_spice_input_init();
     qemu_spice_audio_init();
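
Background on dropping the SpiceMigration wrapper: the struct existed only so the callbacks could recover their connect_complete state from the SpiceMigrateInstance pointer via container_of(). With that state gone, a bare SpiceMigrateInstance suffices and the round-trip disappears. For reference, a simplified form of the container_of() pattern the old code relied on (QEMU ships its own definition; this sketch is illustrative):

    #include <stddef.h>

    /* map a pointer to a member back to its enclosing structure */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
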
index 16441852e4add6181b6194483aac23e9b30734ff..c71a059e35017ebaabb49f1cae0c75069f1b7a1d 100644 (file)
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -178,7 +178,7 @@ static void qemu_spice_create_one_update(SimpleSpiceDisplay *ssd,
     image->bitmap.palette = 0;
     image->bitmap.format = SPICE_BITMAP_FMT_32BIT;
 
-    dest = pixman_image_create_bits(PIXMAN_x8r8g8b8, bw, bh,
+    dest = pixman_image_create_bits(PIXMAN_LE_x8r8g8b8, bw, bh,
                                     (void *)update->bitmap, bw * 4);
     pixman_image_composite(PIXMAN_OP_SRC, ssd->surface, NULL, ssd->mirror,
                            rect->left, rect->top, 0, 0,
@@ -260,7 +260,8 @@ static void qemu_spice_create_update(SimpleSpiceDisplay *ssd)
 
 static SimpleSpiceCursor*
 qemu_spice_create_cursor_update(SimpleSpiceDisplay *ssd,
-                                QEMUCursor *c)
+                                QEMUCursor *c,
+                                int on)
 {
     size_t size = c ? c->width * c->height * 4 : 0;
     SimpleSpiceCursor *update;
@@ -275,8 +276,8 @@ qemu_spice_create_cursor_update(SimpleSpiceDisplay *ssd,
 
     if (c) {
         ccmd->type = QXL_CURSOR_SET;
-        ccmd->u.set.position.x = ssd->ptr_x;
-        ccmd->u.set.position.y = ssd->ptr_y;
+        ccmd->u.set.position.x = ssd->ptr_x + ssd->hot_x;
+        ccmd->u.set.position.y = ssd->ptr_y + ssd->hot_y;
         ccmd->u.set.visible    = true;
         ccmd->u.set.shape      = (uintptr_t)cursor;
         cursor->header.unique     = ssd->unique++;
@@ -288,10 +289,12 @@ qemu_spice_create_cursor_update(SimpleSpiceDisplay *ssd,
         cursor->data_size         = size;
         cursor->chunk.data_size   = size;
         memcpy(cursor->chunk.data, c->data, size);
+    } else if (!on) {
+        ccmd->type = QXL_CURSOR_HIDE;
     } else {
         ccmd->type = QXL_CURSOR_MOVE;
-        ccmd->u.position.x = ssd->ptr_x;
-        ccmd->u.position.y = ssd->ptr_y;
+        ccmd->u.position.x = ssd->ptr_x + ssd->hot_x;
+        ccmd->u.position.y = ssd->ptr_y + ssd->hot_y;
     }
     ccmd->release_info.id = (uintptr_t)(&update->ext);
 
@@ -734,11 +737,11 @@ static void display_mouse_set(DisplayChangeListener *dcl,
 
     qemu_mutex_lock(&ssd->lock);
     ssd->ptr_x = x;
-    ssd->ptr_y = x;
+    ssd->ptr_y = y;
     if (ssd->ptr_move) {
         g_free(ssd->ptr_move);
     }
-    ssd->ptr_move = qemu_spice_create_cursor_update(ssd, NULL);
+    ssd->ptr_move = qemu_spice_create_cursor_update(ssd, NULL, on);
     qemu_mutex_unlock(&ssd->lock);
 }
 
@@ -748,6 +751,8 @@ static void display_mouse_define(DisplayChangeListener *dcl,
     SimpleSpiceDisplay *ssd = container_of(dcl, SimpleSpiceDisplay, dcl);
 
     qemu_mutex_lock(&ssd->lock);
+    ssd->hot_x = c->hot_x;
+    ssd->hot_y = c->hot_y;
     if (ssd->ptr_move) {
         g_free(ssd->ptr_move);
         ssd->ptr_move = NULL;
@@ -755,7 +760,7 @@ static void display_mouse_define(DisplayChangeListener *dcl,
     if (ssd->ptr_define) {
         g_free(ssd->ptr_define);
     }
-    ssd->ptr_define = qemu_spice_create_cursor_update(ssd, c);
+    ssd->ptr_define = qemu_spice_create_cursor_update(ssd, c, 0);
     qemu_mutex_unlock(&ssd->lock);
 }
 
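Two independent fixes meet in the cursor hunks above: display_mouse_set() assigned ptr_y from x (a straight copy/paste bug), and cursor updates ignored the hotspot. The display layer now records hot_x/hot_y when a cursor is defined and adds them to every QXL position; the extra on argument also lets a mouse-set with no visible pointer emit QXL_CURSOR_HIDE instead of a move. Condensed restatement:

    /* on cursor definition: remember the hotspot */
    ssd->hot_x = c->hot_x;
    ssd->hot_y = c->hot_y;

    /* on movement: report the hotspot-adjusted position */
    ccmd->u.position.x = ssd->ptr_x + ssd->hot_x;
    ccmd->u.position.y = ssd->ptr_y + ssd->hot_y;
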
index ab139717f57168b71119b0ce24ecfd076a4c06eb..a10c7aeeda6cd48c51b859066297e23424e19735 100644 (file)
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -90,6 +90,9 @@ struct HBitmap {
      * bitmap will still allocate HBITMAP_LEVELS arrays.
      */
     unsigned long *levels[HBITMAP_LEVELS];
+
+    /* The length of each levels[] array. */
+    uint64_t sizes[HBITMAP_LEVELS];
 };
 
 /* Advance hbi to the next nonzero word and return it.  hbi->pos
@@ -384,6 +387,7 @@ HBitmap *hbitmap_alloc(uint64_t size, int granularity)
     hb->granularity = granularity;
     for (i = HBITMAP_LEVELS; i-- > 0; ) {
         size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        hb->sizes[i] = size;
         hb->levels[i] = g_new0(unsigned long, size);
     }
 
@@ -395,3 +399,84 @@ HBitmap *hbitmap_alloc(uint64_t size, int granularity)
     hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
     return hb;
 }
+
+void hbitmap_truncate(HBitmap *hb, uint64_t size)
+{
+    bool shrink;
+    unsigned i;
+    uint64_t num_elements = size;
+    uint64_t old;
+
+    /* Size comes in as logical elements, adjust for granularity. */
+    size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+    shrink = size < hb->size;
+
+    /* bit sizes are identical; nothing to do. */
+    if (size == hb->size) {
+        return;
+    }
+
+    /* If we're losing bits, let's clear those bits before we invalidate all of
+     * our invariants. This helps keep the bitcount consistent, and will prevent
+     * us from carrying around garbage bits beyond the end of the map.
+     */
+    if (shrink) {
+        /* Don't clear partial granularity groups;
+         * start at the first full one. */
+        uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity);
+        uint64_t fix_count = (hb->size << hb->granularity) - start;
+
+        assert(fix_count);
+        hbitmap_reset(hb, start, fix_count);
+    }
+
+    hb->size = size;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        size = MAX(BITS_TO_LONGS(size), 1);
+        if (hb->sizes[i] == size) {
+            break;
+        }
+        old = hb->sizes[i];
+        hb->sizes[i] = size;
+        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+        if (!shrink) {
+            memset(&hb->levels[i][old], 0x00,
+                   (size - old) * sizeof(*hb->levels[i]));
+        }
+    }
+}
+
+
+/**
+ * Given HBitmaps A and B, let A := A (BITOR) B.
+ * Bitmap B will not be modified.
+ *
+ * @return true if the merge was successful,
+ *         false if it was not attempted.
+ */
+bool hbitmap_merge(HBitmap *a, const HBitmap *b)
+{
+    int i;
+    uint64_t j;
+
+    if ((a->size != b->size) || (a->granularity != b->granularity)) {
+        return false;
+    }
+
+    if (hbitmap_count(b) == 0) {
+        return true;
+    }
+
+    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
+     * It may be possible to improve running times for sparsely populated maps
+     * by using hbitmap_iter_next, but this is suboptimal for dense maps.
+     */
+    for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
+        for (j = 0; j < a->sizes[i]; j++) {
+            a->levels[i][j] |= b->levels[i][j];
+        }
+    }
+
+    return true;
+}
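
An end-to-end sketch of the two new entry points, following the semantics above: shrinking clears the dropped tail before the level arrays are reallocated, growing zero-fills the new longs, and merging requires identical size and granularity (standalone fragment, illustrative only):

    HBitmap *a = hbitmap_alloc(64, 0);
    HBitmap *b = hbitmap_alloc(64, 0);

    hbitmap_set(a, 0, 64);
    hbitmap_truncate(a, 32);        /* shrink: bits 32..63 are cleared */
    assert(hbitmap_count(a) == 32);

    hbitmap_truncate(a, 64);        /* grow again: new bits start at 0 */
    assert(hbitmap_count(a) == 32);

    hbitmap_set(b, 40, 8);
    assert(hbitmap_merge(a, b));    /* a |= b; b is left untouched */
    assert(hbitmap_count(a) == 40);

    hbitmap_free(a);
    hbitmap_free(b);
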
index 2d32ce7e91edc60edb9c69abc2acde6504a8ff1c..a393a3d785d4f3f0eb61e7b2d4636eee8f08ef55 100644 (file)
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -413,7 +413,9 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
             opts = qemu_opts_create(list, NULL, 0, &error_abort);
             continue;
         }
-        if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2) {
+        value[0] = '\0';
+        if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 ||
+            sscanf(line, " %63s = \"\"", arg) == 1) {
             /* arg = value */
             if (opts == NULL) {
                 error_report("no group defined");
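
Why the parser needs both the value[0] reset and the second sscanf() pattern above: a %[...] conversion must consume at least one character, so an empty value (key = "") makes the first sscanf() stop after arg and, without the reset, value would keep whatever a previous line left in it. A standalone reproduction of the fixed parse (illustrative only):

    #include <stdio.h>

    int main(void)
    {
        char arg[64], value[1024];
        const char *line = "  fullname = \"\"";

        value[0] = '\0';            /* %[^"] cannot match an empty string */
        if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 ||
            sscanf(line, " %63s = \"\"", arg) == 1) {
            printf("arg=%s value='%s'\n", arg, value);  /* arg=fullname value='' */
        }
        return 0;
    }
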