git.proxmox.com Git - qemu.git/blobdiff - block.c
vhost: fix migration during device start
[qemu.git] / block.c
diff --git a/block.c b/block.c
index 39724c17d3a3815e8eb5795812cfcd43875bdc36..65cf4dc9a445868b4abf92f6a26b68bbb6e2a574 100644 (file)
--- a/block.c
+++ b/block.c
@@ -63,6 +63,9 @@ static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
     QLIST_HEAD_INITIALIZER(bdrv_drivers);
 
+/* The device to use for VM snapshots */
+static BlockDriverState *bs_snapshots;
+
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
@@ -288,23 +291,30 @@ BlockDriver *bdrv_find_protocol(const char *filename)
     char protocol[128];
     int len;
     const char *p;
-    int is_drive;
 
     /* TODO Drivers without bdrv_file_open must be specified explicitly */
 
+    /*
+     * XXX(hch): we really should not let host device detection
+     * override an explicit protocol specification, but moving this
+     * later breaks access to device names with colons in them.
+     * Thanks to the brain-dead persistent naming schemes on udev-
+     * based Linux systems those actually are quite common.
+     */
+    drv1 = find_hdev_driver(filename);
+    if (drv1) {
+        return drv1;
+    }
+
 #ifdef _WIN32
-    is_drive = is_windows_drive(filename) ||
-        is_windows_drive_prefix(filename);
-#else
-    is_drive = 0;
+     if (is_windows_drive(filename) ||
+         is_windows_drive_prefix(filename))
+         return bdrv_find_format("file");
 #endif
+
     p = strchr(filename, ':');
-    if (!p || is_drive) {
-        drv1 = find_hdev_driver(filename);
-        if (!drv1) {
-            drv1 = bdrv_find_format("file");
-        }
-        return drv1;
+    if (!p) {
+        return bdrv_find_format("file");
     }
     len = p - filename;
     if (len > sizeof(protocol) - 1)
@@ -331,8 +341,8 @@ static BlockDriver *find_image_format(const char *filename)
     if (ret < 0)
         return NULL;
 
-    /* Return the raw BlockDriver * to scsi-generic devices */
-    if (bs->sg) {
+    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
+    if (bs->sg || !bdrv_is_inserted(bs)) {
         bdrv_delete(bs);
         return bdrv_find_format("raw");
     }
@@ -393,7 +403,6 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
 
     bs->file = NULL;
     bs->total_sectors = 0;
-    bs->is_temporary = 0;
     bs->encrypted = 0;
     bs->valid_key = 0;
     bs->open_flags = flags;
@@ -522,7 +531,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
             bdrv_delete(bs1);
             return ret;
         }
-        total_size = bdrv_getlength(bs1) >> BDRV_SECTOR_BITS;
+        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
 
         if (bs1->drv && bs1->drv->protocol_name)
             is_protocol = 1;
@@ -541,7 +550,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
         bdrv_qcow2 = bdrv_find_format("qcow2");
         options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
 
-        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
+        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
         set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
         if (drv) {
             set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
@@ -623,6 +632,9 @@ unlink_and_fail:
 void bdrv_close(BlockDriverState *bs)
 {
     if (bs->drv) {
+        if (bs == bs_snapshots) {
+            bs_snapshots = NULL;
+        }
         if (bs->backing_hd) {
             bdrv_delete(bs->backing_hd);
             bs->backing_hd = NULL;
@@ -648,8 +660,19 @@ void bdrv_close(BlockDriverState *bs)
     }
 }
 
+void bdrv_close_all(void)
+{
+    BlockDriverState *bs;
+
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        bdrv_close(bs);
+    }
+}
+
 void bdrv_delete(BlockDriverState *bs)
 {
+    assert(!bs->peer);
+
     /* remove from list, if necessary */
     if (bs->device_name[0] != '\0') {
         QTAILQ_REMOVE(&bdrv_states, bs, list);
@@ -660,21 +683,45 @@ void bdrv_delete(BlockDriverState *bs)
         bdrv_delete(bs->file);
     }
 
+    assert(bs != bs_snapshots);
     qemu_free(bs);
 }
 
+int bdrv_attach(BlockDriverState *bs, DeviceState *qdev)
+{
+    if (bs->peer) {
+        return -EBUSY;
+    }
+    bs->peer = qdev;
+    return 0;
+}
+
+void bdrv_detach(BlockDriverState *bs, DeviceState *qdev)
+{
+    assert(bs->peer == qdev);
+    bs->peer = NULL;
+}
+
+DeviceState *bdrv_get_attached(BlockDriverState *bs)
+{
+    return bs->peer;
+}
+
 /*
  * Run consistency checks on an image
  *
- * Returns the number of errors or -errno when an internal error occurs
+ * Returns 0 if the check could be completed (it doesn't mean that the image is
+ * free of errors) or -errno when an internal error occurred. The results of the
+ * check are stored in res.
  */
-int bdrv_check(BlockDriverState *bs)
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
 {
     if (bs->drv->bdrv_check == NULL) {
         return -ENOTSUP;
     }
 
-    return bs->drv->bdrv_check(bs);
+    memset(res, 0, sizeof(*res));
+    return bs->drv->bdrv_check(bs, res);
 }
 
 /* commit COW file into the raw image */
@@ -684,7 +731,7 @@ int bdrv_commit(BlockDriverState *bs)
     int64_t i, total_sectors;
     int n, j, ro, open_flags;
     int ret = 0, rw_ret = 0;
-    unsigned char sector[512];
+    unsigned char sector[BDRV_SECTOR_SIZE];
     char filename[1024];
     BlockDriverState *bs_rw, *bs_ro;
 
@@ -779,6 +826,15 @@ ro_cleanup:
     return ret;
 }
 
+void bdrv_commit_all(void)
+{
+    BlockDriverState *bs;
+
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        bdrv_commit(bs);
+    }
+}
+
 /*
  * Return values:
  * 0        - success
@@ -824,7 +880,8 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                               int nb_sectors)
 {
-    return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
+    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
+                                   nb_sectors * BDRV_SECTOR_SIZE);
 }
 
 /* return < 0 if error. See bdrv_write() for the return codes */
@@ -991,6 +1048,43 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
     return count1;
 }
 
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+    const void *buf, int count)
+{
+    int ret;
+
+    ret = bdrv_pwrite(bs, offset, buf, count);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* No flush needed for cache=writethrough, it uses O_DSYNC */
+    if ((bs->open_flags & BDRV_O_CACHE_MASK) != 0) {
+        bdrv_flush(bs);
+    }
+
+    return 0;
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_write_sync(BlockDriverState *bs, int64_t sector_num,
+    const uint8_t *buf, int nb_sectors)
+{
+    return bdrv_pwrite_sync(bs, BDRV_SECTOR_SIZE * sector_num,
+        buf, BDRV_SECTOR_SIZE * nb_sectors);
+}
+
 /**
  * Truncate file to 'offset' bytes (needed only for file protocols)
  */
@@ -1059,7 +1153,7 @@ struct partition {
 static int guess_disk_lchs(BlockDriverState *bs,
                            int *pcylinders, int *pheads, int *psectors)
 {
-    uint8_t buf[512];
+    uint8_t buf[BDRV_SECTOR_SIZE];
     int ret, i, heads, sectors, cylinders;
     struct partition *p;
     uint32_t nr_sects;
@@ -1196,6 +1290,26 @@ int bdrv_get_translation_hint(BlockDriverState *bs)
     return bs->translation;
 }
 
+void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
+                       BlockErrorAction on_write_error)
+{
+    bs->on_read_error = on_read_error;
+    bs->on_write_error = on_write_error;
+}
+
+BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
+{
+    return is_read ? bs->on_read_error : bs->on_write_error;
+}
+
+void bdrv_set_removable(BlockDriverState *bs, int removable)
+{
+    bs->removable = removable;
+    if (removable && bs == bs_snapshots) {
+        bs_snapshots = NULL;
+    }
+}
+
 int bdrv_is_removable(BlockDriverState *bs)
 {
     return bs->removable;
@@ -1299,6 +1413,14 @@ BlockDriverState *bdrv_find(const char *name)
     return NULL;
 }
 
+BlockDriverState *bdrv_next(BlockDriverState *bs)
+{
+    if (!bs) {
+        return QTAILQ_FIRST(&bdrv_states);
+    }
+    return QTAILQ_NEXT(bs, list);
+}
+
 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
 {
     BlockDriverState *bs;
@@ -1535,7 +1657,8 @@ static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
                              "} }",
                              bs->rd_bytes, bs->wr_bytes,
                              bs->rd_ops, bs->wr_ops,
-                             bs->wr_highest_sector * 512);
+                             bs->wr_highest_sector *
+                             (uint64_t)BDRV_SECTOR_SIZE);
     dict  = qobject_to_qdict(res);
 
     if (*bs->device_name) {
@@ -1621,9 +1744,11 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
     BlockDriver *drv = bs->drv;
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_save_vmstate)
-        return -ENOTSUP;
-    return drv->bdrv_save_vmstate(bs, buf, pos, size);
+    if (drv->bdrv_save_vmstate)
+        return drv->bdrv_save_vmstate(bs, buf, pos, size);
+    if (bs->file)
+        return bdrv_save_vmstate(bs->file, buf, pos, size);
+    return -ENOTSUP;
 }
 
 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
@@ -1632,9 +1757,11 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
     BlockDriver *drv = bs->drv;
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_load_vmstate)
-        return -ENOTSUP;
-    return drv->bdrv_load_vmstate(bs, buf, pos, size);
+    if (drv->bdrv_load_vmstate)
+        return drv->bdrv_load_vmstate(bs, buf, pos, size);
+    if (bs->file)
+        return bdrv_load_vmstate(bs->file, buf, pos, size);
+    return -ENOTSUP;
 }
 
 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
@@ -1652,26 +1779,78 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 /**************************************************************/
 /* handling of snapshots */
 
+int bdrv_can_snapshot(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    if (!drv || bdrv_is_removable(bs) || bdrv_is_read_only(bs)) {
+        return 0;
+    }
+
+    if (!drv->bdrv_snapshot_create) {
+        if (bs->file != NULL) {
+            return bdrv_can_snapshot(bs->file);
+        }
+        return 0;
+    }
+
+    return 1;
+}
+
+BlockDriverState *bdrv_snapshots(void)
+{
+    BlockDriverState *bs;
+
+    if (bs_snapshots) {
+        return bs_snapshots;
+    }
+
+    bs = NULL;
+    while ((bs = bdrv_next(bs))) {
+        if (bdrv_can_snapshot(bs)) {
+            bs_snapshots = bs;
+            return bs;
+        }
+    }
+    return NULL;
+}
+
 int bdrv_snapshot_create(BlockDriverState *bs,
                          QEMUSnapshotInfo *sn_info)
 {
     BlockDriver *drv = bs->drv;
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_snapshot_create)
-        return -ENOTSUP;
-    return drv->bdrv_snapshot_create(bs, sn_info);
+    if (drv->bdrv_snapshot_create)
+        return drv->bdrv_snapshot_create(bs, sn_info);
+    if (bs->file)
+        return bdrv_snapshot_create(bs->file, sn_info);
+    return -ENOTSUP;
 }
 
 int bdrv_snapshot_goto(BlockDriverState *bs,
                        const char *snapshot_id)
 {
     BlockDriver *drv = bs->drv;
+    int ret, open_ret;
+
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_snapshot_goto)
-        return -ENOTSUP;
-    return drv->bdrv_snapshot_goto(bs, snapshot_id);
+    if (drv->bdrv_snapshot_goto)
+        return drv->bdrv_snapshot_goto(bs, snapshot_id);
+
+    if (bs->file) {
+        drv->bdrv_close(bs);
+        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+        open_ret = drv->bdrv_open(bs, bs->open_flags);
+        if (open_ret < 0) {
+            bdrv_delete(bs->file);
+            bs->drv = NULL;
+            return open_ret;
+        }
+        return ret;
+    }
+
+    return -ENOTSUP;
 }
 
 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
@@ -1679,9 +1858,11 @@ int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
     BlockDriver *drv = bs->drv;
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_snapshot_delete)
-        return -ENOTSUP;
-    return drv->bdrv_snapshot_delete(bs, snapshot_id);
+    if (drv->bdrv_snapshot_delete)
+        return drv->bdrv_snapshot_delete(bs, snapshot_id);
+    if (bs->file)
+        return bdrv_snapshot_delete(bs->file, snapshot_id);
+    return -ENOTSUP;
 }
 
 int bdrv_snapshot_list(BlockDriverState *bs,
@@ -1690,9 +1871,11 @@ int bdrv_snapshot_list(BlockDriverState *bs,
     BlockDriver *drv = bs->drv;
     if (!drv)
         return -ENOMEDIUM;
-    if (!drv->bdrv_snapshot_list)
-        return -ENOTSUP;
-    return drv->bdrv_snapshot_list(bs, psn_info);
+    if (drv->bdrv_snapshot_list)
+        return drv->bdrv_snapshot_list(bs, psn_info);
+    if (bs->file)
+        return bdrv_snapshot_list(bs->file, psn_info);
+    return -ENOTSUP;
 }
 
 #define NB_SUFFIXES 4
@@ -1862,14 +2045,11 @@ static void multiwrite_cb(void *opaque, int ret)
 
     if (ret < 0 && !mcb->error) {
         mcb->error = ret;
-        multiwrite_user_cb(mcb);
     }
 
     mcb->num_requests--;
     if (mcb->num_requests == 0) {
-        if (mcb->error == 0) {
-            multiwrite_user_cb(mcb);
-        }
+        multiwrite_user_cb(mcb);
         qemu_free(mcb);
     }
 }
@@ -2003,8 +2183,29 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
     // Check for mergable requests
     num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
 
-    // Run the aio requests
+    /*
+     * Run the aio requests. As soon as one request can't be submitted
+     * successfully, fail all requests that are not yet submitted (we must
+     * return failure for all requests anyway)
+     *
+     * num_requests cannot be set to the right value immediately: If
+     * bdrv_aio_writev fails for some request, num_requests would be too high
+     * and therefore multiwrite_cb() would never recognize the multiwrite
+     * request as completed. We also cannot use the loop variable i to set it
+     * when the first request fails because the callback may already have been
+     * called for previously submitted requests. Thus, num_requests must be
+     * incremented for each request that is submitted.
+     *
+     * The problem that callbacks may be called early also means that we need
+     * to take care that num_requests doesn't become 0 before all requests are
+     * submitted - multiwrite_cb() would consider the multiwrite request
+     * completed. A dummy request that is "completed" by a manual call to
+     * multiwrite_cb() takes care of this.
+     */
+    mcb->num_requests = 1;
+
     for (i = 0; i < num_reqs; i++) {
+        mcb->num_requests++;
         acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
             reqs[i].nb_sectors, multiwrite_cb, mcb);
 
@@ -2012,22 +2213,24 @@ int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
             // We can only fail the whole thing if no request has been
             // submitted yet. Otherwise we'll wait for the submitted AIOs to
             // complete and report the error in the callback.
-            if (mcb->num_requests == 0) {
-                reqs[i].error = -EIO;
+            if (i == 0) {
                 goto fail;
             } else {
-                mcb->num_requests++;
                 multiwrite_cb(mcb, -EIO);
                 break;
             }
-        } else {
-            mcb->num_requests++;
         }
     }
 
+    /* Complete the dummy request */
+    multiwrite_cb(mcb, 0);
+
     return 0;
 
 fail:
+    for (i = 0; i < mcb->num_callbacks; i++) {
+        reqs[i].error = -EIO;
+    }
     qemu_free(mcb);
     return -1;
 }
@@ -2197,7 +2400,7 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
 
     async_ret = NOT_DONE;
     iov.iov_base = (void *)buf;
-    iov.iov_len = nb_sectors * 512;
+    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&qiov, &iov, 1);
     acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
         bdrv_rw_em_cb, &async_ret);
@@ -2228,7 +2431,7 @@ static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
 
     async_ret = NOT_DONE;
     iov.iov_base = (void *)buf;
-    iov.iov_len = nb_sectors * 512;
+    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&qiov, &iov, 1);
     acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
         bdrv_rw_em_cb, &async_ret);