diff --git a/block/qed.c b/block/qed.c
index 8e65d1842462a9989469cdc5dcb5e6c760e86395..39703793e9f9e281b9d9b36697a446df5488b665 100644
--- a/block/qed.c
+++ b/block/qed.c
  *
  */
 
+#include "qemu-timer.h"
 #include "trace.h"
 #include "qed.h"
+#include "qerror.h"
 
 static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -99,6 +101,81 @@ static int qed_write_header_sync(BDRVQEDState *s)
     return 0;
 }
 
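+/* Bounce buffer state for the read-modify-write header update performed by
+ * qed_write_header() */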
+typedef struct {
+    GenericCB gencb;
+    BDRVQEDState *s;
+    struct iovec iov;
+    QEMUIOVector qiov;
+    int nsectors;
+    uint8_t *buf;
+} QEDWriteHeaderCB;
+
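+/* Common completion path: free the bounce buffer and complete the request */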
+static void qed_write_header_cb(void *opaque, int ret)
+{
+    QEDWriteHeaderCB *write_header_cb = opaque;
+
+    qemu_vfree(write_header_cb->buf);
+    gencb_complete(write_header_cb, ret);
+}
+
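+/* Read completed: apply the updated header fields to the bounce buffer and
+ * write the sectors back out */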
+static void qed_write_header_read_cb(void *opaque, int ret)
+{
+    QEDWriteHeaderCB *write_header_cb = opaque;
+    BDRVQEDState *s = write_header_cb->s;
+    BlockDriverAIOCB *acb;
+
+    if (ret) {
+        qed_write_header_cb(write_header_cb, ret);
+        return;
+    }
+
+    /* Update header */
+    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
+
+    acb = bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+                          write_header_cb->nsectors, qed_write_header_cb,
+                          write_header_cb);
+    if (!acb) {
+        qed_write_header_cb(write_header_cb, -EIO);
+    }
+}
+
+/**
+ * Update header in-place (does not rewrite backing filename or other strings)
+ *
+ * This function only updates known header fields in-place and does not affect
+ * extra data after the QED header.
+ */
+static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
+                             void *opaque)
+{
+    /* We must write full sectors for O_DIRECT but cannot necessarily generate
+     * the data following the header if an unrecognized compat feature is
+     * active.  Therefore, first read the sectors containing the header, update
+     * them, and write back.
+     */
+
+    BlockDriverAIOCB *acb;
+    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
+                   BDRV_SECTOR_SIZE;
+    size_t len = nsectors * BDRV_SECTOR_SIZE;
+    QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
+                                                    cb, opaque);
+
+    write_header_cb->s = s;
+    write_header_cb->nsectors = nsectors;
+    write_header_cb->buf = qemu_blockalign(s->bs, len);
+    write_header_cb->iov.iov_base = write_header_cb->buf;
+    write_header_cb->iov.iov_len = len;
+    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
+
+    acb = bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+                         qed_write_header_read_cb, write_header_cb);
+    if (!acb) {
+        qed_write_header_cb(write_header_cb, -EIO);
+    }
+}
+
 static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
 {
     uint64_t table_entries;
@@ -215,6 +292,88 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 
 static void qed_aio_next_io(void *opaque, int ret);
 
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+    assert(!s->allocating_write_reqs_plugged);
+
+    s->allocating_write_reqs_plugged = true;
+}
+
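+/* Resume allocating write requests, restarting the head of the queue */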
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+    QEDAIOCB *acb;
+
+    assert(s->allocating_write_reqs_plugged);
+
+    s->allocating_write_reqs_plugged = false;
+
+    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+    if (acb) {
+        qed_aio_next_io(acb, 0);
+    }
+}
+
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+    /* Do nothing */
+}
+
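+/* Header write completed: flush it to disk and let allocating writes resume
+ * without waiting for the flush */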
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+    /* No need to wait until flush completes */
+    qed_unplug_allocating_write_reqs(s);
+}
+
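+/* Flush completed: clear QED_F_NEED_CHECK and persist the updated header */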
+static void qed_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    if (ret) {
+        qed_unplug_allocating_write_reqs(s);
+        return;
+    }
+
+    s->header.features &= ~QED_F_NEED_CHECK;
+    qed_write_header(s, qed_flush_after_clear_need_check, s);
+}
+
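+/* Timer callback: allocating writes have drained, so the need-check bit can
+ * safely be cleared */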
+static void qed_need_check_timer_cb(void *opaque)
+{
+    BDRVQEDState *s = opaque;
+
+    /* The timer should only fire when allocating writes have drained */
+    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+    trace_qed_need_check_timer_cb(s);
+
+    qed_plug_allocating_write_reqs(s);
+
+    /* Ensure writes are on disk before clearing flag */
+    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_start_need_check_timer(s);
+
+    /* Use vm_clock so we don't alter the image file while suspended for
+     * migration.
+     */
+    qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+                   get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+}
+
+/* It's okay to call this multiple times or when no timer is started */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_cancel_need_check_timer(s);
+    qemu_del_timer(s->need_check_timer);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, int flags)
 {
     BDRVQEDState *s = bs->opaque;
@@ -236,7 +395,13 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
         return -EINVAL;
     }
     if (s->header.features & ~QED_FEATURE_MASK) {
-        return -ENOTSUP; /* image uses unsupported feature bits */
+        /* image uses unsupported feature bits */
+        char buf[64];
+        snprintf(buf, sizeof(buf), "%" PRIx64,
+            s->header.features & ~QED_FEATURE_MASK);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "QED", buf);
+        return -ENOTSUP;
     }
     if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
         return -EINVAL;
@@ -309,6 +474,38 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
     qed_init_l2_cache(&s->l2_cache);
 
     ret = qed_read_l1_table_sync(s);
+    if (ret) {
+        goto out;
+    }
+
+    /* If image was not closed cleanly, check consistency */
+    if (s->header.features & QED_F_NEED_CHECK) {
+        /* Read-only images cannot be fixed.  There is no risk of corruption
+         * since write operations are not possible.  Therefore, allow
+         * potentially inconsistent images to be opened read-only.  This can
+         * aid data recovery from an otherwise inconsistent image.
+         */
+        if (!bdrv_is_read_only(bs->file)) {
+            BdrvCheckResult result = {0};
+
+            ret = qed_check(s, &result, true);
+            if (ret) {
+                goto out;
+            }
+            if (!result.corruptions && !result.check_errors) {
+                /* Ensure fixes reach storage before clearing check bit */
+                bdrv_flush(s->bs);
+
+                s->header.features &= ~QED_F_NEED_CHECK;
+                qed_write_header_sync(s);
+            }
+        }
+    }
+
+    s->need_check_timer = qemu_new_timer_ns(vm_clock,
+                                            qed_need_check_timer_cb, s);
+
+out:
     if (ret) {
         qed_free_l2_cache(&s->l2_cache);
         qemu_vfree(s->l1_table);
@@ -320,6 +517,18 @@ static void bdrv_qed_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
+    qed_cancel_need_check_timer(s);
+    qemu_free_timer(s->need_check_timer);
+
+    /* Ensure writes reach stable storage */
+    bdrv_flush(bs->file);
+
+    /* Clean shutdown, no check required on next open */
+    if (s->header.features & QED_F_NEED_CHECK) {
+        s->header.features &= ~QED_F_NEED_CHECK;
+        qed_write_header_sync(s);
+    }
+
     qed_free_l2_cache(&s->l2_cache);
     qemu_vfree(s->l1_table);
 }
@@ -359,6 +568,12 @@ static int qed_create(const char *filename, uint32_t cluster_size,
         return ret;
     }
 
+    /* The file must start empty and grow; this also verifies that truncate
+     * is supported */
+    ret = bdrv_truncate(bs, 0);
+    if (ret < 0) {
+        goto out;
+    }
+
     if (backing_file) {
         header.features |= QED_F_BACKING_FILE;
         header.backing_filename_offset = sizeof(le_header);
@@ -450,7 +665,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
 {
     QEDIsAllocatedCB *cb = opaque;
     *cb->pnum = len / BDRV_SECTOR_SIZE;
-    cb->is_allocated = ret == QED_CLUSTER_FOUND;
+    cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
 }
 
 static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
@@ -622,7 +837,10 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
  * @table:          L2 table
  * @index:          First cluster index
  * @n:              Number of contiguous clusters
- * @cluster:        First cluster byte offset in image file
+ * @cluster:        First cluster offset
+ *
+ * The cluster offset may be an allocated byte offset in the image file, the
+ * zero cluster marker, or the unallocated cluster marker.
  */
 static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
                                 unsigned int n, uint64_t cluster)
@@ -630,7 +848,10 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
     int i;
     for (i = index; i < index + n; i++) {
         table->offsets[i] = cluster;
-        cluster += s->header.cluster_size;
+        if (!qed_offset_is_unalloc_cluster(cluster) &&
+            !qed_offset_is_zero_cluster(cluster)) {
+            cluster += s->header.cluster_size;
+        }
     }
 }
 
@@ -680,6 +901,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
         acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
         if (acb) {
             qed_aio_next_io(acb, 0);
+        } else if (s->header.features & QED_F_NEED_CHECK) {
+            qed_start_need_check_timer(s);
         }
     }
 }
@@ -860,6 +1083,19 @@ static void qed_aio_write_prefill(void *opaque, int ret)
                                 qed_aio_write_postfill, acb);
 }
 
+/**
+ * Check if the QED_F_NEED_CHECK bit should be set during allocating write
+ */
+static bool qed_should_set_need_check(BDRVQEDState *s)
+{
+    /* The flush-before-L2-update path already ensures consistency */
+    if (s->bs->backing_hd) {
+        return false;
+    }
+
+    return !(s->header.features & QED_F_NEED_CHECK);
+}
+
 /**
  * Write new data cluster
  *
@@ -872,11 +1108,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
 
+    /* Cancel timer when the first allocating request comes in */
+    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+        qed_cancel_need_check_timer(s);
+    }
+
     /* Freeze this request if another allocating write is in progress */
     if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
         QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
     }
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+        s->allocating_write_reqs_plugged) {
         return; /* wait for existing request to finish */
     }
 
@@ -885,8 +1127,12 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Write new cluster */
-    qed_aio_write_prefill(acb, 0);
+    if (qed_should_set_need_check(s)) {
+        s->header.features |= QED_F_NEED_CHECK;
+        qed_write_header(s, qed_aio_write_prefill, acb);
+    } else {
+        qed_aio_write_prefill(acb, 0);
+    }
 }
 
 /**
@@ -935,6 +1181,7 @@ static void qed_aio_write_data(void *opaque, int ret,
 
     case QED_CLUSTER_L2:
     case QED_CLUSTER_L1:
+    case QED_CLUSTER_ZERO:
         qed_aio_write_alloc(acb, len);
         break;
 
@@ -974,8 +1221,12 @@ static void qed_aio_read_data(void *opaque, int ret,
 
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Handle backing file and unallocated sparse hole reads */
-    if (ret != QED_CLUSTER_FOUND) {
+    /* Handle zero cluster and backing file reads */
+    if (ret == QED_CLUSTER_ZERO) {
+        qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size);
+        qed_aio_next_io(acb, 0);
+        return;
+    } else if (ret != QED_CLUSTER_FOUND) {
         qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
                               qed_aio_next_io, acb);
         return;
@@ -1082,7 +1333,27 @@ static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
 
 static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return -ENOTSUP;
+    BDRVQEDState *s = bs->opaque;
+    uint64_t old_image_size;
+    int ret;
+
+    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
+                                 s->header.table_size)) {
+        return -EINVAL;
+    }
+
+    /* Shrinking is currently not supported */
+    if ((uint64_t)offset < s->header.image_size) {
+        return -ENOTSUP;
+    }
+
+    old_image_size = s->header.image_size;
+    s->header.image_size = offset;
+    ret = qed_write_header_sync(s);
+    if (ret < 0) {
+        s->header.image_size = old_image_size;
+    }
+    return ret;
 }
 
 static int64_t bdrv_qed_getlength(BlockDriverState *bs)
@@ -1172,7 +1443,9 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
 
 static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result)
 {
-    return -ENOTSUP;
+    BDRVQEDState *s = bs->opaque;
+
+    return qed_check(s, result, false);
 }
 
 static QEMUOptionParameter qed_create_options[] = {
@@ -1191,7 +1464,8 @@ static QEMUOptionParameter qed_create_options[] = {
     }, {
         .name = BLOCK_OPT_CLUSTER_SIZE,
         .type = OPT_SIZE,
-        .help = "Cluster size (in bytes)"
+        .help = "Cluster size (in bytes)",
+        .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
     }, {
         .name = BLOCK_OPT_TABLE_SIZE,
         .type = OPT_SIZE,