diff --git a/block/qed.c b/block/qed.c
index 085c4f22109a41a3cb19a277041803e21c7a64ac..da0bf3127b599b31621057bf747c876e0260128c 100644
--- a/block/qed.c
+++ b/block/qed.c
  *
  */
 
+#include "qemu-timer.h"
 #include "trace.h"
 #include "qed.h"
+#include "qerror.h"
 
 static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -290,6 +292,88 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 
 static void qed_aio_next_io(void *opaque, int ret);
 
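+/* Allocating write requests are "plugged" while the need-check flag is being
+ * cleared so that no request can dirty the metadata in the meantime.
+ */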
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+    assert(!s->allocating_write_reqs_plugged);
+
+    s->allocating_write_reqs_plugged = true;
+}
+
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+    QEDAIOCB *acb;
+
+    assert(s->allocating_write_reqs_plugged);
+
+    s->allocating_write_reqs_plugged = false;
+
+    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+    if (acb) {
+        qed_aio_next_io(acb, 0);
+    }
+}
+
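+/* The callbacks below clear QED_F_NEED_CHECK once the image has gone idle:
+ * flush data, rewrite the header without the flag, flush the header, and
+ * unplug allocating write requests.
+ */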
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+    /* Do nothing */
+}
+
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+    /* No need to wait until the flush completes: if the cleared flag is
+     * lost, the image is simply rechecked on next open, which is harmless.
+     */
+    qed_unplug_allocating_write_reqs(s);
+}
+
+static void qed_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    if (ret) {
+        qed_unplug_allocating_write_reqs(s);
+        return;
+    }
+
+    s->header.features &= ~QED_F_NEED_CHECK;
+    qed_write_header(s, qed_flush_after_clear_need_check, s);
+}
+
+static void qed_need_check_timer_cb(void *opaque)
+{
+    BDRVQEDState *s = opaque;
+
+    /* The timer should only fire when allocating writes have drained */
+    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+    trace_qed_need_check_timer_cb(s);
+
+    qed_plug_allocating_write_reqs(s);
+
+    /* Ensure writes are on disk before clearing flag */
+    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_start_need_check_timer(s);
+
+    /* Use vm_clock so we don't alter the image file while suspended for
+     * migration.
+     */
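+    /* QED_NEED_CHECK_TIMEOUT is in seconds; scale it to nanoseconds */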
+    qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+                   get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+}
+
+/* It's okay to call this multiple times or when no timer is started */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_cancel_need_check_timer(s);
+    qemu_del_timer(s->need_check_timer);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, int flags)
 {
     BDRVQEDState *s = bs->opaque;
@@ -311,7 +395,13 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
         return -EINVAL;
     }
     if (s->header.features & ~QED_FEATURE_MASK) {
-        return -ENOTSUP; /* image uses unsupported feature bits */
+        /* image uses unsupported feature bits */
+        char buf[64];
+        snprintf(buf, sizeof(buf), "%" PRIx64,
+            s->header.features & ~QED_FEATURE_MASK);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "QED", buf);
+        return -ENOTSUP;
     }
     if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
         return -EINVAL;
@@ -399,7 +489,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
             BdrvCheckResult result = {0};
 
             ret = qed_check(s, &result, true);
-            if (!ret && !result.corruptions && !result.check_errors) {
+            if (ret) {
+                goto out;
+            }
+            if (!result.corruptions && !result.check_errors) {
                 /* Ensure fixes reach storage before clearing check bit */
                 bdrv_flush(s->bs);
 
@@ -409,6 +502,9 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
         }
     }
 
+    s->need_check_timer = qemu_new_timer_ns(vm_clock,
+                                            qed_need_check_timer_cb, s);
+
 out:
     if (ret) {
         qed_free_l2_cache(&s->l2_cache);
@@ -421,6 +517,9 @@ static void bdrv_qed_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
+    qed_cancel_need_check_timer(s);
+    qemu_free_timer(s->need_check_timer);
+
     /* Ensure writes reach stable storage */
     bdrv_flush(bs->file);
 
@@ -469,6 +568,12 @@ static int qed_create(const char *filename, uint32_t cluster_size,
         return ret;
     }
 
+    /* File must start empty and grow; check that truncate is supported */
+    ret = bdrv_truncate(bs, 0);
+    if (ret < 0) {
+        goto out;
+    }
+
     if (backing_file) {
         header.features |= QED_F_BACKING_FILE;
         header.backing_filename_offset = sizeof(le_header);
@@ -560,7 +665,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
 {
     QEDIsAllocatedCB *cb = opaque;
     *cb->pnum = len / BDRV_SECTOR_SIZE;
-    cb->is_allocated = ret == QED_CLUSTER_FOUND;
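+    /* Zero clusters count as allocated: they are not backing file reads */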
+    cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
 }
 
 static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
@@ -732,7 +837,10 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
  * @table:          L2 table
  * @index:          First cluster index
  * @n:              Number of contiguous clusters
- * @cluster:        First cluster byte offset in image file
+ * @cluster:        First cluster offset
+ *
+ * The cluster offset may be an allocated byte offset in the image file, the
+ * zero cluster marker, or the unallocated cluster marker.
  */
 static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
                                 unsigned int n, uint64_t cluster)
@@ -740,7 +848,10 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
     int i;
     for (i = index; i < index + n; i++) {
         table->offsets[i] = cluster;
-        cluster += s->header.cluster_size;
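+        /* Special markers are flags, not byte offsets; do not advance them */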
+        if (!qed_offset_is_unalloc_cluster(cluster) &&
+            !qed_offset_is_zero_cluster(cluster)) {
+            cluster += s->header.cluster_size;
+        }
     }
 }
 
@@ -790,6 +901,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
         acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
         if (acb) {
             qed_aio_next_io(acb, 0);
+        } else if (s->header.features & QED_F_NEED_CHECK) {
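+            /* Allocating writes have drained; schedule clearing the flag */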
+            qed_start_need_check_timer(s);
         }
     }
 }
@@ -970,6 +1083,19 @@ static void qed_aio_write_prefill(void *opaque, int ret)
                                 qed_aio_write_postfill, acb);
 }
 
+/**
+ * Check if the QED_F_NEED_CHECK bit should be set during an allocating write
+ */
+static bool qed_should_set_need_check(BDRVQEDState *s)
+{
+    /* With a backing file, the write path flushes before the L2 update, so
+     * the image stays consistent without marking it dirty.
+     */
+    if (s->bs->backing_hd) {
+        return false;
+    }
+
+    return !(s->header.features & QED_F_NEED_CHECK);
+}
+
 /**
  * Write new data cluster
  *
@@ -982,11 +1108,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
 
+    /* Cancel timer when the first allocating request comes in */
+    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+        qed_cancel_need_check_timer(s);
+    }
+
     /* Freeze this request if another allocating write is in progress */
     if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
         QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
     }
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
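+    /* Also wait while requests are plugged by the need-check timer */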
+    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+        s->allocating_write_reqs_plugged) {
         return; /* wait for existing request to finish */
     }
 
@@ -995,15 +1127,12 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Write new cluster if the image is already marked dirty */
-    if (s->header.features & QED_F_NEED_CHECK) {
+    if (qed_should_set_need_check(s)) {
+        s->header.features |= QED_F_NEED_CHECK;
+        qed_write_header(s, qed_aio_write_prefill, acb);
+    } else {
         qed_aio_write_prefill(acb, 0);
-        return;
     }
-
-    /* Mark the image dirty before writing the new cluster */
-    s->header.features |= QED_F_NEED_CHECK;
-    qed_write_header(s, qed_aio_write_prefill, acb);
 }
 
 /**
@@ -1052,6 +1181,7 @@ static void qed_aio_write_data(void *opaque, int ret,
 
     case QED_CLUSTER_L2:
     case QED_CLUSTER_L1:
+    case QED_CLUSTER_ZERO:
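+        /* Writing to a zero cluster must allocate a fresh data cluster */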
         qed_aio_write_alloc(acb, len);
         break;
 
@@ -1091,8 +1221,12 @@ static void qed_aio_read_data(void *opaque, int ret,
 
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Handle backing file and unallocated sparse hole reads */
-    if (ret != QED_CLUSTER_FOUND) {
+    /* Handle zero cluster and backing file reads */
+    if (ret == QED_CLUSTER_ZERO) {
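+        /* Fill the request buffer with zeros; no I/O is needed */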
+        qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size);
+        qed_aio_next_io(acb, 0);
+        return;
+    } else if (ret != QED_CLUSTER_FOUND) {
         qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
                               qed_aio_next_io, acb);
         return;
@@ -1199,7 +1333,27 @@ static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
 
 static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return -ENOTSUP;
+    BDRVQEDState *s = bs->opaque;
+    uint64_t old_image_size;
+    int ret;
+
+    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
+                                 s->header.table_size)) {
+        return -EINVAL;
+    }
+
+    /* Shrinking is currently not supported */
+    if ((uint64_t)offset < s->header.image_size) {
+        return -ENOTSUP;
+    }
+
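+    /* Growing only updates the header; clusters are allocated on demand */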
+    old_image_size = s->header.image_size;
+    s->header.image_size = offset;
+    ret = qed_write_header_sync(s);
+    if (ret < 0) {
+        s->header.image_size = old_image_size;
+    }
+    return ret;
 }
 
 static int64_t bdrv_qed_getlength(BlockDriverState *bs)