diff --git a/block/qed.c b/block/qed.c
index 085c4f22109a41a3cb19a277041803e21c7a64ac..da0bf3127b599b31621057bf747c876e0260128c 100644
--- a/block/qed.c
+++ b/block/qed.c
  *
  */
 
+#include "qemu-timer.h"
 #include "trace.h"
 #include "qed.h"
+#include "qerror.h"
 
 static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
 {
@@ -290,6 +292,88 @@ static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
 
 static void qed_aio_next_io(void *opaque, int ret);
 
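+/* Allocating write requests are "plugged" while the need-check flag is being
+ * cleared so that no request can dirty the metadata in the meantime.
+ */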
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+    assert(!s->allocating_write_reqs_plugged);
+
+    s->allocating_write_reqs_plugged = true;
+}
+
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+    QEDAIOCB *acb;
+
+    assert(s->allocating_write_reqs_plugged);
+
+    s->allocating_write_reqs_plugged = false;
+
+    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+    if (acb) {
+        qed_aio_next_io(acb, 0);
+    }
+}
+
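+/* The callbacks below clear QED_F_NEED_CHECK once the image has gone idle:
+ * flush data, rewrite the header without the flag, flush the header, and
+ * unplug allocating write requests.
+ */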
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+    /* Do nothing */
+}
+
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+    /* No need to wait until the flush completes: if the cleared flag is
+     * lost, the image is simply rechecked on next open, which is harmless.
+     */
+    qed_unplug_allocating_write_reqs(s);
+}
+
+static void qed_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    if (ret) {
+        qed_unplug_allocating_write_reqs(s);
+        return;
+    }
+
+    s->header.features &= ~QED_F_NEED_CHECK;
+    qed_write_header(s, qed_flush_after_clear_need_check, s);
+}
+
+static void qed_need_check_timer_cb(void *opaque)
+{
+    BDRVQEDState *s = opaque;
+
+    /* The timer should only fire when allocating writes have drained */
+    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+    trace_qed_need_check_timer_cb(s);
+
+    qed_plug_allocating_write_reqs(s);
+
+    /* Ensure writes are on disk before clearing flag */
+    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_start_need_check_timer(s);
+
+    /* Use vm_clock so we don't alter the image file while suspended for
+     * migration.
+     */
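+    /* QED_NEED_CHECK_TIMEOUT is in seconds; scale it to nanoseconds */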
+    qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) +
+                   get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+}
+
+/* It's okay to call this multiple times or when no timer is started */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_cancel_need_check_timer(s);
+    qemu_del_timer(s->need_check_timer);
+}
+
 static int bdrv_qed_open(BlockDriverState *bs, int flags)
 {
     BDRVQEDState *s = bs->opaque;
@@ -311,7 +395,13 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
         return -EINVAL;
     }
     if (s->header.features & ~QED_FEATURE_MASK) {
-        return -ENOTSUP; /* image uses unsupported feature bits */
+        /* image uses unsupported feature bits */
+        char buf[64];
+        snprintf(buf, sizeof(buf), "%" PRIx64,
+            s->header.features & ~QED_FEATURE_MASK);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "QED", buf);
+        return -ENOTSUP;
     }
     if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
         return -EINVAL;
@@ -399,7 +489,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
             BdrvCheckResult result = {0};
 
             ret = qed_check(s, &result, true);
-            if (!ret && !result.corruptions && !result.check_errors) {
+            if (ret) {
+                goto out;
+            }
+            if (!result.corruptions && !result.check_errors) {
                 /* Ensure fixes reach storage before clearing check bit */
                 bdrv_flush(s->bs);
 
@@ -409,6 +502,9 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
         }
     }
 
+    s->need_check_timer = qemu_new_timer_ns(vm_clock,
+                                            qed_need_check_timer_cb, s);
+
 out:
     if (ret) {
         qed_free_l2_cache(&s->l2_cache);
@@ -421,6 +517,9 @@ static void bdrv_qed_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
 
+    qed_cancel_need_check_timer(s);
+    qemu_free_timer(s->need_check_timer);
+
     /* Ensure writes reach stable storage */
     bdrv_flush(bs->file);
 
@@ -469,6 +568,12 @@ static int qed_create(const char *filename, uint32_t cluster_size,
         return ret;
     }
 
+    /* File must start empty and grow; check that truncate is supported */
+    ret = bdrv_truncate(bs, 0);
+    if (ret < 0) {
+        goto out;
+    }
+
     if (backing_file) {
         header.features |= QED_F_BACKING_FILE;
         header.backing_filename_offset = sizeof(le_header);
@@ -560,7 +665,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
 {
     QEDIsAllocatedCB *cb = opaque;
     *cb->pnum = len / BDRV_SECTOR_SIZE;
-    cb->is_allocated = ret == QED_CLUSTER_FOUND;
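+    /* Zero clusters count as allocated: they are not backing file reads */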
+    cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
 }
 
 static int bdrv_qed_is_allocated(BlockDriverState *bs, int64_t sector_num,
@@ -732,7 +837,10 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
  * @table:          L2 table
  * @index:          First cluster index
  * @n:              Number of contiguous clusters
- * @cluster:        First cluster byte offset in image file
+ * @cluster:        First cluster offset
+ *
+ * The cluster offset may be an allocated byte offset in the image file, the
+ * zero cluster marker, or the unallocated cluster marker.
  */
 static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
                                 unsigned int n, uint64_t cluster)
@@ -740,7 +848,10 @@ static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
     int i;
     for (i = index; i < index + n; i++) {
         table->offsets[i] = cluster;
-        cluster += s->header.cluster_size;
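+        /* Special markers are flags, not byte offsets; do not advance them */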
+        if (!qed_offset_is_unalloc_cluster(cluster) &&
+            !qed_offset_is_zero_cluster(cluster)) {
+            cluster += s->header.cluster_size;
+        }
     }
 }
 
@@ -790,6 +901,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
         acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
         if (acb) {
             qed_aio_next_io(acb, 0);
+        } else if (s->header.features & QED_F_NEED_CHECK) {
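+            /* Allocating writes have drained; schedule clearing the flag */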
+            qed_start_need_check_timer(s);
         }
     }
 }
@@ -970,6 +1083,19 @@ static void qed_aio_write_prefill(void *opaque, int ret)
                                 qed_aio_write_postfill, acb);
 }
 
+/**
+ * Check if the QED_F_NEED_CHECK bit should be set during an allocating write
+ */
+static bool qed_should_set_need_check(BDRVQEDState *s)
+{
+    /* With a backing file, the write path flushes before the L2 update, so
+     * the image stays consistent without marking it dirty.
+     */
+    if (s->bs->backing_hd) {
+        return false;
+    }
+
+    return !(s->header.features & QED_F_NEED_CHECK);
+}
+
 /**
  * Write new data cluster
  *
@@ -982,11 +1108,17 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
 {
     BDRVQEDState *s = acb_to_s(acb);
 
+    /* Cancel timer when the first allocating request comes in */
+    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+        qed_cancel_need_check_timer(s);
+    }
+
     /* Freeze this request if another allocating write is in progress */
     if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
         QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
     }
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
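+    /* Also wait while requests are plugged by the need-check timer */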
+    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+        s->allocating_write_reqs_plugged) {
         return; /* wait for existing request to finish */
     }
 
@@ -995,15 +1127,12 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
     acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Write new cluster if the image is already marked dirty */
-    if (s->header.features & QED_F_NEED_CHECK) {
+    if (qed_should_set_need_check(s)) {
+        s->header.features |= QED_F_NEED_CHECK;
+        qed_write_header(s, qed_aio_write_prefill, acb);
+    } else {
         qed_aio_write_prefill(acb, 0);
-        return;
     }
-
-    /* Mark the image dirty before writing the new cluster */
-    s->header.features |= QED_F_NEED_CHECK;
-    qed_write_header(s, qed_aio_write_prefill, acb);
 }
 
 /**
@@ -1052,6 +1181,7 @@ static void qed_aio_write_data(void *opaque, int ret,
 
     case QED_CLUSTER_L2:
     case QED_CLUSTER_L1:
+    case QED_CLUSTER_ZERO:
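+        /* Writing to a zero cluster must allocate a fresh data cluster */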
         qed_aio_write_alloc(acb, len);
         break;
 
@@ -1091,8 +1221,12 @@ static void qed_aio_read_data(void *opaque, int ret,
 
     qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
 
-    /* Handle backing file and unallocated sparse hole reads */
-    if (ret != QED_CLUSTER_FOUND) {
+    /* Handle zero cluster and backing file reads */
+    if (ret == QED_CLUSTER_ZERO) {
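+        /* Fill the request buffer with zeros; no I/O is needed */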
+        qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size);
+        qed_aio_next_io(acb, 0);
+        return;
+    } else if (ret != QED_CLUSTER_FOUND) {
         qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
                               qed_aio_next_io, acb);
         return;
@@ -1199,7 +1333,27 @@ static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
 
 static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return -ENOTSUP;
+    BDRVQEDState *s = bs->opaque;
+    uint64_t old_image_size;
+    int ret;
+
+    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
+                                 s->header.table_size)) {
+        return -EINVAL;
+    }
+
+    /* Shrinking is currently not supported */
+    if ((uint64_t)offset < s->header.image_size) {
+        return -ENOTSUP;
+    }
+
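+    /* Growing only updates the header; clusters are allocated on demand */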
+    old_image_size = s->header.image_size;
+    s->header.image_size = offset;
+    ret = qed_write_header_sync(s);
+    if (ret < 0) {
+        s->header.image_size = old_image_size;
+    }
+    return ret;
 }
 
 static int64_t bdrv_qed_getlength(BlockDriverState *bs)