]> git.proxmox.com Git - qemu.git/blobdiff - block.c
aio / timers: Switch entire codebase to the new timer API
[qemu.git] / block.c
diff --git a/block.c b/block.c
index 79ad33d0f9e9a726a2a71ffb9ca3e86144675197..a387c1ad684a34208620e723004374880bda5027 100644 (file)
--- a/block.c
+++ b/block.c
@@ -127,11 +127,11 @@ void bdrv_io_limits_disable(BlockDriverState *bs)
 {
     bs->io_limits_enabled = false;
 
-    while (qemu_co_queue_next(&bs->throttled_reqs));
+    do {} while (qemu_co_enter_next(&bs->throttled_reqs));
 
     if (bs->block_timer) {
-        qemu_del_timer(bs->block_timer);
-        qemu_free_timer(bs->block_timer);
+        timer_del(bs->block_timer);
+        timer_free(bs->block_timer);
         bs->block_timer = NULL;
     }
 
@@ -143,13 +143,12 @@ static void bdrv_block_timer(void *opaque)
 {
     BlockDriverState *bs = opaque;
 
-    qemu_co_queue_next(&bs->throttled_reqs);
+    qemu_co_enter_next(&bs->throttled_reqs);
 }
 
 void bdrv_io_limits_enable(BlockDriverState *bs)
 {
-    qemu_co_queue_init(&bs->throttled_reqs);
-    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+    bs->block_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, bdrv_block_timer, bs);
     bs->io_limits_enabled = true;
 }
 
@@ -181,8 +180,8 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
      */
 
     while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
-        qemu_mod_timer(bs->block_timer,
-                       wait_time + qemu_get_clock_ns(vm_clock));
+        timer_mod(bs->block_timer,
+                       wait_time + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
         qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
     }
 
@@ -305,6 +304,8 @@ BlockDriverState *bdrv_new(const char *device_name)
     }
     bdrv_iostatus_disable(bs);
     notifier_list_init(&bs->close_notifiers);
+    notifier_with_return_list_init(&bs->before_write_notifiers);
+    qemu_co_queue_init(&bs->throttled_reqs);
 
     return bs;
 }
@@ -416,7 +417,7 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
 {
     BlockDriver *drv;
 
-    drv = bdrv_find_protocol(filename);
+    drv = bdrv_find_protocol(filename, true);
     if (drv == NULL) {
         return -ENOENT;
     }
@@ -481,7 +482,8 @@ static BlockDriver *find_hdev_driver(const char *filename)
     return drv;
 }
 
-BlockDriver *bdrv_find_protocol(const char *filename)
+BlockDriver *bdrv_find_protocol(const char *filename,
+                                bool allow_protocol_prefix)
 {
     BlockDriver *drv1;
     char protocol[128];
@@ -502,9 +504,10 @@ BlockDriver *bdrv_find_protocol(const char *filename)
         return drv1;
     }
 
-    if (!path_has_protocol(filename)) {
+    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
         return bdrv_find_format("file");
     }
+
     p = strchr(filename, ':');
     assert(p != NULL);
     len = p - filename;
@@ -703,6 +706,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
 
     bs->open_flags = flags;
     bs->buffer_alignment = 512;
+    bs->zero_beyond_eof = true;
     open_flags = bdrv_open_flags(bs, flags);
     bs->read_only = !(open_flags & BDRV_O_RDWR);
 
@@ -783,6 +787,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
     BlockDriverState *bs;
     BlockDriver *drv;
     const char *drvname;
+    bool allow_protocol_prefix = false;
     int ret;
 
     /* NULL means an empty set of options */
@@ -799,6 +804,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
         filename = qdict_get_try_str(options, "filename");
     } else if (filename && !qdict_haskey(options, "filename")) {
         qdict_put(options, "filename", qstring_from_str(filename));
+        allow_protocol_prefix = true;
     } else {
         qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
                       "'filename' options at the same time");
@@ -812,7 +818,10 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename,
         drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
         qdict_del(options, "driver");
     } else if (filename) {
-        drv = bdrv_find_protocol(filename);
+        drv = bdrv_find_protocol(filename, allow_protocol_prefix);
+        if (!drv) {
+            qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
+        }
     } else {
         qerror_report(ERROR_CLASS_GENERIC_ERROR,
                       "Must specify either driver or file");
@@ -962,6 +971,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
     char tmp_filename[PATH_MAX + 1];
     BlockDriverState *file = NULL;
     QDict *file_options = NULL;
+    const char *drvname;
 
     /* NULL means an empty set of options */
     if (options == NULL) {
@@ -1045,12 +1055,18 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
     extract_subqdict(options, &file_options, "file.");
 
     ret = bdrv_file_open(&file, filename, file_options,
-                         bdrv_open_flags(bs, flags));
+                         bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
     if (ret < 0) {
         goto fail;
     }
 
     /* Find the right image format driver */
+    drvname = qdict_get_try_str(options, "driver");
+    if (drvname) {
+        drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
+        qdict_del(options, "driver");
+    }
+
     if (!drv) {
         ret = find_image_format(file, filename, &drv);
     }
@@ -1291,8 +1307,8 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
             if (local_err != NULL) {
                 error_propagate(errp, local_err);
             } else {
-                error_set(errp, QERR_OPEN_FILE_FAILED,
-                          reopen_state->bs->filename);
+                error_setg(errp, "failed while preparing to reopen image '%s'",
+                           reopen_state->bs->filename);
             }
             goto error;
         }
@@ -1357,11 +1373,12 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 
 void bdrv_close(BlockDriverState *bs)
 {
-    bdrv_flush(bs);
     if (bs->job) {
         block_job_cancel_sync(bs->job);
     }
-    bdrv_drain_all();
+    bdrv_drain_all(); /* complete I/O */
+    bdrv_flush(bs);
+    bdrv_drain_all(); /* in case flush left pending I/O */
     notifier_list_notify(&bs->close_notifiers, bs);
 
     if (bs->drv) {
@@ -1386,6 +1403,7 @@ void bdrv_close(BlockDriverState *bs)
         bs->valid_key = 0;
         bs->sg = 0;
         bs->growable = 0;
+        bs->zero_beyond_eof = false;
         QDECREF(bs->options);
         bs->options = NULL;
 
@@ -1412,6 +1430,35 @@ void bdrv_close_all(void)
     }
 }
 
+/* Check if any requests are in-flight (including throttled requests) */
+static bool bdrv_requests_pending(BlockDriverState *bs)
+{
+    if (!QLIST_EMPTY(&bs->tracked_requests)) {
+        return true;
+    }
+    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+        return true;
+    }
+    if (bs->file && bdrv_requests_pending(bs->file)) {
+        return true;
+    }
+    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
+        return true;
+    }
+    return false;
+}
+
+static bool bdrv_requests_pending_all(void)
+{
+    BlockDriverState *bs;
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        if (bdrv_requests_pending(bs)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 /*
  * Wait for pending requests to complete across all BlockDriverStates
  *
@@ -1426,27 +1473,22 @@ void bdrv_close_all(void)
  */
 void bdrv_drain_all(void)
 {
+    /* Always run first iteration so any pending completion BHs run */
+    bool busy = true;
     BlockDriverState *bs;
-    bool busy;
-
-    do {
-        busy = qemu_aio_wait();
 
+    while (busy) {
         /* FIXME: We do not have timer support here, so this is effectively
          * a busy wait.
          */
         QTAILQ_FOREACH(bs, &bdrv_states, list) {
-            if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
-                qemu_co_queue_restart_all(&bs->throttled_reqs);
+            while (qemu_co_enter_next(&bs->throttled_reqs)) {
                 busy = true;
             }
         }
-    } while (busy);
 
-    /* If requests are still pending there is a bug somewhere */
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        assert(QLIST_EMPTY(&bs->tracked_requests));
-        assert(qemu_co_queue_empty(&bs->throttled_reqs));
+        busy = bdrv_requests_pending_all();
+        busy |= aio_poll(qemu_get_aio_context(), busy);
     }
 }
 
@@ -1591,11 +1633,11 @@ void bdrv_delete(BlockDriverState *bs)
     assert(!bs->job);
     assert(!bs->in_use);
 
+    bdrv_close(bs);
+
     /* remove from list, if necessary */
     bdrv_make_anon(bs);
 
-    bdrv_close(bs);
-
     g_free(bs);
 }
 
@@ -1840,16 +1882,6 @@ int bdrv_commit_all(void)
     return 0;
 }
 
-struct BdrvTrackedRequest {
-    BlockDriverState *bs;
-    int64_t sector_num;
-    int nb_sectors;
-    bool is_write;
-    QLIST_ENTRY(BdrvTrackedRequest) list;
-    Coroutine *co; /* owner, used for deadlock detection */
-    CoQueue wait_queue; /* coroutines blocked on this request */
-};
-
 /**
  * Remove an active request from the tracked requests list
  *
@@ -2163,6 +2195,7 @@ typedef struct RwCo {
     QEMUIOVector *qiov;
     bool is_write;
     int ret;
+    BdrvRequestFlags flags;
 } RwCo;
 
 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
@@ -2171,10 +2204,12 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 
     if (!rwco->is_write) {
         rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
-                                     rwco->nb_sectors, rwco->qiov, 0);
+                                     rwco->nb_sectors, rwco->qiov,
+                                     rwco->flags);
     } else {
         rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
-                                      rwco->nb_sectors, rwco->qiov, 0);
+                                      rwco->nb_sectors, rwco->qiov,
+                                      rwco->flags);
     }
 }
 
@@ -2182,7 +2217,8 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
  * Process a vectored synchronous request using coroutines
  */
 static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
-                       QEMUIOVector *qiov, bool is_write)
+                       QEMUIOVector *qiov, bool is_write,
+                       BdrvRequestFlags flags)
 {
     Coroutine *co;
     RwCo rwco = {
@@ -2192,6 +2228,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
         .qiov = qiov,
         .is_write = is_write,
         .ret = NOT_DONE,
+        .flags = flags,
     };
     assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
 
@@ -2223,7 +2260,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
  * Process a synchronous request using coroutines
  */
 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
-                      int nb_sectors, bool is_write)
+                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
 {
     QEMUIOVector qiov;
     struct iovec iov = {
@@ -2232,14 +2269,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
     };
 
     qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_rwv_co(bs, sector_num, &qiov, is_write);
+    return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
 }
 
 /* return < 0 if error. See bdrv_write() for the return codes */
 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors)
 {
-    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
+    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
 }
 
 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
@@ -2251,7 +2288,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
 
     enabled = bs->io_limits_enabled;
     bs->io_limits_enabled = false;
-    ret = bdrv_read(bs, 0, buf, 1);
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
     bs->io_limits_enabled = enabled;
     return ret;
 }
@@ -2265,12 +2302,18 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors)
 {
-    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
+    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
 }
 
 int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
 {
-    return bdrv_rwv_co(bs, sector_num, qiov, true);
+    return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
+}
+
+int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
+                      BDRV_REQ_ZERO_WRITE);
 }
 
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
@@ -2528,7 +2571,35 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
         }
     }
 
-    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+    if (!(bs->zero_beyond_eof && bs->growable)) {
+        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+    } else {
+        /* Read zeros after EOF of growable BDSes */
+        int64_t len, total_sectors, max_nb_sectors;
+
+        len = bdrv_getlength(bs);
+        if (len < 0) {
+            ret = len;
+            goto out;
+        }
+
+        total_sectors = len >> BDRV_SECTOR_BITS;
+        max_nb_sectors = MAX(0, total_sectors - sector_num);
+        if (max_nb_sectors > 0) {
+            ret = drv->bdrv_co_readv(bs, sector_num,
+                                     MIN(nb_sectors, max_nb_sectors), qiov);
+        } else {
+            ret = 0;
+        }
+
+        /* Reading beyond end of file is supposed to produce zeroes */
+        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
+            uint64_t offset = MAX(0, total_sectors - sector_num);
+            uint64_t bytes = (sector_num + nb_sectors - offset) *
+                              BDRV_SECTOR_SIZE;
+            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+        }
+    }
 
 out:
     tracked_request_end(&req);
@@ -2620,7 +2691,11 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
 
     tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
 
-    if (flags & BDRV_REQ_ZERO_WRITE) {
+    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+
+    if (ret < 0) {
+        /* Do nothing, write notifier decided to fail this request */
+    } else if (flags & BDRV_REQ_ZERO_WRITE) {
         ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
     } else {
         ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
@@ -2907,13 +2982,24 @@ int bdrv_get_flags(BlockDriverState *bs)
     return bs->open_flags;
 }
 
-void bdrv_flush_all(void)
+int bdrv_flush_all(void)
 {
     BlockDriverState *bs;
+    int result = 0;
 
     QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        bdrv_flush(bs);
+        int ret = bdrv_flush(bs);
+        if (ret < 0 && !result) {
+            result = ret;
+        }
     }
+
+    return result;
+}
+
+int bdrv_has_zero_init_1(BlockDriverState *bs)
+{
+    return 1;
 }
 
 int bdrv_has_zero_init(BlockDriverState *bs)
@@ -2924,7 +3010,8 @@ int bdrv_has_zero_init(BlockDriverState *bs)
         return bs->drv->bdrv_has_zero_init(bs);
     }
 
-    return 1;
+    /* safe default */
+    return 0;
 }
 
 typedef struct BdrvCoIsAllocatedData {
@@ -3660,7 +3747,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
     double   elapsed_time;
     int      bps_ret, iops_ret;
 
-    now = qemu_get_clock_ns(vm_clock);
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     if (now > bs->slice_end) {
         bs->slice_start = now;
         bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
@@ -3680,7 +3767,7 @@ static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
             *wait = max_wait;
         }
 
-        now = qemu_get_clock_ns(vm_clock);
+        now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
         if (bs->slice_end < now + max_wait) {
             bs->slice_end = now + max_wait;
         }
@@ -4450,7 +4537,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
         return;
     }
 
-    proto_drv = bdrv_find_protocol(filename);
+    proto_drv = bdrv_find_protocol(filename, true);
     if (!proto_drv) {
         error_setg(errp, "Unknown protocol '%s'", filename);
         return;
@@ -4581,3 +4668,9 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs)
     /* Currently BlockDriverState always uses the main loop AioContext */
     return qemu_get_aio_context();
 }
+
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+                                    NotifierWithReturn *notifier)
+{
+    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
+}