]> git.proxmox.com Git - mirror_qemu.git/blob - block/block-backend.c
Merge remote-tracking branch 'remotes/hreitz-gitlab/tags/pull-block-2022-02-01' into...
[mirror_qemu.git] / block / block-backend.c
1 /*
2 * QEMU Block backends
3 *
4 * Copyright (C) 2014-2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Markus Armbruster <armbru@redhat.com>,
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1
10 * or later. See the COPYING.LIB file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "sysemu/block-backend.h"
15 #include "block/block_int.h"
16 #include "block/blockjob.h"
17 #include "block/coroutines.h"
18 #include "block/throttle-groups.h"
19 #include "hw/qdev-core.h"
20 #include "sysemu/blockdev.h"
21 #include "sysemu/runstate.h"
22 #include "sysemu/replay.h"
23 #include "qapi/error.h"
24 #include "qapi/qapi-events-block.h"
25 #include "qemu/id.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/option.h"
28 #include "trace.h"
29 #include "migration/misc.h"
30
31 /* Number of coroutines to reserve per attached device model */
32 #define COROUTINE_POOL_RESERVATION 64
33
34 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
35
36 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
37
/*
 * One registered pair of AioContext callbacks plus the opaque pointer that is
 * handed back to them.  Entries are linked into BlockBackend.aio_notifiers
 * and are forwarded to the root node by blk_root_attach()/blk_root_detach().
 */
typedef struct BlockBackendAioNotifier {
    /* Receives the new AioContext and @opaque */
    void (*attached_aio_context)(AioContext *new_context, void *opaque);
    /* Receives @opaque only */
    void (*detach_aio_context)(void *opaque);
    /* Caller-supplied pointer passed unchanged to both callbacks */
    void *opaque;
    /* Linkage in BlockBackend.aio_notifiers */
    QLIST_ENTRY(BlockBackendAioNotifier) list;
} BlockBackendAioNotifier;
44
45 struct BlockBackend {
46 char *name;
47 int refcnt;
48 BdrvChild *root;
49 AioContext *ctx;
50 DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
51 QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
52 QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
53 BlockBackendPublic public;
54
55 DeviceState *dev; /* attached device model, if any */
56 const BlockDevOps *dev_ops;
57 void *dev_opaque;
58
59 /* the block size for which the guest device expects atomicity */
60 int guest_block_size;
61
62 /* If the BDS tree is removed, some of its options are stored here (which
63 * can be used to restore those options in the new BDS on insert) */
64 BlockBackendRootState root_state;
65
66 bool enable_write_cache;
67
68 /* I/O stats (display with "info blockstats"). */
69 BlockAcctStats stats;
70
71 BlockdevOnError on_read_error, on_write_error;
72 bool iostatus_enabled;
73 BlockDeviceIoStatus iostatus;
74
75 uint64_t perm;
76 uint64_t shared_perm;
77 bool disable_perm;
78
79 bool allow_aio_context_change;
80 bool allow_write_beyond_eof;
81
82 NotifierList remove_bs_notifiers, insert_bs_notifiers;
83 QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
84
85 int quiesce_counter;
86 CoQueue queued_requests;
87 bool disable_request_queuing;
88
89 VMChangeStateEntry *vmsh;
90 bool force_allow_inactivate;
91
92 /* Number of in-flight aio requests. BlockDriverState also counts
93 * in-flight requests but aio requests can exist even when blk->root is
94 * NULL, so we cannot rely on its counter for that case.
95 * Accessed with atomic ops.
96 */
97 unsigned int in_flight;
98 };
99
/*
 * AIOCB type described by block_backend_aiocb_info: the common BlockAIOCB
 * header followed by the owning BlockBackend and an integer result.
 */
typedef struct BlockBackendAIOCB {
    BlockAIOCB common;
    /* BlockBackend this request belongs to */
    BlockBackend *blk;
    /* Result code carried by the request */
    int ret;
} BlockBackendAIOCB;
105
106 static const AIOCBInfo block_backend_aiocb_info = {
107 .get_aio_context = blk_aiocb_get_aio_context,
108 .aiocb_size = sizeof(BlockBackendAIOCB),
109 };
110
111 static void drive_info_del(DriveInfo *dinfo);
112 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
113
/*
 * All BlockBackends.  blk_new() appends to this list and blk_delete()
 * removes from it; blk_all_next() iterates over it.
 */
static QTAILQ_HEAD(, BlockBackend) block_backends =
    QTAILQ_HEAD_INITIALIZER(block_backends);
117
/*
 * All BlockBackends referenced by the monitor and which are iterated through
 * by blk_next().  Membership is managed by monitor_add_blk() and
 * monitor_remove_blk().
 */
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
122
123 static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
124 int *child_flags, QDict *child_options,
125 int parent_flags, QDict *parent_options)
126 {
127 /* We're not supposed to call this function for root nodes */
128 abort();
129 }
130 static void blk_root_drained_begin(BdrvChild *child);
131 static bool blk_root_drained_poll(BdrvChild *child);
132 static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
133
134 static void blk_root_change_media(BdrvChild *child, bool load);
135 static void blk_root_resize(BdrvChild *child);
136
137 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
138 GSList **ignore, Error **errp);
139 static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
140 GSList **ignore);
141
142 static char *blk_root_get_parent_desc(BdrvChild *child)
143 {
144 BlockBackend *blk = child->opaque;
145 g_autofree char *dev_id = NULL;
146
147 if (blk->name) {
148 return g_strdup_printf("block device '%s'", blk->name);
149 }
150
151 dev_id = blk_get_attached_dev_id(blk);
152 if (*dev_id) {
153 return g_strdup_printf("block device '%s'", dev_id);
154 } else {
155 /* TODO Callback into the BB owner for something more detailed */
156 return g_strdup("an unnamed block device");
157 }
158 }
159
160 static const char *blk_root_get_name(BdrvChild *child)
161 {
162 return blk_name(child->opaque);
163 }
164
165 static void blk_vm_state_changed(void *opaque, bool running, RunState state)
166 {
167 Error *local_err = NULL;
168 BlockBackend *blk = opaque;
169
170 if (state == RUN_STATE_INMIGRATE) {
171 return;
172 }
173
174 qemu_del_vm_change_state_handler(blk->vmsh);
175 blk->vmsh = NULL;
176 blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
177 if (local_err) {
178 error_report_err(local_err);
179 }
180 }
181
182 /*
183 * Notifies the user of the BlockBackend that migration has completed. qdev
184 * devices can tighten their permissions in response (specifically revoke
185 * shared write permissions that we needed for storage migration).
186 *
187 * If an error is returned, the VM cannot be allowed to be resumed.
188 */
189 static void blk_root_activate(BdrvChild *child, Error **errp)
190 {
191 BlockBackend *blk = child->opaque;
192 Error *local_err = NULL;
193 uint64_t saved_shared_perm;
194
195 if (!blk->disable_perm) {
196 return;
197 }
198
199 blk->disable_perm = false;
200
201 /*
202 * blk->shared_perm contains the permissions we want to share once
203 * migration is really completely done. For now, we need to share
204 * all; but we also need to retain blk->shared_perm, which is
205 * overwritten by a successful blk_set_perm() call. Save it and
206 * restore it below.
207 */
208 saved_shared_perm = blk->shared_perm;
209
210 blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
211 if (local_err) {
212 error_propagate(errp, local_err);
213 blk->disable_perm = true;
214 return;
215 }
216 blk->shared_perm = saved_shared_perm;
217
218 if (runstate_check(RUN_STATE_INMIGRATE)) {
219 /* Activation can happen when migration process is still active, for
220 * example when nbd_server_add is called during non-shared storage
221 * migration. Defer the shared_perm update to migration completion. */
222 if (!blk->vmsh) {
223 blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
224 blk);
225 }
226 return;
227 }
228
229 blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
230 if (local_err) {
231 error_propagate(errp, local_err);
232 blk->disable_perm = true;
233 return;
234 }
235 }
236
237 void blk_set_force_allow_inactivate(BlockBackend *blk)
238 {
239 blk->force_allow_inactivate = true;
240 }
241
242 static bool blk_can_inactivate(BlockBackend *blk)
243 {
244 /* If it is a guest device, inactivate is ok. */
245 if (blk->dev || blk_name(blk)[0]) {
246 return true;
247 }
248
249 /* Inactivating means no more writes to the image can be done,
250 * even if those writes would be changes invisible to the
251 * guest. For block job BBs that satisfy this, we can just allow
252 * it. This is the case for mirror job source, which is required
253 * by libvirt non-shared block migration. */
254 if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
255 return true;
256 }
257
258 return blk->force_allow_inactivate;
259 }
260
261 static int blk_root_inactivate(BdrvChild *child)
262 {
263 BlockBackend *blk = child->opaque;
264
265 if (blk->disable_perm) {
266 return 0;
267 }
268
269 if (!blk_can_inactivate(blk)) {
270 return -EPERM;
271 }
272
273 blk->disable_perm = true;
274 if (blk->root) {
275 bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
276 }
277
278 return 0;
279 }
280
281 static void blk_root_attach(BdrvChild *child)
282 {
283 BlockBackend *blk = child->opaque;
284 BlockBackendAioNotifier *notifier;
285
286 trace_blk_root_attach(child, blk, child->bs);
287
288 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
289 bdrv_add_aio_context_notifier(child->bs,
290 notifier->attached_aio_context,
291 notifier->detach_aio_context,
292 notifier->opaque);
293 }
294 }
295
296 static void blk_root_detach(BdrvChild *child)
297 {
298 BlockBackend *blk = child->opaque;
299 BlockBackendAioNotifier *notifier;
300
301 trace_blk_root_detach(child, blk, child->bs);
302
303 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
304 bdrv_remove_aio_context_notifier(child->bs,
305 notifier->attached_aio_context,
306 notifier->detach_aio_context,
307 notifier->opaque);
308 }
309 }
310
311 static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
312 {
313 BlockBackend *blk = c->opaque;
314
315 return blk_get_aio_context(blk);
316 }
317
318 static const BdrvChildClass child_root = {
319 .inherit_options = blk_root_inherit_options,
320
321 .change_media = blk_root_change_media,
322 .resize = blk_root_resize,
323 .get_name = blk_root_get_name,
324 .get_parent_desc = blk_root_get_parent_desc,
325
326 .drained_begin = blk_root_drained_begin,
327 .drained_poll = blk_root_drained_poll,
328 .drained_end = blk_root_drained_end,
329
330 .activate = blk_root_activate,
331 .inactivate = blk_root_inactivate,
332
333 .attach = blk_root_attach,
334 .detach = blk_root_detach,
335
336 .can_set_aio_ctx = blk_root_can_set_aio_ctx,
337 .set_aio_ctx = blk_root_set_aio_ctx,
338
339 .get_parent_aio_context = blk_root_get_parent_aio_context,
340 };
341
342 /*
343 * Create a new BlockBackend with a reference count of one.
344 *
345 * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
346 * to request for a block driver node that is attached to this BlockBackend.
347 * @shared_perm is a bitmask which describes which permissions may be granted
348 * to other users of the attached node.
349 * Both sets of permissions can be changed later using blk_set_perm().
350 *
351 * Return the new BlockBackend on success, null on failure.
352 */
353 BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
354 {
355 BlockBackend *blk;
356
357 blk = g_new0(BlockBackend, 1);
358 blk->refcnt = 1;
359 blk->ctx = ctx;
360 blk->perm = perm;
361 blk->shared_perm = shared_perm;
362 blk_set_enable_write_cache(blk, true);
363
364 blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
365 blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
366
367 block_acct_init(&blk->stats);
368
369 qemu_co_queue_init(&blk->queued_requests);
370 notifier_list_init(&blk->remove_bs_notifiers);
371 notifier_list_init(&blk->insert_bs_notifiers);
372 QLIST_INIT(&blk->aio_notifiers);
373
374 QTAILQ_INSERT_TAIL(&block_backends, blk, link);
375 return blk;
376 }
377
378 /*
379 * Create a new BlockBackend connected to an existing BlockDriverState.
380 *
381 * @perm is a bitmasks of BLK_PERM_* constants which describes the
382 * permissions to request for @bs that is attached to this
383 * BlockBackend. @shared_perm is a bitmask which describes which
384 * permissions may be granted to other users of the attached node.
385 * Both sets of permissions can be changed later using blk_set_perm().
386 *
387 * Return the new BlockBackend on success, null on failure.
388 */
389 BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
390 uint64_t shared_perm, Error **errp)
391 {
392 BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
393
394 if (blk_insert_bs(blk, bs, errp) < 0) {
395 blk_unref(blk);
396 return NULL;
397 }
398 return blk;
399 }
400
401 /*
402 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
403 * The new BlockBackend is in the main AioContext.
404 *
405 * Just as with bdrv_open(), after having called this function the reference to
406 * @options belongs to the block layer (even on failure).
407 *
408 * TODO: Remove @filename and @flags; it should be possible to specify a whole
409 * BDS tree just by specifying the @options QDict (or @reference,
410 * alternatively). At the time of adding this function, this is not possible,
411 * though, so callers of this function have to be able to specify @filename and
412 * @flags.
413 */
414 BlockBackend *blk_new_open(const char *filename, const char *reference,
415 QDict *options, int flags, Error **errp)
416 {
417 BlockBackend *blk;
418 BlockDriverState *bs;
419 uint64_t perm = 0;
420 uint64_t shared = BLK_PERM_ALL;
421
422 /*
423 * blk_new_open() is mainly used in .bdrv_create implementations and the
424 * tools where sharing isn't a major concern because the BDS stays private
425 * and the file is generally not supposed to be used by a second process,
426 * so we just request permission according to the flags.
427 *
428 * The exceptions are xen_disk and blockdev_init(); in these cases, the
429 * caller of blk_new_open() doesn't make use of the permissions, but they
430 * shouldn't hurt either. We can still share everything here because the
431 * guest devices will add their own blockers if they can't share.
432 */
433 if ((flags & BDRV_O_NO_IO) == 0) {
434 perm |= BLK_PERM_CONSISTENT_READ;
435 if (flags & BDRV_O_RDWR) {
436 perm |= BLK_PERM_WRITE;
437 }
438 }
439 if (flags & BDRV_O_RESIZE) {
440 perm |= BLK_PERM_RESIZE;
441 }
442 if (flags & BDRV_O_NO_SHARE) {
443 shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
444 }
445
446 blk = blk_new(qemu_get_aio_context(), perm, shared);
447 bs = bdrv_open(filename, reference, options, flags, errp);
448 if (!bs) {
449 blk_unref(blk);
450 return NULL;
451 }
452
453 blk->root = bdrv_root_attach_child(bs, "root", &child_root,
454 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
455 perm, shared, blk, errp);
456 if (!blk->root) {
457 blk_unref(blk);
458 return NULL;
459 }
460
461 return blk;
462 }
463
464 static void blk_delete(BlockBackend *blk)
465 {
466 assert(!blk->refcnt);
467 assert(!blk->name);
468 assert(!blk->dev);
469 if (blk->public.throttle_group_member.throttle_state) {
470 blk_io_limits_disable(blk);
471 }
472 if (blk->root) {
473 blk_remove_bs(blk);
474 }
475 if (blk->vmsh) {
476 qemu_del_vm_change_state_handler(blk->vmsh);
477 blk->vmsh = NULL;
478 }
479 assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
480 assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
481 assert(QLIST_EMPTY(&blk->aio_notifiers));
482 QTAILQ_REMOVE(&block_backends, blk, link);
483 drive_info_del(blk->legacy_dinfo);
484 block_acct_cleanup(&blk->stats);
485 g_free(blk);
486 }
487
488 static void drive_info_del(DriveInfo *dinfo)
489 {
490 if (!dinfo) {
491 return;
492 }
493 qemu_opts_del(dinfo->opts);
494 g_free(dinfo);
495 }
496
497 int blk_get_refcnt(BlockBackend *blk)
498 {
499 return blk ? blk->refcnt : 0;
500 }
501
502 /*
503 * Increment @blk's reference count.
504 * @blk must not be null.
505 */
506 void blk_ref(BlockBackend *blk)
507 {
508 assert(blk->refcnt > 0);
509 blk->refcnt++;
510 }
511
512 /*
513 * Decrement @blk's reference count.
514 * If this drops it to zero, destroy @blk.
515 * For convenience, do nothing if @blk is null.
516 */
517 void blk_unref(BlockBackend *blk)
518 {
519 if (blk) {
520 assert(blk->refcnt > 0);
521 if (blk->refcnt > 1) {
522 blk->refcnt--;
523 } else {
524 blk_drain(blk);
525 /* blk_drain() cannot resurrect blk, nobody held a reference */
526 assert(blk->refcnt == 1);
527 blk->refcnt = 0;
528 blk_delete(blk);
529 }
530 }
531 }
532
533 /*
534 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
535 * ones which are hidden (i.e. are not referenced by the monitor).
536 */
537 BlockBackend *blk_all_next(BlockBackend *blk)
538 {
539 return blk ? QTAILQ_NEXT(blk, link)
540 : QTAILQ_FIRST(&block_backends);
541 }
542
543 void blk_remove_all_bs(void)
544 {
545 BlockBackend *blk = NULL;
546
547 while ((blk = blk_all_next(blk)) != NULL) {
548 AioContext *ctx = blk_get_aio_context(blk);
549
550 aio_context_acquire(ctx);
551 if (blk->root) {
552 blk_remove_bs(blk);
553 }
554 aio_context_release(ctx);
555 }
556 }
557
558 /*
559 * Return the monitor-owned BlockBackend after @blk.
560 * If @blk is null, return the first one.
561 * Else, return @blk's next sibling, which may be null.
562 *
563 * To iterate over all BlockBackends, do
564 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
565 * ...
566 * }
567 */
568 BlockBackend *blk_next(BlockBackend *blk)
569 {
570 return blk ? QTAILQ_NEXT(blk, monitor_link)
571 : QTAILQ_FIRST(&monitor_block_backends);
572 }
573
574 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
575 * the monitor or attached to a BlockBackend */
576 BlockDriverState *bdrv_next(BdrvNextIterator *it)
577 {
578 BlockDriverState *bs, *old_bs;
579
580 /* Must be called from the main loop */
581 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
582
583 /* First, return all root nodes of BlockBackends. In order to avoid
584 * returning a BDS twice when multiple BBs refer to it, we only return it
585 * if the BB is the first one in the parent list of the BDS. */
586 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
587 BlockBackend *old_blk = it->blk;
588
589 old_bs = old_blk ? blk_bs(old_blk) : NULL;
590
591 do {
592 it->blk = blk_all_next(it->blk);
593 bs = it->blk ? blk_bs(it->blk) : NULL;
594 } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
595
596 if (it->blk) {
597 blk_ref(it->blk);
598 }
599 blk_unref(old_blk);
600
601 if (bs) {
602 bdrv_ref(bs);
603 bdrv_unref(old_bs);
604 return bs;
605 }
606 it->phase = BDRV_NEXT_MONITOR_OWNED;
607 } else {
608 old_bs = it->bs;
609 }
610
611 /* Then return the monitor-owned BDSes without a BB attached. Ignore all
612 * BDSes that are attached to a BlockBackend here; they have been handled
613 * by the above block already */
614 do {
615 it->bs = bdrv_next_monitor_owned(it->bs);
616 bs = it->bs;
617 } while (bs && bdrv_has_blk(bs));
618
619 if (bs) {
620 bdrv_ref(bs);
621 }
622 bdrv_unref(old_bs);
623
624 return bs;
625 }
626
627 static void bdrv_next_reset(BdrvNextIterator *it)
628 {
629 *it = (BdrvNextIterator) {
630 .phase = BDRV_NEXT_BACKEND_ROOTS,
631 };
632 }
633
634 BlockDriverState *bdrv_first(BdrvNextIterator *it)
635 {
636 bdrv_next_reset(it);
637 return bdrv_next(it);
638 }
639
640 /* Must be called when aborting a bdrv_next() iteration before
641 * bdrv_next() returns NULL */
642 void bdrv_next_cleanup(BdrvNextIterator *it)
643 {
644 /* Must be called from the main loop */
645 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
646
647 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
648 if (it->blk) {
649 bdrv_unref(blk_bs(it->blk));
650 blk_unref(it->blk);
651 }
652 } else {
653 bdrv_unref(it->bs);
654 }
655
656 bdrv_next_reset(it);
657 }
658
659 /*
660 * Add a BlockBackend into the list of backends referenced by the monitor, with
661 * the given @name acting as the handle for the monitor.
662 * Strictly for use by blockdev.c.
663 *
664 * @name must not be null or empty.
665 *
666 * Returns true on success and false on failure. In the latter case, an Error
667 * object is returned through @errp.
668 */
669 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
670 {
671 assert(!blk->name);
672 assert(name && name[0]);
673
674 if (!id_wellformed(name)) {
675 error_setg(errp, "Invalid device name");
676 return false;
677 }
678 if (blk_by_name(name)) {
679 error_setg(errp, "Device with id '%s' already exists", name);
680 return false;
681 }
682 if (bdrv_find_node(name)) {
683 error_setg(errp,
684 "Device name '%s' conflicts with an existing node name",
685 name);
686 return false;
687 }
688
689 blk->name = g_strdup(name);
690 QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
691 return true;
692 }
693
694 /*
695 * Remove a BlockBackend from the list of backends referenced by the monitor.
696 * Strictly for use by blockdev.c.
697 */
698 void monitor_remove_blk(BlockBackend *blk)
699 {
700 if (!blk->name) {
701 return;
702 }
703
704 QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
705 g_free(blk->name);
706 blk->name = NULL;
707 }
708
709 /*
710 * Return @blk's name, a non-null string.
711 * Returns an empty string iff @blk is not referenced by the monitor.
712 */
713 const char *blk_name(const BlockBackend *blk)
714 {
715 return blk->name ?: "";
716 }
717
718 /*
719 * Return the BlockBackend with name @name if it exists, else null.
720 * @name must not be null.
721 */
722 BlockBackend *blk_by_name(const char *name)
723 {
724 BlockBackend *blk = NULL;
725
726 assert(name);
727 while ((blk = blk_next(blk)) != NULL) {
728 if (!strcmp(name, blk->name)) {
729 return blk;
730 }
731 }
732 return NULL;
733 }
734
735 /*
736 * Return the BlockDriverState attached to @blk if any, else null.
737 */
738 BlockDriverState *blk_bs(BlockBackend *blk)
739 {
740 return blk->root ? blk->root->bs : NULL;
741 }
742
743 static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
744 {
745 BdrvChild *child;
746 QLIST_FOREACH(child, &bs->parents, next_parent) {
747 if (child->klass == &child_root) {
748 return child->opaque;
749 }
750 }
751
752 return NULL;
753 }
754
755 /*
756 * Returns true if @bs has an associated BlockBackend.
757 */
758 bool bdrv_has_blk(BlockDriverState *bs)
759 {
760 return bdrv_first_blk(bs) != NULL;
761 }
762
763 /*
764 * Returns true if @bs has only BlockBackends as parents.
765 */
766 bool bdrv_is_root_node(BlockDriverState *bs)
767 {
768 BdrvChild *c;
769
770 QLIST_FOREACH(c, &bs->parents, next_parent) {
771 if (c->klass != &child_root) {
772 return false;
773 }
774 }
775
776 return true;
777 }
778
779 /*
780 * Return @blk's DriveInfo if any, else null.
781 */
782 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
783 {
784 return blk->legacy_dinfo;
785 }
786
787 /*
788 * Set @blk's DriveInfo to @dinfo, and return it.
789 * @blk must not have a DriveInfo set already.
790 * No other BlockBackend may have the same DriveInfo set.
791 */
792 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
793 {
794 assert(!blk->legacy_dinfo);
795 return blk->legacy_dinfo = dinfo;
796 }
797
798 /*
799 * Return the BlockBackend with DriveInfo @dinfo.
800 * It must exist.
801 */
802 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
803 {
804 BlockBackend *blk = NULL;
805
806 while ((blk = blk_next(blk)) != NULL) {
807 if (blk->legacy_dinfo == dinfo) {
808 return blk;
809 }
810 }
811 abort();
812 }
813
814 /*
815 * Returns a pointer to the publicly accessible fields of @blk.
816 */
817 BlockBackendPublic *blk_get_public(BlockBackend *blk)
818 {
819 return &blk->public;
820 }
821
822 /*
823 * Returns a BlockBackend given the associated @public fields.
824 */
825 BlockBackend *blk_by_public(BlockBackendPublic *public)
826 {
827 return container_of(public, BlockBackend, public);
828 }
829
830 /*
831 * Disassociates the currently associated BlockDriverState from @blk.
832 */
833 void blk_remove_bs(BlockBackend *blk)
834 {
835 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
836 BdrvChild *root;
837
838 notifier_list_notify(&blk->remove_bs_notifiers, blk);
839 if (tgm->throttle_state) {
840 BlockDriverState *bs = blk_bs(blk);
841
842 /*
843 * Take a ref in case blk_bs() changes across bdrv_drained_begin(), for
844 * example, if a temporary filter node is removed by a blockjob.
845 */
846 bdrv_ref(bs);
847 bdrv_drained_begin(bs);
848 throttle_group_detach_aio_context(tgm);
849 throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
850 bdrv_drained_end(bs);
851 bdrv_unref(bs);
852 }
853
854 blk_update_root_state(blk);
855
856 /* bdrv_root_unref_child() will cause blk->root to become stale and may
857 * switch to a completion coroutine later on. Let's drain all I/O here
858 * to avoid that and a potential QEMU crash.
859 */
860 blk_drain(blk);
861 root = blk->root;
862 blk->root = NULL;
863 bdrv_root_unref_child(root);
864 }
865
866 /*
867 * Associates a new BlockDriverState with @blk.
868 */
869 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
870 {
871 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
872 bdrv_ref(bs);
873 blk->root = bdrv_root_attach_child(bs, "root", &child_root,
874 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
875 blk->perm, blk->shared_perm,
876 blk, errp);
877 if (blk->root == NULL) {
878 return -EPERM;
879 }
880
881 notifier_list_notify(&blk->insert_bs_notifiers, blk);
882 if (tgm->throttle_state) {
883 throttle_group_detach_aio_context(tgm);
884 throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
885 }
886
887 return 0;
888 }
889
890 /*
891 * Change BlockDriverState associated with @blk.
892 */
893 int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
894 {
895 return bdrv_replace_child_bs(blk->root, new_bs, errp);
896 }
897
898 /*
899 * Sets the permission bitmasks that the user of the BlockBackend needs.
900 */
901 int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
902 Error **errp)
903 {
904 int ret;
905
906 if (blk->root && !blk->disable_perm) {
907 ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
908 if (ret < 0) {
909 return ret;
910 }
911 }
912
913 blk->perm = perm;
914 blk->shared_perm = shared_perm;
915
916 return 0;
917 }
918
919 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
920 {
921 *perm = blk->perm;
922 *shared_perm = blk->shared_perm;
923 }
924
925 /*
926 * Attach device model @dev to @blk.
927 * Return 0 on success, -EBUSY when a device model is attached already.
928 */
929 int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
930 {
931 if (blk->dev) {
932 return -EBUSY;
933 }
934
935 /* While migration is still incoming, we don't need to apply the
936 * permissions of guest device BlockBackends. We might still have a block
937 * job or NBD server writing to the image for storage migration. */
938 if (runstate_check(RUN_STATE_INMIGRATE)) {
939 blk->disable_perm = true;
940 }
941
942 blk_ref(blk);
943 blk->dev = dev;
944 blk_iostatus_reset(blk);
945
946 return 0;
947 }
948
949 /*
950 * Detach device model @dev from @blk.
951 * @dev must be currently attached to @blk.
952 */
953 void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
954 {
955 assert(blk->dev == dev);
956 blk->dev = NULL;
957 blk->dev_ops = NULL;
958 blk->dev_opaque = NULL;
959 blk->guest_block_size = 512;
960 blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
961 blk_unref(blk);
962 }
963
964 /*
965 * Return the device model attached to @blk if any, else null.
966 */
967 DeviceState *blk_get_attached_dev(BlockBackend *blk)
968 {
969 return blk->dev;
970 }
971
972 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block
973 * device attached to the BlockBackend. */
974 char *blk_get_attached_dev_id(BlockBackend *blk)
975 {
976 DeviceState *dev = blk->dev;
977
978 if (!dev) {
979 return g_strdup("");
980 } else if (dev->id) {
981 return g_strdup(dev->id);
982 }
983
984 return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
985 }
986
987 /*
988 * Return the BlockBackend which has the device model @dev attached if it
989 * exists, else null.
990 *
991 * @dev must not be null.
992 */
993 BlockBackend *blk_by_dev(void *dev)
994 {
995 BlockBackend *blk = NULL;
996
997 assert(dev != NULL);
998 while ((blk = blk_all_next(blk)) != NULL) {
999 if (blk->dev == dev) {
1000 return blk;
1001 }
1002 }
1003 return NULL;
1004 }
1005
1006 /*
1007 * Set @blk's device model callbacks to @ops.
1008 * @opaque is the opaque argument to pass to the callbacks.
1009 * This is for use by device models.
1010 */
1011 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
1012 void *opaque)
1013 {
1014 blk->dev_ops = ops;
1015 blk->dev_opaque = opaque;
1016
1017 /* Are we currently quiesced? Should we enforce this right now? */
1018 if (blk->quiesce_counter && ops->drained_begin) {
1019 ops->drained_begin(opaque);
1020 }
1021 }
1022
1023 /*
1024 * Notify @blk's attached device model of media change.
1025 *
1026 * If @load is true, notify of media load. This action can fail, meaning that
1027 * the medium cannot be loaded. @errp is set then.
1028 *
1029 * If @load is false, notify of media eject. This can never fail.
1030 *
1031 * Also send DEVICE_TRAY_MOVED events as appropriate.
1032 */
1033 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
1034 {
1035 if (blk->dev_ops && blk->dev_ops->change_media_cb) {
1036 bool tray_was_open, tray_is_open;
1037 Error *local_err = NULL;
1038
1039 tray_was_open = blk_dev_is_tray_open(blk);
1040 blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
1041 if (local_err) {
1042 assert(load == true);
1043 error_propagate(errp, local_err);
1044 return;
1045 }
1046 tray_is_open = blk_dev_is_tray_open(blk);
1047
1048 if (tray_was_open != tray_is_open) {
1049 char *id = blk_get_attached_dev_id(blk);
1050 qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
1051 g_free(id);
1052 }
1053 }
1054 }
1055
1056 static void blk_root_change_media(BdrvChild *child, bool load)
1057 {
1058 blk_dev_change_media_cb(child->opaque, load, NULL);
1059 }
1060
1061 /*
1062 * Does @blk's attached device model have removable media?
1063 * %true if no device model is attached.
1064 */
1065 bool blk_dev_has_removable_media(BlockBackend *blk)
1066 {
1067 return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
1068 }
1069
1070 /*
1071 * Does @blk's attached device model have a tray?
1072 */
1073 bool blk_dev_has_tray(BlockBackend *blk)
1074 {
1075 return blk->dev_ops && blk->dev_ops->is_tray_open;
1076 }
1077
1078 /*
1079 * Notify @blk's attached device model of a media eject request.
1080 * If @force is true, the medium is about to be yanked out forcefully.
1081 */
1082 void blk_dev_eject_request(BlockBackend *blk, bool force)
1083 {
1084 if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
1085 blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
1086 }
1087 }
1088
1089 /*
1090 * Does @blk's attached device model have a tray, and is it open?
1091 */
1092 bool blk_dev_is_tray_open(BlockBackend *blk)
1093 {
1094 if (blk_dev_has_tray(blk)) {
1095 return blk->dev_ops->is_tray_open(blk->dev_opaque);
1096 }
1097 return false;
1098 }
1099
1100 /*
1101 * Does @blk's attached device model have the medium locked?
1102 * %false if the device model has no such lock.
1103 */
1104 bool blk_dev_is_medium_locked(BlockBackend *blk)
1105 {
1106 if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
1107 return blk->dev_ops->is_medium_locked(blk->dev_opaque);
1108 }
1109 return false;
1110 }
1111
1112 /*
1113 * Notify @blk's attached device model of a backend size change.
1114 */
1115 static void blk_root_resize(BdrvChild *child)
1116 {
1117 BlockBackend *blk = child->opaque;
1118
1119 if (blk->dev_ops && blk->dev_ops->resize_cb) {
1120 blk->dev_ops->resize_cb(blk->dev_opaque);
1121 }
1122 }
1123
1124 void blk_iostatus_enable(BlockBackend *blk)
1125 {
1126 blk->iostatus_enabled = true;
1127 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1128 }
1129
1130 /* The I/O status is only enabled if the drive explicitly
1131 * enables it _and_ the VM is configured to stop on errors */
1132 bool blk_iostatus_is_enabled(const BlockBackend *blk)
1133 {
1134 return (blk->iostatus_enabled &&
1135 (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
1136 blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
1137 blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
1138 }
1139
1140 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
1141 {
1142 return blk->iostatus;
1143 }
1144
1145 void blk_iostatus_disable(BlockBackend *blk)
1146 {
1147 blk->iostatus_enabled = false;
1148 }
1149
1150 void blk_iostatus_reset(BlockBackend *blk)
1151 {
1152 if (blk_iostatus_is_enabled(blk)) {
1153 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1154 }
1155 }
1156
1157 void blk_iostatus_set_err(BlockBackend *blk, int error)
1158 {
1159 assert(blk_iostatus_is_enabled(blk));
1160 if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1161 blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
1162 BLOCK_DEVICE_IO_STATUS_FAILED;
1163 }
1164 }
1165
1166 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
1167 {
1168 blk->allow_write_beyond_eof = allow;
1169 }
1170
1171 void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
1172 {
1173 blk->allow_aio_context_change = allow;
1174 }
1175
1176 void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
1177 {
1178 blk->disable_request_queuing = disable;
1179 }
1180
1181 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
1182 int64_t bytes)
1183 {
1184 int64_t len;
1185
1186 if (bytes < 0) {
1187 return -EIO;
1188 }
1189
1190 if (!blk_is_available(blk)) {
1191 return -ENOMEDIUM;
1192 }
1193
1194 if (offset < 0) {
1195 return -EIO;
1196 }
1197
1198 if (!blk->allow_write_beyond_eof) {
1199 len = blk_getlength(blk);
1200 if (len < 0) {
1201 return len;
1202 }
1203
1204 if (offset > len || len - offset < bytes) {
1205 return -EIO;
1206 }
1207 }
1208
1209 return 0;
1210 }
1211
1212 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1213 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
1214 {
1215 assert(blk->in_flight > 0);
1216
1217 if (blk->quiesce_counter && !blk->disable_request_queuing) {
1218 blk_dec_in_flight(blk);
1219 qemu_co_queue_wait(&blk->queued_requests, NULL);
1220 blk_inc_in_flight(blk);
1221 }
1222 }
1223
1224 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1225 int coroutine_fn
1226 blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
1227 QEMUIOVector *qiov, BdrvRequestFlags flags)
1228 {
1229 int ret;
1230 BlockDriverState *bs;
1231
1232 blk_wait_while_drained(blk);
1233
1234 /* Call blk_bs() only after waiting, the graph may have changed */
1235 bs = blk_bs(blk);
1236 trace_blk_co_preadv(blk, bs, offset, bytes, flags);
1237
1238 ret = blk_check_byte_request(blk, offset, bytes);
1239 if (ret < 0) {
1240 return ret;
1241 }
1242
1243 bdrv_inc_in_flight(bs);
1244
1245 /* throttling disk I/O */
1246 if (blk->public.throttle_group_member.throttle_state) {
1247 throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1248 bytes, false);
1249 }
1250
1251 ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
1252 bdrv_dec_in_flight(bs);
1253 return ret;
1254 }
1255
1256 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
1257 int64_t bytes, QEMUIOVector *qiov,
1258 BdrvRequestFlags flags)
1259 {
1260 int ret;
1261
1262 blk_inc_in_flight(blk);
1263 ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags);
1264 blk_dec_in_flight(blk);
1265
1266 return ret;
1267 }
1268
1269 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1270 int coroutine_fn
1271 blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
1272 QEMUIOVector *qiov, size_t qiov_offset,
1273 BdrvRequestFlags flags)
1274 {
1275 int ret;
1276 BlockDriverState *bs;
1277
1278 blk_wait_while_drained(blk);
1279
1280 /* Call blk_bs() only after waiting, the graph may have changed */
1281 bs = blk_bs(blk);
1282 trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
1283
1284 ret = blk_check_byte_request(blk, offset, bytes);
1285 if (ret < 0) {
1286 return ret;
1287 }
1288
1289 bdrv_inc_in_flight(bs);
1290 /* throttling disk I/O */
1291 if (blk->public.throttle_group_member.throttle_state) {
1292 throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1293 bytes, true);
1294 }
1295
1296 if (!blk->enable_write_cache) {
1297 flags |= BDRV_REQ_FUA;
1298 }
1299
1300 ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
1301 flags);
1302 bdrv_dec_in_flight(bs);
1303 return ret;
1304 }
1305
1306 int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
1307 int64_t bytes,
1308 QEMUIOVector *qiov, size_t qiov_offset,
1309 BdrvRequestFlags flags)
1310 {
1311 int ret;
1312
1313 blk_inc_in_flight(blk);
1314 ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
1315 blk_dec_in_flight(blk);
1316
1317 return ret;
1318 }
1319
1320 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
1321 int64_t bytes, QEMUIOVector *qiov,
1322 BdrvRequestFlags flags)
1323 {
1324 return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
1325 }
1326
1327 static int coroutine_fn blk_pwritev_part(BlockBackend *blk, int64_t offset,
1328 int64_t bytes,
1329 QEMUIOVector *qiov, size_t qiov_offset,
1330 BdrvRequestFlags flags)
1331 {
1332 int ret;
1333
1334 blk_inc_in_flight(blk);
1335 ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
1336 blk_dec_in_flight(blk);
1337
1338 return ret;
1339 }
1340
1341 typedef struct BlkRwCo {
1342 BlockBackend *blk;
1343 int64_t offset;
1344 void *iobuf;
1345 int ret;
1346 BdrvRequestFlags flags;
1347 } BlkRwCo;
1348
1349 int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1350 int64_t bytes, BdrvRequestFlags flags)
1351 {
1352 return blk_pwritev_part(blk, offset, bytes, NULL, 0,
1353 flags | BDRV_REQ_ZERO_WRITE);
1354 }
1355
1356 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1357 {
1358 return bdrv_make_zero(blk->root, flags);
1359 }
1360
1361 void blk_inc_in_flight(BlockBackend *blk)
1362 {
1363 qatomic_inc(&blk->in_flight);
1364 }
1365
1366 void blk_dec_in_flight(BlockBackend *blk)
1367 {
1368 qatomic_dec(&blk->in_flight);
1369 aio_wait_kick();
1370 }
1371
1372 static void error_callback_bh(void *opaque)
1373 {
1374 struct BlockBackendAIOCB *acb = opaque;
1375
1376 blk_dec_in_flight(acb->blk);
1377 acb->common.cb(acb->common.opaque, acb->ret);
1378 qemu_aio_unref(acb);
1379 }
1380
1381 BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
1382 BlockCompletionFunc *cb,
1383 void *opaque, int ret)
1384 {
1385 struct BlockBackendAIOCB *acb;
1386
1387 blk_inc_in_flight(blk);
1388 acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1389 acb->blk = blk;
1390 acb->ret = ret;
1391
1392 replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1393 error_callback_bh, acb);
1394 return &acb->common;
1395 }
1396
1397 typedef struct BlkAioEmAIOCB {
1398 BlockAIOCB common;
1399 BlkRwCo rwco;
1400 int64_t bytes;
1401 bool has_returned;
1402 } BlkAioEmAIOCB;
1403
1404 static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
1405 {
1406 BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
1407
1408 return blk_get_aio_context(acb->rwco.blk);
1409 }
1410
1411 static const AIOCBInfo blk_aio_em_aiocb_info = {
1412 .aiocb_size = sizeof(BlkAioEmAIOCB),
1413 .get_aio_context = blk_aio_em_aiocb_get_aio_context,
1414 };
1415
1416 static void blk_aio_complete(BlkAioEmAIOCB *acb)
1417 {
1418 if (acb->has_returned) {
1419 acb->common.cb(acb->common.opaque, acb->rwco.ret);
1420 blk_dec_in_flight(acb->rwco.blk);
1421 qemu_aio_unref(acb);
1422 }
1423 }
1424
1425 static void blk_aio_complete_bh(void *opaque)
1426 {
1427 BlkAioEmAIOCB *acb = opaque;
1428 assert(acb->has_returned);
1429 blk_aio_complete(acb);
1430 }
1431
1432 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
1433 int64_t bytes,
1434 void *iobuf, CoroutineEntry co_entry,
1435 BdrvRequestFlags flags,
1436 BlockCompletionFunc *cb, void *opaque)
1437 {
1438 BlkAioEmAIOCB *acb;
1439 Coroutine *co;
1440
1441 blk_inc_in_flight(blk);
1442 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1443 acb->rwco = (BlkRwCo) {
1444 .blk = blk,
1445 .offset = offset,
1446 .iobuf = iobuf,
1447 .flags = flags,
1448 .ret = NOT_DONE,
1449 };
1450 acb->bytes = bytes;
1451 acb->has_returned = false;
1452
1453 co = qemu_coroutine_create(co_entry, acb);
1454 bdrv_coroutine_enter(blk_bs(blk), co);
1455
1456 acb->has_returned = true;
1457 if (acb->rwco.ret != NOT_DONE) {
1458 replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1459 blk_aio_complete_bh, acb);
1460 }
1461
1462 return &acb->common;
1463 }
1464
1465 static void blk_aio_read_entry(void *opaque)
1466 {
1467 BlkAioEmAIOCB *acb = opaque;
1468 BlkRwCo *rwco = &acb->rwco;
1469 QEMUIOVector *qiov = rwco->iobuf;
1470
1471 assert(qiov->size == acb->bytes);
1472 rwco->ret = blk_co_do_preadv(rwco->blk, rwco->offset, acb->bytes,
1473 qiov, rwco->flags);
1474 blk_aio_complete(acb);
1475 }
1476
1477 static void blk_aio_write_entry(void *opaque)
1478 {
1479 BlkAioEmAIOCB *acb = opaque;
1480 BlkRwCo *rwco = &acb->rwco;
1481 QEMUIOVector *qiov = rwco->iobuf;
1482
1483 assert(!qiov || qiov->size == acb->bytes);
1484 rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
1485 qiov, 0, rwco->flags);
1486 blk_aio_complete(acb);
1487 }
1488
1489 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1490 int64_t bytes, BdrvRequestFlags flags,
1491 BlockCompletionFunc *cb, void *opaque)
1492 {
1493 return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
1494 flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
1495 }
1496
1497 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes)
1498 {
1499 int ret;
1500 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1501
1502 blk_inc_in_flight(blk);
1503 ret = blk_do_preadv(blk, offset, bytes, &qiov, 0);
1504 blk_dec_in_flight(blk);
1505
1506 return ret < 0 ? ret : bytes;
1507 }
1508
1509 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
1510 BdrvRequestFlags flags)
1511 {
1512 int ret;
1513 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1514
1515 ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags);
1516
1517 return ret < 0 ? ret : bytes;
1518 }
1519
1520 int64_t blk_getlength(BlockBackend *blk)
1521 {
1522 if (!blk_is_available(blk)) {
1523 return -ENOMEDIUM;
1524 }
1525
1526 return bdrv_getlength(blk_bs(blk));
1527 }
1528
1529 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
1530 {
1531 if (!blk_bs(blk)) {
1532 *nb_sectors_ptr = 0;
1533 } else {
1534 bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
1535 }
1536 }
1537
1538 int64_t blk_nb_sectors(BlockBackend *blk)
1539 {
1540 if (!blk_is_available(blk)) {
1541 return -ENOMEDIUM;
1542 }
1543
1544 return bdrv_nb_sectors(blk_bs(blk));
1545 }
1546
1547 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1548 QEMUIOVector *qiov, BdrvRequestFlags flags,
1549 BlockCompletionFunc *cb, void *opaque)
1550 {
1551 assert((uint64_t)qiov->size <= INT64_MAX);
1552 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1553 blk_aio_read_entry, flags, cb, opaque);
1554 }
1555
1556 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1557 QEMUIOVector *qiov, BdrvRequestFlags flags,
1558 BlockCompletionFunc *cb, void *opaque)
1559 {
1560 assert((uint64_t)qiov->size <= INT64_MAX);
1561 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1562 blk_aio_write_entry, flags, cb, opaque);
1563 }
1564
1565 void blk_aio_cancel(BlockAIOCB *acb)
1566 {
1567 bdrv_aio_cancel(acb);
1568 }
1569
1570 void blk_aio_cancel_async(BlockAIOCB *acb)
1571 {
1572 bdrv_aio_cancel_async(acb);
1573 }
1574
1575 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1576 int coroutine_fn
1577 blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1578 {
1579 blk_wait_while_drained(blk);
1580
1581 if (!blk_is_available(blk)) {
1582 return -ENOMEDIUM;
1583 }
1584
1585 return bdrv_co_ioctl(blk_bs(blk), req, buf);
1586 }
1587
1588 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1589 {
1590 int ret;
1591
1592 blk_inc_in_flight(blk);
1593 ret = blk_do_ioctl(blk, req, buf);
1594 blk_dec_in_flight(blk);
1595
1596 return ret;
1597 }
1598
1599 static void blk_aio_ioctl_entry(void *opaque)
1600 {
1601 BlkAioEmAIOCB *acb = opaque;
1602 BlkRwCo *rwco = &acb->rwco;
1603
1604 rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
1605
1606 blk_aio_complete(acb);
1607 }
1608
1609 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1610 BlockCompletionFunc *cb, void *opaque)
1611 {
1612 return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
1613 }
1614
1615 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1616 int coroutine_fn
1617 blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
1618 {
1619 int ret;
1620
1621 blk_wait_while_drained(blk);
1622
1623 ret = blk_check_byte_request(blk, offset, bytes);
1624 if (ret < 0) {
1625 return ret;
1626 }
1627
1628 return bdrv_co_pdiscard(blk->root, offset, bytes);
1629 }
1630
1631 static void blk_aio_pdiscard_entry(void *opaque)
1632 {
1633 BlkAioEmAIOCB *acb = opaque;
1634 BlkRwCo *rwco = &acb->rwco;
1635
1636 rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1637 blk_aio_complete(acb);
1638 }
1639
1640 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
1641 int64_t offset, int64_t bytes,
1642 BlockCompletionFunc *cb, void *opaque)
1643 {
1644 return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
1645 cb, opaque);
1646 }
1647
1648 int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
1649 int64_t bytes)
1650 {
1651 int ret;
1652
1653 blk_inc_in_flight(blk);
1654 ret = blk_co_do_pdiscard(blk, offset, bytes);
1655 blk_dec_in_flight(blk);
1656
1657 return ret;
1658 }
1659
1660 int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
1661 {
1662 int ret;
1663
1664 blk_inc_in_flight(blk);
1665 ret = blk_do_pdiscard(blk, offset, bytes);
1666 blk_dec_in_flight(blk);
1667
1668 return ret;
1669 }
1670
1671 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1672 int coroutine_fn blk_co_do_flush(BlockBackend *blk)
1673 {
1674 blk_wait_while_drained(blk);
1675
1676 if (!blk_is_available(blk)) {
1677 return -ENOMEDIUM;
1678 }
1679
1680 return bdrv_co_flush(blk_bs(blk));
1681 }
1682
1683 static void blk_aio_flush_entry(void *opaque)
1684 {
1685 BlkAioEmAIOCB *acb = opaque;
1686 BlkRwCo *rwco = &acb->rwco;
1687
1688 rwco->ret = blk_co_do_flush(rwco->blk);
1689 blk_aio_complete(acb);
1690 }
1691
1692 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
1693 BlockCompletionFunc *cb, void *opaque)
1694 {
1695 return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
1696 }
1697
1698 int coroutine_fn blk_co_flush(BlockBackend *blk)
1699 {
1700 int ret;
1701
1702 blk_inc_in_flight(blk);
1703 ret = blk_co_do_flush(blk);
1704 blk_dec_in_flight(blk);
1705
1706 return ret;
1707 }
1708
1709 int blk_flush(BlockBackend *blk)
1710 {
1711 int ret;
1712
1713 blk_inc_in_flight(blk);
1714 ret = blk_do_flush(blk);
1715 blk_dec_in_flight(blk);
1716
1717 return ret;
1718 }
1719
1720 void blk_drain(BlockBackend *blk)
1721 {
1722 BlockDriverState *bs = blk_bs(blk);
1723
1724 if (bs) {
1725 bdrv_ref(bs);
1726 bdrv_drained_begin(bs);
1727 }
1728
1729 /* We may have -ENOMEDIUM completions in flight */
1730 AIO_WAIT_WHILE(blk_get_aio_context(blk),
1731 qatomic_mb_read(&blk->in_flight) > 0);
1732
1733 if (bs) {
1734 bdrv_drained_end(bs);
1735 bdrv_unref(bs);
1736 }
1737 }
1738
1739 void blk_drain_all(void)
1740 {
1741 BlockBackend *blk = NULL;
1742
1743 bdrv_drain_all_begin();
1744
1745 while ((blk = blk_all_next(blk)) != NULL) {
1746 AioContext *ctx = blk_get_aio_context(blk);
1747
1748 aio_context_acquire(ctx);
1749
1750 /* We may have -ENOMEDIUM completions in flight */
1751 AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0);
1752
1753 aio_context_release(ctx);
1754 }
1755
1756 bdrv_drain_all_end();
1757 }
1758
1759 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
1760 BlockdevOnError on_write_error)
1761 {
1762 blk->on_read_error = on_read_error;
1763 blk->on_write_error = on_write_error;
1764 }
1765
1766 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1767 {
1768 return is_read ? blk->on_read_error : blk->on_write_error;
1769 }
1770
1771 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1772 int error)
1773 {
1774 BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1775
1776 switch (on_err) {
1777 case BLOCKDEV_ON_ERROR_ENOSPC:
1778 return (error == ENOSPC) ?
1779 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1780 case BLOCKDEV_ON_ERROR_STOP:
1781 return BLOCK_ERROR_ACTION_STOP;
1782 case BLOCKDEV_ON_ERROR_REPORT:
1783 return BLOCK_ERROR_ACTION_REPORT;
1784 case BLOCKDEV_ON_ERROR_IGNORE:
1785 return BLOCK_ERROR_ACTION_IGNORE;
1786 case BLOCKDEV_ON_ERROR_AUTO:
1787 default:
1788 abort();
1789 }
1790 }
1791
1792 static void send_qmp_error_event(BlockBackend *blk,
1793 BlockErrorAction action,
1794 bool is_read, int error)
1795 {
1796 IoOperationType optype;
1797 BlockDriverState *bs = blk_bs(blk);
1798
1799 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1800 qapi_event_send_block_io_error(blk_name(blk), !!bs,
1801 bs ? bdrv_get_node_name(bs) : NULL, optype,
1802 action, blk_iostatus_is_enabled(blk),
1803 error == ENOSPC, strerror(error));
1804 }
1805
1806 /* This is done by device models because, while the block layer knows
1807 * about the error, it does not know whether an operation comes from
1808 * the device or the block layer (from a job, for example).
1809 */
1810 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1811 bool is_read, int error)
1812 {
1813 assert(error >= 0);
1814
1815 if (action == BLOCK_ERROR_ACTION_STOP) {
1816 /* First set the iostatus, so that "info block" returns an iostatus
1817 * that matches the events raised so far (an additional error iostatus
1818 * is fine, but not a lost one).
1819 */
1820 blk_iostatus_set_err(blk, error);
1821
1822 /* Then raise the request to stop the VM and the event.
1823 * qemu_system_vmstop_request_prepare has two effects. First,
1824 * it ensures that the STOP event always comes after the
1825 * BLOCK_IO_ERROR event. Second, it ensures that even if management
1826 * can observe the STOP event and do a "cont" before the STOP
1827 * event is issued, the VM will not stop. In this case, vm_start()
1828 * also ensures that the STOP/RESUME pair of events is emitted.
1829 */
1830 qemu_system_vmstop_request_prepare();
1831 send_qmp_error_event(blk, action, is_read, error);
1832 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1833 } else {
1834 send_qmp_error_event(blk, action, is_read, error);
1835 }
1836 }
1837
1838 /*
1839 * Returns true if the BlockBackend can support taking write permissions
1840 * (because its root node is not read-only).
1841 */
1842 bool blk_supports_write_perm(BlockBackend *blk)
1843 {
1844 BlockDriverState *bs = blk_bs(blk);
1845
1846 if (bs) {
1847 return !bdrv_is_read_only(bs);
1848 } else {
1849 return blk->root_state.open_flags & BDRV_O_RDWR;
1850 }
1851 }
1852
1853 /*
1854 * Returns true if the BlockBackend can be written to in its current
1855 * configuration (i.e. if write permission have been requested)
1856 */
1857 bool blk_is_writable(BlockBackend *blk)
1858 {
1859 return blk->perm & BLK_PERM_WRITE;
1860 }
1861
1862 bool blk_is_sg(BlockBackend *blk)
1863 {
1864 BlockDriverState *bs = blk_bs(blk);
1865
1866 if (!bs) {
1867 return false;
1868 }
1869
1870 return bdrv_is_sg(bs);
1871 }
1872
1873 bool blk_enable_write_cache(BlockBackend *blk)
1874 {
1875 return blk->enable_write_cache;
1876 }
1877
1878 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
1879 {
1880 blk->enable_write_cache = wce;
1881 }
1882
1883 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
1884 {
1885 BlockDriverState *bs = blk_bs(blk);
1886
1887 if (!bs) {
1888 error_setg(errp, "Device '%s' has no medium", blk->name);
1889 return;
1890 }
1891
1892 bdrv_invalidate_cache(bs, errp);
1893 }
1894
1895 bool blk_is_inserted(BlockBackend *blk)
1896 {
1897 BlockDriverState *bs = blk_bs(blk);
1898
1899 return bs && bdrv_is_inserted(bs);
1900 }
1901
1902 bool blk_is_available(BlockBackend *blk)
1903 {
1904 return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1905 }
1906
1907 void blk_lock_medium(BlockBackend *blk, bool locked)
1908 {
1909 BlockDriverState *bs = blk_bs(blk);
1910
1911 if (bs) {
1912 bdrv_lock_medium(bs, locked);
1913 }
1914 }
1915
1916 void blk_eject(BlockBackend *blk, bool eject_flag)
1917 {
1918 BlockDriverState *bs = blk_bs(blk);
1919 char *id;
1920
1921 if (bs) {
1922 bdrv_eject(bs, eject_flag);
1923 }
1924
1925 /* Whether or not we ejected on the backend,
1926 * the frontend experienced a tray event. */
1927 id = blk_get_attached_dev_id(blk);
1928 qapi_event_send_device_tray_moved(blk_name(blk), id,
1929 eject_flag);
1930 g_free(id);
1931 }
1932
1933 int blk_get_flags(BlockBackend *blk)
1934 {
1935 BlockDriverState *bs = blk_bs(blk);
1936
1937 if (bs) {
1938 return bdrv_get_flags(bs);
1939 } else {
1940 return blk->root_state.open_flags;
1941 }
1942 }
1943
1944 /* Returns the minimum request alignment, in bytes; guaranteed nonzero */
1945 uint32_t blk_get_request_alignment(BlockBackend *blk)
1946 {
1947 BlockDriverState *bs = blk_bs(blk);
1948 return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
1949 }
1950
1951 /* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
1952 uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
1953 {
1954 BlockDriverState *bs = blk_bs(blk);
1955 uint64_t max = INT_MAX;
1956
1957 if (bs) {
1958 max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
1959 max = MIN_NON_ZERO(max, bs->bl.max_transfer);
1960 }
1961 return ROUND_DOWN(max, blk_get_request_alignment(blk));
1962 }
1963
1964 /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
1965 uint32_t blk_get_max_transfer(BlockBackend *blk)
1966 {
1967 BlockDriverState *bs = blk_bs(blk);
1968 uint32_t max = INT_MAX;
1969
1970 if (bs) {
1971 max = MIN_NON_ZERO(max, bs->bl.max_transfer);
1972 }
1973 return ROUND_DOWN(max, blk_get_request_alignment(blk));
1974 }
1975
1976 int blk_get_max_hw_iov(BlockBackend *blk)
1977 {
1978 return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
1979 blk->root->bs->bl.max_iov);
1980 }
1981
1982 int blk_get_max_iov(BlockBackend *blk)
1983 {
1984 return blk->root->bs->bl.max_iov;
1985 }
1986
1987 void blk_set_guest_block_size(BlockBackend *blk, int align)
1988 {
1989 blk->guest_block_size = align;
1990 }
1991
1992 void *blk_try_blockalign(BlockBackend *blk, size_t size)
1993 {
1994 return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1995 }
1996
1997 void *blk_blockalign(BlockBackend *blk, size_t size)
1998 {
1999 return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
2000 }
2001
2002 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
2003 {
2004 BlockDriverState *bs = blk_bs(blk);
2005
2006 if (!bs) {
2007 return false;
2008 }
2009
2010 return bdrv_op_is_blocked(bs, op, errp);
2011 }
2012
2013 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
2014 {
2015 BlockDriverState *bs = blk_bs(blk);
2016
2017 if (bs) {
2018 bdrv_op_unblock(bs, op, reason);
2019 }
2020 }
2021
2022 void blk_op_block_all(BlockBackend *blk, Error *reason)
2023 {
2024 BlockDriverState *bs = blk_bs(blk);
2025
2026 if (bs) {
2027 bdrv_op_block_all(bs, reason);
2028 }
2029 }
2030
2031 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
2032 {
2033 BlockDriverState *bs = blk_bs(blk);
2034
2035 if (bs) {
2036 bdrv_op_unblock_all(bs, reason);
2037 }
2038 }
2039
2040 AioContext *blk_get_aio_context(BlockBackend *blk)
2041 {
2042 BlockDriverState *bs = blk_bs(blk);
2043
2044 if (bs) {
2045 AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
2046 assert(ctx == blk->ctx);
2047 }
2048
2049 return blk->ctx;
2050 }
2051
2052 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
2053 {
2054 BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
2055 return blk_get_aio_context(blk_acb->blk);
2056 }
2057
2058 static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
2059 bool update_root_node, Error **errp)
2060 {
2061 BlockDriverState *bs = blk_bs(blk);
2062 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2063 int ret;
2064
2065 if (bs) {
2066 bdrv_ref(bs);
2067
2068 if (update_root_node) {
2069 ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
2070 errp);
2071 if (ret < 0) {
2072 bdrv_unref(bs);
2073 return ret;
2074 }
2075 }
2076 if (tgm->throttle_state) {
2077 bdrv_drained_begin(bs);
2078 throttle_group_detach_aio_context(tgm);
2079 throttle_group_attach_aio_context(tgm, new_context);
2080 bdrv_drained_end(bs);
2081 }
2082
2083 bdrv_unref(bs);
2084 }
2085
2086 blk->ctx = new_context;
2087 return 0;
2088 }
2089
2090 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
2091 Error **errp)
2092 {
2093 return blk_do_set_aio_context(blk, new_context, true, errp);
2094 }
2095
2096 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
2097 GSList **ignore, Error **errp)
2098 {
2099 BlockBackend *blk = child->opaque;
2100
2101 if (blk->allow_aio_context_change) {
2102 return true;
2103 }
2104
2105 /* Only manually created BlockBackends that are not attached to anything
2106 * can change their AioContext without updating their user. */
2107 if (!blk->name || blk->dev) {
2108 /* TODO Add BB name/QOM path */
2109 error_setg(errp, "Cannot change iothread of active block backend");
2110 return false;
2111 }
2112
2113 return true;
2114 }
2115
2116 static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
2117 GSList **ignore)
2118 {
2119 BlockBackend *blk = child->opaque;
2120 blk_do_set_aio_context(blk, ctx, false, &error_abort);
2121 }
2122
2123 void blk_add_aio_context_notifier(BlockBackend *blk,
2124 void (*attached_aio_context)(AioContext *new_context, void *opaque),
2125 void (*detach_aio_context)(void *opaque), void *opaque)
2126 {
2127 BlockBackendAioNotifier *notifier;
2128 BlockDriverState *bs = blk_bs(blk);
2129
2130 notifier = g_new(BlockBackendAioNotifier, 1);
2131 notifier->attached_aio_context = attached_aio_context;
2132 notifier->detach_aio_context = detach_aio_context;
2133 notifier->opaque = opaque;
2134 QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
2135
2136 if (bs) {
2137 bdrv_add_aio_context_notifier(bs, attached_aio_context,
2138 detach_aio_context, opaque);
2139 }
2140 }
2141
2142 void blk_remove_aio_context_notifier(BlockBackend *blk,
2143 void (*attached_aio_context)(AioContext *,
2144 void *),
2145 void (*detach_aio_context)(void *),
2146 void *opaque)
2147 {
2148 BlockBackendAioNotifier *notifier;
2149 BlockDriverState *bs = blk_bs(blk);
2150
2151 if (bs) {
2152 bdrv_remove_aio_context_notifier(bs, attached_aio_context,
2153 detach_aio_context, opaque);
2154 }
2155
2156 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
2157 if (notifier->attached_aio_context == attached_aio_context &&
2158 notifier->detach_aio_context == detach_aio_context &&
2159 notifier->opaque == opaque) {
2160 QLIST_REMOVE(notifier, list);
2161 g_free(notifier);
2162 return;
2163 }
2164 }
2165
2166 abort();
2167 }
2168
2169 void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
2170 {
2171 notifier_list_add(&blk->remove_bs_notifiers, notify);
2172 }
2173
2174 void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
2175 {
2176 notifier_list_add(&blk->insert_bs_notifiers, notify);
2177 }
2178
2179 void blk_io_plug(BlockBackend *blk)
2180 {
2181 BlockDriverState *bs = blk_bs(blk);
2182
2183 if (bs) {
2184 bdrv_io_plug(bs);
2185 }
2186 }
2187
2188 void blk_io_unplug(BlockBackend *blk)
2189 {
2190 BlockDriverState *bs = blk_bs(blk);
2191
2192 if (bs) {
2193 bdrv_io_unplug(bs);
2194 }
2195 }
2196
2197 BlockAcctStats *blk_get_stats(BlockBackend *blk)
2198 {
2199 return &blk->stats;
2200 }
2201
2202 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
2203 BlockCompletionFunc *cb, void *opaque)
2204 {
2205 return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
2206 }
2207
2208 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
2209 int64_t bytes, BdrvRequestFlags flags)
2210 {
2211 return blk_co_pwritev(blk, offset, bytes, NULL,
2212 flags | BDRV_REQ_ZERO_WRITE);
2213 }
2214
2215 int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
2216 int64_t bytes)
2217 {
2218 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
2219 return blk_pwritev_part(blk, offset, bytes, &qiov, 0,
2220 BDRV_REQ_WRITE_COMPRESSED);
2221 }
2222
2223 int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
2224 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
2225 {
2226 if (!blk_is_available(blk)) {
2227 error_setg(errp, "No medium inserted");
2228 return -ENOMEDIUM;
2229 }
2230
2231 return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
2232 }
2233
2234 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
2235 int64_t pos, int size)
2236 {
2237 int ret;
2238
2239 if (!blk_is_available(blk)) {
2240 return -ENOMEDIUM;
2241 }
2242
2243 ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
2244 if (ret < 0) {
2245 return ret;
2246 }
2247
2248 if (ret == size && !blk->enable_write_cache) {
2249 ret = bdrv_flush(blk_bs(blk));
2250 }
2251
2252 return ret < 0 ? ret : size;
2253 }
2254
2255 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
2256 {
2257 if (!blk_is_available(blk)) {
2258 return -ENOMEDIUM;
2259 }
2260
2261 return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
2262 }
2263
2264 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
2265 {
2266 if (!blk_is_available(blk)) {
2267 return -ENOMEDIUM;
2268 }
2269
2270 return bdrv_probe_blocksizes(blk_bs(blk), bsz);
2271 }
2272
2273 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
2274 {
2275 if (!blk_is_available(blk)) {
2276 return -ENOMEDIUM;
2277 }
2278
2279 return bdrv_probe_geometry(blk_bs(blk), geo);
2280 }
2281
2282 /*
2283 * Updates the BlockBackendRootState object with data from the currently
2284 * attached BlockDriverState.
2285 */
2286 void blk_update_root_state(BlockBackend *blk)
2287 {
2288 assert(blk->root);
2289
2290 blk->root_state.open_flags = blk->root->bs->open_flags;
2291 blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
2292 }
2293
2294 /*
2295 * Returns the detect-zeroes setting to be used for bdrv_open() of a
2296 * BlockDriverState which is supposed to inherit the root state.
2297 */
2298 bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
2299 {
2300 return blk->root_state.detect_zeroes;
2301 }
2302
2303 /*
2304 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
2305 * supposed to inherit the root state.
2306 */
2307 int blk_get_open_flags_from_root_state(BlockBackend *blk)
2308 {
2309 return blk->root_state.open_flags;
2310 }
2311
2312 BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
2313 {
2314 return &blk->root_state;
2315 }
2316
2317 int blk_commit_all(void)
2318 {
2319 BlockBackend *blk = NULL;
2320
2321 while ((blk = blk_all_next(blk)) != NULL) {
2322 AioContext *aio_context = blk_get_aio_context(blk);
2323 BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
2324
2325 aio_context_acquire(aio_context);
2326 if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
2327 int ret;
2328
2329 ret = bdrv_commit(unfiltered_bs);
2330 if (ret < 0) {
2331 aio_context_release(aio_context);
2332 return ret;
2333 }
2334 }
2335 aio_context_release(aio_context);
2336 }
2337 return 0;
2338 }
2339
2340
2341 /* throttling disk I/O limits */
2342 void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
2343 {
2344 throttle_group_config(&blk->public.throttle_group_member, cfg);
2345 }
2346
2347 void blk_io_limits_disable(BlockBackend *blk)
2348 {
2349 BlockDriverState *bs = blk_bs(blk);
2350 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2351 assert(tgm->throttle_state);
2352 if (bs) {
2353 bdrv_ref(bs);
2354 bdrv_drained_begin(bs);
2355 }
2356 throttle_group_unregister_tgm(tgm);
2357 if (bs) {
2358 bdrv_drained_end(bs);
2359 bdrv_unref(bs);
2360 }
2361 }
2362
2363 /* should be called before blk_set_io_limits if a limit is set */
2364 void blk_io_limits_enable(BlockBackend *blk, const char *group)
2365 {
2366 assert(!blk->public.throttle_group_member.throttle_state);
2367 throttle_group_register_tgm(&blk->public.throttle_group_member,
2368 group, blk_get_aio_context(blk));
2369 }
2370
2371 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
2372 {
2373 /* this BB is not part of any group */
2374 if (!blk->public.throttle_group_member.throttle_state) {
2375 return;
2376 }
2377
2378 /* this BB is a part of the same group than the one we want */
2379 if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
2380 group)) {
2381 return;
2382 }
2383
2384 /* need to change the group this bs belong to */
2385 blk_io_limits_disable(blk);
2386 blk_io_limits_enable(blk, group);
2387 }
2388
2389 static void blk_root_drained_begin(BdrvChild *child)
2390 {
2391 BlockBackend *blk = child->opaque;
2392 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2393
2394 if (++blk->quiesce_counter == 1) {
2395 if (blk->dev_ops && blk->dev_ops->drained_begin) {
2396 blk->dev_ops->drained_begin(blk->dev_opaque);
2397 }
2398 }
2399
2400 /* Note that blk->root may not be accessible here yet if we are just
2401 * attaching to a BlockDriverState that is drained. Use child instead. */
2402
2403 if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
2404 throttle_group_restart_tgm(tgm);
2405 }
2406 }
2407
2408 static bool blk_root_drained_poll(BdrvChild *child)
2409 {
2410 BlockBackend *blk = child->opaque;
2411 bool busy = false;
2412 assert(blk->quiesce_counter);
2413
2414 if (blk->dev_ops && blk->dev_ops->drained_poll) {
2415 busy = blk->dev_ops->drained_poll(blk->dev_opaque);
2416 }
2417 return busy || !!blk->in_flight;
2418 }
2419
2420 static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
2421 {
2422 BlockBackend *blk = child->opaque;
2423 assert(blk->quiesce_counter);
2424
2425 assert(blk->public.throttle_group_member.io_limits_disabled);
2426 qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
2427
2428 if (--blk->quiesce_counter == 0) {
2429 if (blk->dev_ops && blk->dev_ops->drained_end) {
2430 blk->dev_ops->drained_end(blk->dev_opaque);
2431 }
2432 while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
2433 /* Resume all queued requests */
2434 }
2435 }
2436 }
2437
2438 void blk_register_buf(BlockBackend *blk, void *host, size_t size)
2439 {
2440 bdrv_register_buf(blk_bs(blk), host, size);
2441 }
2442
2443 void blk_unregister_buf(BlockBackend *blk, void *host)
2444 {
2445 bdrv_unregister_buf(blk_bs(blk), host);
2446 }
2447
2448 int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
2449 BlockBackend *blk_out, int64_t off_out,
2450 int64_t bytes, BdrvRequestFlags read_flags,
2451 BdrvRequestFlags write_flags)
2452 {
2453 int r;
2454 r = blk_check_byte_request(blk_in, off_in, bytes);
2455 if (r) {
2456 return r;
2457 }
2458 r = blk_check_byte_request(blk_out, off_out, bytes);
2459 if (r) {
2460 return r;
2461 }
2462 return bdrv_co_copy_range(blk_in->root, off_in,
2463 blk_out->root, off_out,
2464 bytes, read_flags, write_flags);
2465 }
2466
2467 const BdrvChild *blk_root(BlockBackend *blk)
2468 {
2469 return blk->root;
2470 }
2471
2472 int blk_make_empty(BlockBackend *blk, Error **errp)
2473 {
2474 if (!blk_is_available(blk)) {
2475 error_setg(errp, "No medium inserted");
2476 return -ENOMEDIUM;
2477 }
2478
2479 return bdrv_make_empty(blk->root, errp);
2480 }