]> git.proxmox.com Git - mirror_qemu.git/blob - block/block-backend.c
Merge remote-tracking branch 'remotes/kwolf/tags/for-upstream' into staging
[mirror_qemu.git] / block / block-backend.c
1 /*
2 * QEMU Block backends
3 *
4 * Copyright (C) 2014-2016 Red Hat, Inc.
5 *
6 * Authors:
7 * Markus Armbruster <armbru@redhat.com>,
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1
10 * or later. See the COPYING.LIB file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "sysemu/block-backend.h"
15 #include "block/block_int.h"
16 #include "block/blockjob.h"
17 #include "block/coroutines.h"
18 #include "block/throttle-groups.h"
19 #include "hw/qdev-core.h"
20 #include "sysemu/blockdev.h"
21 #include "sysemu/runstate.h"
22 #include "sysemu/replay.h"
23 #include "qapi/error.h"
24 #include "qapi/qapi-events-block.h"
25 #include "qemu/id.h"
26 #include "qemu/main-loop.h"
27 #include "qemu/option.h"
28 #include "trace.h"
29 #include "migration/misc.h"
30
31 /* Number of coroutines to reserve per attached device model */
32 #define COROUTINE_POOL_RESERVATION 64
33
34 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
35
36 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
37
38 typedef struct BlockBackendAioNotifier {
39 void (*attached_aio_context)(AioContext *new_context, void *opaque);
40 void (*detach_aio_context)(void *opaque);
41 void *opaque;
42 QLIST_ENTRY(BlockBackendAioNotifier) list;
43 } BlockBackendAioNotifier;
44
45 struct BlockBackend {
46 char *name;
47 int refcnt;
48 BdrvChild *root;
49 AioContext *ctx;
50 DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
51 QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
52 QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
53 BlockBackendPublic public;
54
55 DeviceState *dev; /* attached device model, if any */
56 const BlockDevOps *dev_ops;
57 void *dev_opaque;
58
59 /* the block size for which the guest device expects atomicity */
60 int guest_block_size;
61
62 /* If the BDS tree is removed, some of its options are stored here (which
63 * can be used to restore those options in the new BDS on insert) */
64 BlockBackendRootState root_state;
65
66 bool enable_write_cache;
67
68 /* I/O stats (display with "info blockstats"). */
69 BlockAcctStats stats;
70
71 BlockdevOnError on_read_error, on_write_error;
72 bool iostatus_enabled;
73 BlockDeviceIoStatus iostatus;
74
75 uint64_t perm;
76 uint64_t shared_perm;
77 bool disable_perm;
78
79 bool allow_aio_context_change;
80 bool allow_write_beyond_eof;
81
82 NotifierList remove_bs_notifiers, insert_bs_notifiers;
83 QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
84
85 int quiesce_counter;
86 CoQueue queued_requests;
87 bool disable_request_queuing;
88
89 VMChangeStateEntry *vmsh;
90 bool force_allow_inactivate;
91
92 /* Number of in-flight aio requests. BlockDriverState also counts
93 * in-flight requests but aio requests can exist even when blk->root is
94 * NULL, so we cannot rely on its counter for that case.
95 * Accessed with atomic ops.
96 */
97 unsigned int in_flight;
98 };
99
100 typedef struct BlockBackendAIOCB {
101 BlockAIOCB common;
102 BlockBackend *blk;
103 int ret;
104 } BlockBackendAIOCB;
105
106 static const AIOCBInfo block_backend_aiocb_info = {
107 .get_aio_context = blk_aiocb_get_aio_context,
108 .aiocb_size = sizeof(BlockBackendAIOCB),
109 };
110
111 static void drive_info_del(DriveInfo *dinfo);
112 static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
113
114 /* All BlockBackends */
115 static QTAILQ_HEAD(, BlockBackend) block_backends =
116 QTAILQ_HEAD_INITIALIZER(block_backends);
117
118 /* All BlockBackends referenced by the monitor and which are iterated through by
119 * blk_next() */
120 static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
121 QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
122
123 static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
124 int *child_flags, QDict *child_options,
125 int parent_flags, QDict *parent_options)
126 {
127 /* We're not supposed to call this function for root nodes */
128 abort();
129 }
130 static void blk_root_drained_begin(BdrvChild *child);
131 static bool blk_root_drained_poll(BdrvChild *child);
132 static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
133
134 static void blk_root_change_media(BdrvChild *child, bool load);
135 static void blk_root_resize(BdrvChild *child);
136
137 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
138 GSList **ignore, Error **errp);
139 static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
140 GSList **ignore);
141
142 static char *blk_root_get_parent_desc(BdrvChild *child)
143 {
144 BlockBackend *blk = child->opaque;
145 g_autofree char *dev_id = NULL;
146
147 if (blk->name) {
148 return g_strdup_printf("block device '%s'", blk->name);
149 }
150
151 dev_id = blk_get_attached_dev_id(blk);
152 if (*dev_id) {
153 return g_strdup_printf("block device '%s'", dev_id);
154 } else {
155 /* TODO Callback into the BB owner for something more detailed */
156 return g_strdup("an unnamed block device");
157 }
158 }
159
160 static const char *blk_root_get_name(BdrvChild *child)
161 {
162 return blk_name(child->opaque);
163 }
164
165 static void blk_vm_state_changed(void *opaque, bool running, RunState state)
166 {
167 Error *local_err = NULL;
168 BlockBackend *blk = opaque;
169
170 if (state == RUN_STATE_INMIGRATE) {
171 return;
172 }
173
174 qemu_del_vm_change_state_handler(blk->vmsh);
175 blk->vmsh = NULL;
176 blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
177 if (local_err) {
178 error_report_err(local_err);
179 }
180 }
181
182 /*
183 * Notifies the user of the BlockBackend that migration has completed. qdev
184 * devices can tighten their permissions in response (specifically revoke
185 * shared write permissions that we needed for storage migration).
186 *
187 * If an error is returned, the VM cannot be allowed to be resumed.
188 */
189 static void blk_root_activate(BdrvChild *child, Error **errp)
190 {
191 BlockBackend *blk = child->opaque;
192 Error *local_err = NULL;
193
194 if (!blk->disable_perm) {
195 return;
196 }
197
198 blk->disable_perm = false;
199
200 blk_set_perm(blk, blk->perm, BLK_PERM_ALL, &local_err);
201 if (local_err) {
202 error_propagate(errp, local_err);
203 blk->disable_perm = true;
204 return;
205 }
206
207 if (runstate_check(RUN_STATE_INMIGRATE)) {
208 /* Activation can happen when migration process is still active, for
209 * example when nbd_server_add is called during non-shared storage
210 * migration. Defer the shared_perm update to migration completion. */
211 if (!blk->vmsh) {
212 blk->vmsh = qemu_add_vm_change_state_handler(blk_vm_state_changed,
213 blk);
214 }
215 return;
216 }
217
218 blk_set_perm(blk, blk->perm, blk->shared_perm, &local_err);
219 if (local_err) {
220 error_propagate(errp, local_err);
221 blk->disable_perm = true;
222 return;
223 }
224 }
225
226 void blk_set_force_allow_inactivate(BlockBackend *blk)
227 {
228 blk->force_allow_inactivate = true;
229 }
230
231 static bool blk_can_inactivate(BlockBackend *blk)
232 {
233 /* If it is a guest device, inactivate is ok. */
234 if (blk->dev || blk_name(blk)[0]) {
235 return true;
236 }
237
238 /* Inactivating means no more writes to the image can be done,
239 * even if those writes would be changes invisible to the
240 * guest. For block job BBs that satisfy this, we can just allow
241 * it. This is the case for mirror job source, which is required
242 * by libvirt non-shared block migration. */
243 if (!(blk->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED))) {
244 return true;
245 }
246
247 return blk->force_allow_inactivate;
248 }
249
250 static int blk_root_inactivate(BdrvChild *child)
251 {
252 BlockBackend *blk = child->opaque;
253
254 if (blk->disable_perm) {
255 return 0;
256 }
257
258 if (!blk_can_inactivate(blk)) {
259 return -EPERM;
260 }
261
262 blk->disable_perm = true;
263 if (blk->root) {
264 bdrv_child_try_set_perm(blk->root, 0, BLK_PERM_ALL, &error_abort);
265 }
266
267 return 0;
268 }
269
270 static void blk_root_attach(BdrvChild *child)
271 {
272 BlockBackend *blk = child->opaque;
273 BlockBackendAioNotifier *notifier;
274
275 trace_blk_root_attach(child, blk, child->bs);
276
277 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
278 bdrv_add_aio_context_notifier(child->bs,
279 notifier->attached_aio_context,
280 notifier->detach_aio_context,
281 notifier->opaque);
282 }
283 }
284
285 static void blk_root_detach(BdrvChild *child)
286 {
287 BlockBackend *blk = child->opaque;
288 BlockBackendAioNotifier *notifier;
289
290 trace_blk_root_detach(child, blk, child->bs);
291
292 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
293 bdrv_remove_aio_context_notifier(child->bs,
294 notifier->attached_aio_context,
295 notifier->detach_aio_context,
296 notifier->opaque);
297 }
298 }
299
300 static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
301 {
302 BlockBackend *blk = c->opaque;
303
304 return blk_get_aio_context(blk);
305 }
306
307 static const BdrvChildClass child_root = {
308 .inherit_options = blk_root_inherit_options,
309
310 .change_media = blk_root_change_media,
311 .resize = blk_root_resize,
312 .get_name = blk_root_get_name,
313 .get_parent_desc = blk_root_get_parent_desc,
314
315 .drained_begin = blk_root_drained_begin,
316 .drained_poll = blk_root_drained_poll,
317 .drained_end = blk_root_drained_end,
318
319 .activate = blk_root_activate,
320 .inactivate = blk_root_inactivate,
321
322 .attach = blk_root_attach,
323 .detach = blk_root_detach,
324
325 .can_set_aio_ctx = blk_root_can_set_aio_ctx,
326 .set_aio_ctx = blk_root_set_aio_ctx,
327
328 .get_parent_aio_context = blk_root_get_parent_aio_context,
329 };
330
331 /*
332 * Create a new BlockBackend with a reference count of one.
333 *
334 * @perm is a bitmasks of BLK_PERM_* constants which describes the permissions
335 * to request for a block driver node that is attached to this BlockBackend.
336 * @shared_perm is a bitmask which describes which permissions may be granted
337 * to other users of the attached node.
338 * Both sets of permissions can be changed later using blk_set_perm().
339 *
340 * Return the new BlockBackend on success, null on failure.
341 */
342 BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
343 {
344 BlockBackend *blk;
345
346 blk = g_new0(BlockBackend, 1);
347 blk->refcnt = 1;
348 blk->ctx = ctx;
349 blk->perm = perm;
350 blk->shared_perm = shared_perm;
351 blk_set_enable_write_cache(blk, true);
352
353 blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT;
354 blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
355
356 block_acct_init(&blk->stats);
357
358 qemu_co_queue_init(&blk->queued_requests);
359 notifier_list_init(&blk->remove_bs_notifiers);
360 notifier_list_init(&blk->insert_bs_notifiers);
361 QLIST_INIT(&blk->aio_notifiers);
362
363 QTAILQ_INSERT_TAIL(&block_backends, blk, link);
364 return blk;
365 }
366
367 /*
368 * Create a new BlockBackend connected to an existing BlockDriverState.
369 *
370 * @perm is a bitmasks of BLK_PERM_* constants which describes the
371 * permissions to request for @bs that is attached to this
372 * BlockBackend. @shared_perm is a bitmask which describes which
373 * permissions may be granted to other users of the attached node.
374 * Both sets of permissions can be changed later using blk_set_perm().
375 *
376 * Return the new BlockBackend on success, null on failure.
377 */
378 BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
379 uint64_t shared_perm, Error **errp)
380 {
381 BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
382
383 if (blk_insert_bs(blk, bs, errp) < 0) {
384 blk_unref(blk);
385 return NULL;
386 }
387 return blk;
388 }
389
390 /*
391 * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
392 * The new BlockBackend is in the main AioContext.
393 *
394 * Just as with bdrv_open(), after having called this function the reference to
395 * @options belongs to the block layer (even on failure).
396 *
397 * TODO: Remove @filename and @flags; it should be possible to specify a whole
398 * BDS tree just by specifying the @options QDict (or @reference,
399 * alternatively). At the time of adding this function, this is not possible,
400 * though, so callers of this function have to be able to specify @filename and
401 * @flags.
402 */
403 BlockBackend *blk_new_open(const char *filename, const char *reference,
404 QDict *options, int flags, Error **errp)
405 {
406 BlockBackend *blk;
407 BlockDriverState *bs;
408 uint64_t perm = 0;
409 uint64_t shared = BLK_PERM_ALL;
410
411 /*
412 * blk_new_open() is mainly used in .bdrv_create implementations and the
413 * tools where sharing isn't a major concern because the BDS stays private
414 * and the file is generally not supposed to be used by a second process,
415 * so we just request permission according to the flags.
416 *
417 * The exceptions are xen_disk and blockdev_init(); in these cases, the
418 * caller of blk_new_open() doesn't make use of the permissions, but they
419 * shouldn't hurt either. We can still share everything here because the
420 * guest devices will add their own blockers if they can't share.
421 */
422 if ((flags & BDRV_O_NO_IO) == 0) {
423 perm |= BLK_PERM_CONSISTENT_READ;
424 if (flags & BDRV_O_RDWR) {
425 perm |= BLK_PERM_WRITE;
426 }
427 }
428 if (flags & BDRV_O_RESIZE) {
429 perm |= BLK_PERM_RESIZE;
430 }
431 if (flags & BDRV_O_NO_SHARE) {
432 shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
433 }
434
435 blk = blk_new(qemu_get_aio_context(), perm, shared);
436 bs = bdrv_open(filename, reference, options, flags, errp);
437 if (!bs) {
438 blk_unref(blk);
439 return NULL;
440 }
441
442 blk->root = bdrv_root_attach_child(bs, "root", &child_root,
443 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
444 perm, shared, blk, errp);
445 if (!blk->root) {
446 blk_unref(blk);
447 return NULL;
448 }
449
450 return blk;
451 }
452
453 static void blk_delete(BlockBackend *blk)
454 {
455 assert(!blk->refcnt);
456 assert(!blk->name);
457 assert(!blk->dev);
458 if (blk->public.throttle_group_member.throttle_state) {
459 blk_io_limits_disable(blk);
460 }
461 if (blk->root) {
462 blk_remove_bs(blk);
463 }
464 if (blk->vmsh) {
465 qemu_del_vm_change_state_handler(blk->vmsh);
466 blk->vmsh = NULL;
467 }
468 assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
469 assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
470 assert(QLIST_EMPTY(&blk->aio_notifiers));
471 QTAILQ_REMOVE(&block_backends, blk, link);
472 drive_info_del(blk->legacy_dinfo);
473 block_acct_cleanup(&blk->stats);
474 g_free(blk);
475 }
476
477 static void drive_info_del(DriveInfo *dinfo)
478 {
479 if (!dinfo) {
480 return;
481 }
482 qemu_opts_del(dinfo->opts);
483 g_free(dinfo);
484 }
485
486 int blk_get_refcnt(BlockBackend *blk)
487 {
488 return blk ? blk->refcnt : 0;
489 }
490
491 /*
492 * Increment @blk's reference count.
493 * @blk must not be null.
494 */
495 void blk_ref(BlockBackend *blk)
496 {
497 assert(blk->refcnt > 0);
498 blk->refcnt++;
499 }
500
501 /*
502 * Decrement @blk's reference count.
503 * If this drops it to zero, destroy @blk.
504 * For convenience, do nothing if @blk is null.
505 */
506 void blk_unref(BlockBackend *blk)
507 {
508 if (blk) {
509 assert(blk->refcnt > 0);
510 if (blk->refcnt > 1) {
511 blk->refcnt--;
512 } else {
513 blk_drain(blk);
514 /* blk_drain() cannot resurrect blk, nobody held a reference */
515 assert(blk->refcnt == 1);
516 blk->refcnt = 0;
517 blk_delete(blk);
518 }
519 }
520 }
521
522 /*
523 * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
524 * ones which are hidden (i.e. are not referenced by the monitor).
525 */
526 BlockBackend *blk_all_next(BlockBackend *blk)
527 {
528 return blk ? QTAILQ_NEXT(blk, link)
529 : QTAILQ_FIRST(&block_backends);
530 }
531
532 void blk_remove_all_bs(void)
533 {
534 BlockBackend *blk = NULL;
535
536 while ((blk = blk_all_next(blk)) != NULL) {
537 AioContext *ctx = blk_get_aio_context(blk);
538
539 aio_context_acquire(ctx);
540 if (blk->root) {
541 blk_remove_bs(blk);
542 }
543 aio_context_release(ctx);
544 }
545 }
546
547 /*
548 * Return the monitor-owned BlockBackend after @blk.
549 * If @blk is null, return the first one.
550 * Else, return @blk's next sibling, which may be null.
551 *
552 * To iterate over all BlockBackends, do
553 * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
554 * ...
555 * }
556 */
557 BlockBackend *blk_next(BlockBackend *blk)
558 {
559 return blk ? QTAILQ_NEXT(blk, monitor_link)
560 : QTAILQ_FIRST(&monitor_block_backends);
561 }
562
563 /* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
564 * the monitor or attached to a BlockBackend */
565 BlockDriverState *bdrv_next(BdrvNextIterator *it)
566 {
567 BlockDriverState *bs, *old_bs;
568
569 /* Must be called from the main loop */
570 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
571
572 /* First, return all root nodes of BlockBackends. In order to avoid
573 * returning a BDS twice when multiple BBs refer to it, we only return it
574 * if the BB is the first one in the parent list of the BDS. */
575 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
576 BlockBackend *old_blk = it->blk;
577
578 old_bs = old_blk ? blk_bs(old_blk) : NULL;
579
580 do {
581 it->blk = blk_all_next(it->blk);
582 bs = it->blk ? blk_bs(it->blk) : NULL;
583 } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
584
585 if (it->blk) {
586 blk_ref(it->blk);
587 }
588 blk_unref(old_blk);
589
590 if (bs) {
591 bdrv_ref(bs);
592 bdrv_unref(old_bs);
593 return bs;
594 }
595 it->phase = BDRV_NEXT_MONITOR_OWNED;
596 } else {
597 old_bs = it->bs;
598 }
599
600 /* Then return the monitor-owned BDSes without a BB attached. Ignore all
601 * BDSes that are attached to a BlockBackend here; they have been handled
602 * by the above block already */
603 do {
604 it->bs = bdrv_next_monitor_owned(it->bs);
605 bs = it->bs;
606 } while (bs && bdrv_has_blk(bs));
607
608 if (bs) {
609 bdrv_ref(bs);
610 }
611 bdrv_unref(old_bs);
612
613 return bs;
614 }
615
616 static void bdrv_next_reset(BdrvNextIterator *it)
617 {
618 *it = (BdrvNextIterator) {
619 .phase = BDRV_NEXT_BACKEND_ROOTS,
620 };
621 }
622
623 BlockDriverState *bdrv_first(BdrvNextIterator *it)
624 {
625 bdrv_next_reset(it);
626 return bdrv_next(it);
627 }
628
629 /* Must be called when aborting a bdrv_next() iteration before
630 * bdrv_next() returns NULL */
631 void bdrv_next_cleanup(BdrvNextIterator *it)
632 {
633 /* Must be called from the main loop */
634 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
635
636 if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
637 if (it->blk) {
638 bdrv_unref(blk_bs(it->blk));
639 blk_unref(it->blk);
640 }
641 } else {
642 bdrv_unref(it->bs);
643 }
644
645 bdrv_next_reset(it);
646 }
647
648 /*
649 * Add a BlockBackend into the list of backends referenced by the monitor, with
650 * the given @name acting as the handle for the monitor.
651 * Strictly for use by blockdev.c.
652 *
653 * @name must not be null or empty.
654 *
655 * Returns true on success and false on failure. In the latter case, an Error
656 * object is returned through @errp.
657 */
658 bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
659 {
660 assert(!blk->name);
661 assert(name && name[0]);
662
663 if (!id_wellformed(name)) {
664 error_setg(errp, "Invalid device name");
665 return false;
666 }
667 if (blk_by_name(name)) {
668 error_setg(errp, "Device with id '%s' already exists", name);
669 return false;
670 }
671 if (bdrv_find_node(name)) {
672 error_setg(errp,
673 "Device name '%s' conflicts with an existing node name",
674 name);
675 return false;
676 }
677
678 blk->name = g_strdup(name);
679 QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
680 return true;
681 }
682
683 /*
684 * Remove a BlockBackend from the list of backends referenced by the monitor.
685 * Strictly for use by blockdev.c.
686 */
687 void monitor_remove_blk(BlockBackend *blk)
688 {
689 if (!blk->name) {
690 return;
691 }
692
693 QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
694 g_free(blk->name);
695 blk->name = NULL;
696 }
697
698 /*
699 * Return @blk's name, a non-null string.
700 * Returns an empty string iff @blk is not referenced by the monitor.
701 */
702 const char *blk_name(const BlockBackend *blk)
703 {
704 return blk->name ?: "";
705 }
706
707 /*
708 * Return the BlockBackend with name @name if it exists, else null.
709 * @name must not be null.
710 */
711 BlockBackend *blk_by_name(const char *name)
712 {
713 BlockBackend *blk = NULL;
714
715 assert(name);
716 while ((blk = blk_next(blk)) != NULL) {
717 if (!strcmp(name, blk->name)) {
718 return blk;
719 }
720 }
721 return NULL;
722 }
723
724 /*
725 * Return the BlockDriverState attached to @blk if any, else null.
726 */
727 BlockDriverState *blk_bs(BlockBackend *blk)
728 {
729 return blk->root ? blk->root->bs : NULL;
730 }
731
732 static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
733 {
734 BdrvChild *child;
735 QLIST_FOREACH(child, &bs->parents, next_parent) {
736 if (child->klass == &child_root) {
737 return child->opaque;
738 }
739 }
740
741 return NULL;
742 }
743
744 /*
745 * Returns true if @bs has an associated BlockBackend.
746 */
747 bool bdrv_has_blk(BlockDriverState *bs)
748 {
749 return bdrv_first_blk(bs) != NULL;
750 }
751
752 /*
753 * Returns true if @bs has only BlockBackends as parents.
754 */
755 bool bdrv_is_root_node(BlockDriverState *bs)
756 {
757 BdrvChild *c;
758
759 QLIST_FOREACH(c, &bs->parents, next_parent) {
760 if (c->klass != &child_root) {
761 return false;
762 }
763 }
764
765 return true;
766 }
767
768 /*
769 * Return @blk's DriveInfo if any, else null.
770 */
771 DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
772 {
773 return blk->legacy_dinfo;
774 }
775
776 /*
777 * Set @blk's DriveInfo to @dinfo, and return it.
778 * @blk must not have a DriveInfo set already.
779 * No other BlockBackend may have the same DriveInfo set.
780 */
781 DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
782 {
783 assert(!blk->legacy_dinfo);
784 return blk->legacy_dinfo = dinfo;
785 }
786
787 /*
788 * Return the BlockBackend with DriveInfo @dinfo.
789 * It must exist.
790 */
791 BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
792 {
793 BlockBackend *blk = NULL;
794
795 while ((blk = blk_next(blk)) != NULL) {
796 if (blk->legacy_dinfo == dinfo) {
797 return blk;
798 }
799 }
800 abort();
801 }
802
803 /*
804 * Returns a pointer to the publicly accessible fields of @blk.
805 */
806 BlockBackendPublic *blk_get_public(BlockBackend *blk)
807 {
808 return &blk->public;
809 }
810
811 /*
812 * Returns a BlockBackend given the associated @public fields.
813 */
814 BlockBackend *blk_by_public(BlockBackendPublic *public)
815 {
816 return container_of(public, BlockBackend, public);
817 }
818
819 /*
820 * Disassociates the currently associated BlockDriverState from @blk.
821 */
822 void blk_remove_bs(BlockBackend *blk)
823 {
824 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
825 BlockDriverState *bs;
826 BdrvChild *root;
827
828 notifier_list_notify(&blk->remove_bs_notifiers, blk);
829 if (tgm->throttle_state) {
830 bs = blk_bs(blk);
831 bdrv_drained_begin(bs);
832 throttle_group_detach_aio_context(tgm);
833 throttle_group_attach_aio_context(tgm, qemu_get_aio_context());
834 bdrv_drained_end(bs);
835 }
836
837 blk_update_root_state(blk);
838
839 /* bdrv_root_unref_child() will cause blk->root to become stale and may
840 * switch to a completion coroutine later on. Let's drain all I/O here
841 * to avoid that and a potential QEMU crash.
842 */
843 blk_drain(blk);
844 root = blk->root;
845 blk->root = NULL;
846 bdrv_root_unref_child(root);
847 }
848
849 /*
850 * Associates a new BlockDriverState with @blk.
851 */
852 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
853 {
854 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
855 bdrv_ref(bs);
856 blk->root = bdrv_root_attach_child(bs, "root", &child_root,
857 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
858 blk->perm, blk->shared_perm,
859 blk, errp);
860 if (blk->root == NULL) {
861 return -EPERM;
862 }
863
864 notifier_list_notify(&blk->insert_bs_notifiers, blk);
865 if (tgm->throttle_state) {
866 throttle_group_detach_aio_context(tgm);
867 throttle_group_attach_aio_context(tgm, bdrv_get_aio_context(bs));
868 }
869
870 return 0;
871 }
872
873 /*
874 * Change BlockDriverState associated with @blk.
875 */
876 int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
877 {
878 return bdrv_replace_child_bs(blk->root, new_bs, errp);
879 }
880
881 /*
882 * Sets the permission bitmasks that the user of the BlockBackend needs.
883 */
884 int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
885 Error **errp)
886 {
887 int ret;
888
889 if (blk->root && !blk->disable_perm) {
890 ret = bdrv_child_try_set_perm(blk->root, perm, shared_perm, errp);
891 if (ret < 0) {
892 return ret;
893 }
894 }
895
896 blk->perm = perm;
897 blk->shared_perm = shared_perm;
898
899 return 0;
900 }
901
902 void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm)
903 {
904 *perm = blk->perm;
905 *shared_perm = blk->shared_perm;
906 }
907
908 /*
909 * Attach device model @dev to @blk.
910 * Return 0 on success, -EBUSY when a device model is attached already.
911 */
912 int blk_attach_dev(BlockBackend *blk, DeviceState *dev)
913 {
914 if (blk->dev) {
915 return -EBUSY;
916 }
917
918 /* While migration is still incoming, we don't need to apply the
919 * permissions of guest device BlockBackends. We might still have a block
920 * job or NBD server writing to the image for storage migration. */
921 if (runstate_check(RUN_STATE_INMIGRATE)) {
922 blk->disable_perm = true;
923 }
924
925 blk_ref(blk);
926 blk->dev = dev;
927 blk_iostatus_reset(blk);
928
929 return 0;
930 }
931
932 /*
933 * Detach device model @dev from @blk.
934 * @dev must be currently attached to @blk.
935 */
936 void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
937 {
938 assert(blk->dev == dev);
939 blk->dev = NULL;
940 blk->dev_ops = NULL;
941 blk->dev_opaque = NULL;
942 blk->guest_block_size = 512;
943 blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
944 blk_unref(blk);
945 }
946
947 /*
948 * Return the device model attached to @blk if any, else null.
949 */
950 DeviceState *blk_get_attached_dev(BlockBackend *blk)
951 {
952 return blk->dev;
953 }
954
955 /* Return the qdev ID, or if no ID is assigned the QOM path, of the block
956 * device attached to the BlockBackend. */
957 char *blk_get_attached_dev_id(BlockBackend *blk)
958 {
959 DeviceState *dev = blk->dev;
960
961 if (!dev) {
962 return g_strdup("");
963 } else if (dev->id) {
964 return g_strdup(dev->id);
965 }
966
967 return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
968 }
969
970 /*
971 * Return the BlockBackend which has the device model @dev attached if it
972 * exists, else null.
973 *
974 * @dev must not be null.
975 */
976 BlockBackend *blk_by_dev(void *dev)
977 {
978 BlockBackend *blk = NULL;
979
980 assert(dev != NULL);
981 while ((blk = blk_all_next(blk)) != NULL) {
982 if (blk->dev == dev) {
983 return blk;
984 }
985 }
986 return NULL;
987 }
988
989 /*
990 * Set @blk's device model callbacks to @ops.
991 * @opaque is the opaque argument to pass to the callbacks.
992 * This is for use by device models.
993 */
994 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
995 void *opaque)
996 {
997 blk->dev_ops = ops;
998 blk->dev_opaque = opaque;
999
1000 /* Are we currently quiesced? Should we enforce this right now? */
1001 if (blk->quiesce_counter && ops->drained_begin) {
1002 ops->drained_begin(opaque);
1003 }
1004 }
1005
1006 /*
1007 * Notify @blk's attached device model of media change.
1008 *
1009 * If @load is true, notify of media load. This action can fail, meaning that
1010 * the medium cannot be loaded. @errp is set then.
1011 *
1012 * If @load is false, notify of media eject. This can never fail.
1013 *
1014 * Also send DEVICE_TRAY_MOVED events as appropriate.
1015 */
1016 void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp)
1017 {
1018 if (blk->dev_ops && blk->dev_ops->change_media_cb) {
1019 bool tray_was_open, tray_is_open;
1020 Error *local_err = NULL;
1021
1022 tray_was_open = blk_dev_is_tray_open(blk);
1023 blk->dev_ops->change_media_cb(blk->dev_opaque, load, &local_err);
1024 if (local_err) {
1025 assert(load == true);
1026 error_propagate(errp, local_err);
1027 return;
1028 }
1029 tray_is_open = blk_dev_is_tray_open(blk);
1030
1031 if (tray_was_open != tray_is_open) {
1032 char *id = blk_get_attached_dev_id(blk);
1033 qapi_event_send_device_tray_moved(blk_name(blk), id, tray_is_open);
1034 g_free(id);
1035 }
1036 }
1037 }
1038
1039 static void blk_root_change_media(BdrvChild *child, bool load)
1040 {
1041 blk_dev_change_media_cb(child->opaque, load, NULL);
1042 }
1043
1044 /*
1045 * Does @blk's attached device model have removable media?
1046 * %true if no device model is attached.
1047 */
1048 bool blk_dev_has_removable_media(BlockBackend *blk)
1049 {
1050 return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
1051 }
1052
1053 /*
1054 * Does @blk's attached device model have a tray?
1055 */
1056 bool blk_dev_has_tray(BlockBackend *blk)
1057 {
1058 return blk->dev_ops && blk->dev_ops->is_tray_open;
1059 }
1060
1061 /*
1062 * Notify @blk's attached device model of a media eject request.
1063 * If @force is true, the medium is about to be yanked out forcefully.
1064 */
1065 void blk_dev_eject_request(BlockBackend *blk, bool force)
1066 {
1067 if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
1068 blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
1069 }
1070 }
1071
1072 /*
1073 * Does @blk's attached device model have a tray, and is it open?
1074 */
1075 bool blk_dev_is_tray_open(BlockBackend *blk)
1076 {
1077 if (blk_dev_has_tray(blk)) {
1078 return blk->dev_ops->is_tray_open(blk->dev_opaque);
1079 }
1080 return false;
1081 }
1082
1083 /*
1084 * Does @blk's attached device model have the medium locked?
1085 * %false if the device model has no such lock.
1086 */
1087 bool blk_dev_is_medium_locked(BlockBackend *blk)
1088 {
1089 if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
1090 return blk->dev_ops->is_medium_locked(blk->dev_opaque);
1091 }
1092 return false;
1093 }
1094
1095 /*
1096 * Notify @blk's attached device model of a backend size change.
1097 */
1098 static void blk_root_resize(BdrvChild *child)
1099 {
1100 BlockBackend *blk = child->opaque;
1101
1102 if (blk->dev_ops && blk->dev_ops->resize_cb) {
1103 blk->dev_ops->resize_cb(blk->dev_opaque);
1104 }
1105 }
1106
1107 void blk_iostatus_enable(BlockBackend *blk)
1108 {
1109 blk->iostatus_enabled = true;
1110 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1111 }
1112
1113 /* The I/O status is only enabled if the drive explicitly
1114 * enables it _and_ the VM is configured to stop on errors */
1115 bool blk_iostatus_is_enabled(const BlockBackend *blk)
1116 {
1117 return (blk->iostatus_enabled &&
1118 (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
1119 blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
1120 blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
1121 }
1122
1123 BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
1124 {
1125 return blk->iostatus;
1126 }
1127
1128 void blk_iostatus_disable(BlockBackend *blk)
1129 {
1130 blk->iostatus_enabled = false;
1131 }
1132
1133 void blk_iostatus_reset(BlockBackend *blk)
1134 {
1135 if (blk_iostatus_is_enabled(blk)) {
1136 blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
1137 }
1138 }
1139
1140 void blk_iostatus_set_err(BlockBackend *blk, int error)
1141 {
1142 assert(blk_iostatus_is_enabled(blk));
1143 if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
1144 blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
1145 BLOCK_DEVICE_IO_STATUS_FAILED;
1146 }
1147 }
1148
1149 void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
1150 {
1151 blk->allow_write_beyond_eof = allow;
1152 }
1153
1154 void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
1155 {
1156 blk->allow_aio_context_change = allow;
1157 }
1158
1159 void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
1160 {
1161 blk->disable_request_queuing = disable;
1162 }
1163
1164 static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
1165 int64_t bytes)
1166 {
1167 int64_t len;
1168
1169 if (bytes < 0) {
1170 return -EIO;
1171 }
1172
1173 if (!blk_is_available(blk)) {
1174 return -ENOMEDIUM;
1175 }
1176
1177 if (offset < 0) {
1178 return -EIO;
1179 }
1180
1181 if (!blk->allow_write_beyond_eof) {
1182 len = blk_getlength(blk);
1183 if (len < 0) {
1184 return len;
1185 }
1186
1187 if (offset > len || len - offset < bytes) {
1188 return -EIO;
1189 }
1190 }
1191
1192 return 0;
1193 }
1194
1195 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1196 static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
1197 {
1198 assert(blk->in_flight > 0);
1199
1200 if (blk->quiesce_counter && !blk->disable_request_queuing) {
1201 blk_dec_in_flight(blk);
1202 qemu_co_queue_wait(&blk->queued_requests, NULL);
1203 blk_inc_in_flight(blk);
1204 }
1205 }
1206
1207 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1208 int coroutine_fn
1209 blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
1210 QEMUIOVector *qiov, BdrvRequestFlags flags)
1211 {
1212 int ret;
1213 BlockDriverState *bs;
1214
1215 blk_wait_while_drained(blk);
1216
1217 /* Call blk_bs() only after waiting, the graph may have changed */
1218 bs = blk_bs(blk);
1219 trace_blk_co_preadv(blk, bs, offset, bytes, flags);
1220
1221 ret = blk_check_byte_request(blk, offset, bytes);
1222 if (ret < 0) {
1223 return ret;
1224 }
1225
1226 bdrv_inc_in_flight(bs);
1227
1228 /* throttling disk I/O */
1229 if (blk->public.throttle_group_member.throttle_state) {
1230 throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1231 bytes, false);
1232 }
1233
1234 ret = bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
1235 bdrv_dec_in_flight(bs);
1236 return ret;
1237 }
1238
1239 int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
1240 int64_t bytes, QEMUIOVector *qiov,
1241 BdrvRequestFlags flags)
1242 {
1243 int ret;
1244
1245 blk_inc_in_flight(blk);
1246 ret = blk_co_do_preadv(blk, offset, bytes, qiov, flags);
1247 blk_dec_in_flight(blk);
1248
1249 return ret;
1250 }
1251
1252 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1253 int coroutine_fn
1254 blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
1255 QEMUIOVector *qiov, size_t qiov_offset,
1256 BdrvRequestFlags flags)
1257 {
1258 int ret;
1259 BlockDriverState *bs;
1260
1261 blk_wait_while_drained(blk);
1262
1263 /* Call blk_bs() only after waiting, the graph may have changed */
1264 bs = blk_bs(blk);
1265 trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
1266
1267 ret = blk_check_byte_request(blk, offset, bytes);
1268 if (ret < 0) {
1269 return ret;
1270 }
1271
1272 bdrv_inc_in_flight(bs);
1273 /* throttling disk I/O */
1274 if (blk->public.throttle_group_member.throttle_state) {
1275 throttle_group_co_io_limits_intercept(&blk->public.throttle_group_member,
1276 bytes, true);
1277 }
1278
1279 if (!blk->enable_write_cache) {
1280 flags |= BDRV_REQ_FUA;
1281 }
1282
1283 ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
1284 flags);
1285 bdrv_dec_in_flight(bs);
1286 return ret;
1287 }
1288
1289 int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
1290 int64_t bytes,
1291 QEMUIOVector *qiov, size_t qiov_offset,
1292 BdrvRequestFlags flags)
1293 {
1294 int ret;
1295
1296 blk_inc_in_flight(blk);
1297 ret = blk_co_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
1298 blk_dec_in_flight(blk);
1299
1300 return ret;
1301 }
1302
1303 int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
1304 int64_t bytes, QEMUIOVector *qiov,
1305 BdrvRequestFlags flags)
1306 {
1307 return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
1308 }
1309
1310 static int coroutine_fn blk_pwritev_part(BlockBackend *blk, int64_t offset,
1311 int64_t bytes,
1312 QEMUIOVector *qiov, size_t qiov_offset,
1313 BdrvRequestFlags flags)
1314 {
1315 int ret;
1316
1317 blk_inc_in_flight(blk);
1318 ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
1319 blk_dec_in_flight(blk);
1320
1321 return ret;
1322 }
1323
1324 typedef struct BlkRwCo {
1325 BlockBackend *blk;
1326 int64_t offset;
1327 void *iobuf;
1328 int ret;
1329 BdrvRequestFlags flags;
1330 } BlkRwCo;
1331
1332 int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1333 int64_t bytes, BdrvRequestFlags flags)
1334 {
1335 return blk_pwritev_part(blk, offset, bytes, NULL, 0,
1336 flags | BDRV_REQ_ZERO_WRITE);
1337 }
1338
1339 int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
1340 {
1341 return bdrv_make_zero(blk->root, flags);
1342 }
1343
1344 void blk_inc_in_flight(BlockBackend *blk)
1345 {
1346 qatomic_inc(&blk->in_flight);
1347 }
1348
1349 void blk_dec_in_flight(BlockBackend *blk)
1350 {
1351 qatomic_dec(&blk->in_flight);
1352 aio_wait_kick();
1353 }
1354
1355 static void error_callback_bh(void *opaque)
1356 {
1357 struct BlockBackendAIOCB *acb = opaque;
1358
1359 blk_dec_in_flight(acb->blk);
1360 acb->common.cb(acb->common.opaque, acb->ret);
1361 qemu_aio_unref(acb);
1362 }
1363
1364 BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
1365 BlockCompletionFunc *cb,
1366 void *opaque, int ret)
1367 {
1368 struct BlockBackendAIOCB *acb;
1369
1370 blk_inc_in_flight(blk);
1371 acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
1372 acb->blk = blk;
1373 acb->ret = ret;
1374
1375 replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1376 error_callback_bh, acb);
1377 return &acb->common;
1378 }
1379
1380 typedef struct BlkAioEmAIOCB {
1381 BlockAIOCB common;
1382 BlkRwCo rwco;
1383 int64_t bytes;
1384 bool has_returned;
1385 } BlkAioEmAIOCB;
1386
1387 static AioContext *blk_aio_em_aiocb_get_aio_context(BlockAIOCB *acb_)
1388 {
1389 BlkAioEmAIOCB *acb = container_of(acb_, BlkAioEmAIOCB, common);
1390
1391 return blk_get_aio_context(acb->rwco.blk);
1392 }
1393
1394 static const AIOCBInfo blk_aio_em_aiocb_info = {
1395 .aiocb_size = sizeof(BlkAioEmAIOCB),
1396 .get_aio_context = blk_aio_em_aiocb_get_aio_context,
1397 };
1398
1399 static void blk_aio_complete(BlkAioEmAIOCB *acb)
1400 {
1401 if (acb->has_returned) {
1402 acb->common.cb(acb->common.opaque, acb->rwco.ret);
1403 blk_dec_in_flight(acb->rwco.blk);
1404 qemu_aio_unref(acb);
1405 }
1406 }
1407
1408 static void blk_aio_complete_bh(void *opaque)
1409 {
1410 BlkAioEmAIOCB *acb = opaque;
1411 assert(acb->has_returned);
1412 blk_aio_complete(acb);
1413 }
1414
1415 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
1416 int64_t bytes,
1417 void *iobuf, CoroutineEntry co_entry,
1418 BdrvRequestFlags flags,
1419 BlockCompletionFunc *cb, void *opaque)
1420 {
1421 BlkAioEmAIOCB *acb;
1422 Coroutine *co;
1423
1424 blk_inc_in_flight(blk);
1425 acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
1426 acb->rwco = (BlkRwCo) {
1427 .blk = blk,
1428 .offset = offset,
1429 .iobuf = iobuf,
1430 .flags = flags,
1431 .ret = NOT_DONE,
1432 };
1433 acb->bytes = bytes;
1434 acb->has_returned = false;
1435
1436 co = qemu_coroutine_create(co_entry, acb);
1437 bdrv_coroutine_enter(blk_bs(blk), co);
1438
1439 acb->has_returned = true;
1440 if (acb->rwco.ret != NOT_DONE) {
1441 replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
1442 blk_aio_complete_bh, acb);
1443 }
1444
1445 return &acb->common;
1446 }
1447
1448 static void blk_aio_read_entry(void *opaque)
1449 {
1450 BlkAioEmAIOCB *acb = opaque;
1451 BlkRwCo *rwco = &acb->rwco;
1452 QEMUIOVector *qiov = rwco->iobuf;
1453
1454 assert(qiov->size == acb->bytes);
1455 rwco->ret = blk_co_do_preadv(rwco->blk, rwco->offset, acb->bytes,
1456 qiov, rwco->flags);
1457 blk_aio_complete(acb);
1458 }
1459
1460 static void blk_aio_write_entry(void *opaque)
1461 {
1462 BlkAioEmAIOCB *acb = opaque;
1463 BlkRwCo *rwco = &acb->rwco;
1464 QEMUIOVector *qiov = rwco->iobuf;
1465
1466 assert(!qiov || qiov->size == acb->bytes);
1467 rwco->ret = blk_co_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
1468 qiov, 0, rwco->flags);
1469 blk_aio_complete(acb);
1470 }
1471
1472 BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
1473 int64_t bytes, BdrvRequestFlags flags,
1474 BlockCompletionFunc *cb, void *opaque)
1475 {
1476 return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_write_entry,
1477 flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
1478 }
1479
1480 int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes)
1481 {
1482 int ret;
1483 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1484
1485 blk_inc_in_flight(blk);
1486 ret = blk_do_preadv(blk, offset, bytes, &qiov, 0);
1487 blk_dec_in_flight(blk);
1488
1489 return ret < 0 ? ret : bytes;
1490 }
1491
1492 int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
1493 BdrvRequestFlags flags)
1494 {
1495 int ret;
1496 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
1497
1498 ret = blk_pwritev_part(blk, offset, bytes, &qiov, 0, flags);
1499
1500 return ret < 0 ? ret : bytes;
1501 }
1502
1503 int64_t blk_getlength(BlockBackend *blk)
1504 {
1505 if (!blk_is_available(blk)) {
1506 return -ENOMEDIUM;
1507 }
1508
1509 return bdrv_getlength(blk_bs(blk));
1510 }
1511
1512 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
1513 {
1514 if (!blk_bs(blk)) {
1515 *nb_sectors_ptr = 0;
1516 } else {
1517 bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
1518 }
1519 }
1520
1521 int64_t blk_nb_sectors(BlockBackend *blk)
1522 {
1523 if (!blk_is_available(blk)) {
1524 return -ENOMEDIUM;
1525 }
1526
1527 return bdrv_nb_sectors(blk_bs(blk));
1528 }
1529
1530 BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
1531 QEMUIOVector *qiov, BdrvRequestFlags flags,
1532 BlockCompletionFunc *cb, void *opaque)
1533 {
1534 assert((uint64_t)qiov->size <= INT64_MAX);
1535 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1536 blk_aio_read_entry, flags, cb, opaque);
1537 }
1538
1539 BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
1540 QEMUIOVector *qiov, BdrvRequestFlags flags,
1541 BlockCompletionFunc *cb, void *opaque)
1542 {
1543 assert((uint64_t)qiov->size <= INT64_MAX);
1544 return blk_aio_prwv(blk, offset, qiov->size, qiov,
1545 blk_aio_write_entry, flags, cb, opaque);
1546 }
1547
1548 void blk_aio_cancel(BlockAIOCB *acb)
1549 {
1550 bdrv_aio_cancel(acb);
1551 }
1552
1553 void blk_aio_cancel_async(BlockAIOCB *acb)
1554 {
1555 bdrv_aio_cancel_async(acb);
1556 }
1557
1558 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1559 int coroutine_fn
1560 blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1561 {
1562 blk_wait_while_drained(blk);
1563
1564 if (!blk_is_available(blk)) {
1565 return -ENOMEDIUM;
1566 }
1567
1568 return bdrv_co_ioctl(blk_bs(blk), req, buf);
1569 }
1570
1571 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
1572 {
1573 int ret;
1574
1575 blk_inc_in_flight(blk);
1576 ret = blk_do_ioctl(blk, req, buf);
1577 blk_dec_in_flight(blk);
1578
1579 return ret;
1580 }
1581
1582 static void blk_aio_ioctl_entry(void *opaque)
1583 {
1584 BlkAioEmAIOCB *acb = opaque;
1585 BlkRwCo *rwco = &acb->rwco;
1586
1587 rwco->ret = blk_co_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
1588
1589 blk_aio_complete(acb);
1590 }
1591
1592 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
1593 BlockCompletionFunc *cb, void *opaque)
1594 {
1595 return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
1596 }
1597
1598 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1599 int coroutine_fn
1600 blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
1601 {
1602 int ret;
1603
1604 blk_wait_while_drained(blk);
1605
1606 ret = blk_check_byte_request(blk, offset, bytes);
1607 if (ret < 0) {
1608 return ret;
1609 }
1610
1611 return bdrv_co_pdiscard(blk->root, offset, bytes);
1612 }
1613
1614 static void blk_aio_pdiscard_entry(void *opaque)
1615 {
1616 BlkAioEmAIOCB *acb = opaque;
1617 BlkRwCo *rwco = &acb->rwco;
1618
1619 rwco->ret = blk_co_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
1620 blk_aio_complete(acb);
1621 }
1622
1623 BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
1624 int64_t offset, int64_t bytes,
1625 BlockCompletionFunc *cb, void *opaque)
1626 {
1627 return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
1628 cb, opaque);
1629 }
1630
1631 int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
1632 int64_t bytes)
1633 {
1634 int ret;
1635
1636 blk_inc_in_flight(blk);
1637 ret = blk_co_do_pdiscard(blk, offset, bytes);
1638 blk_dec_in_flight(blk);
1639
1640 return ret;
1641 }
1642
1643 int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes)
1644 {
1645 int ret;
1646
1647 blk_inc_in_flight(blk);
1648 ret = blk_do_pdiscard(blk, offset, bytes);
1649 blk_dec_in_flight(blk);
1650
1651 return ret;
1652 }
1653
1654 /* To be called between exactly one pair of blk_inc/dec_in_flight() */
1655 int coroutine_fn blk_co_do_flush(BlockBackend *blk)
1656 {
1657 blk_wait_while_drained(blk);
1658
1659 if (!blk_is_available(blk)) {
1660 return -ENOMEDIUM;
1661 }
1662
1663 return bdrv_co_flush(blk_bs(blk));
1664 }
1665
1666 static void blk_aio_flush_entry(void *opaque)
1667 {
1668 BlkAioEmAIOCB *acb = opaque;
1669 BlkRwCo *rwco = &acb->rwco;
1670
1671 rwco->ret = blk_co_do_flush(rwco->blk);
1672 blk_aio_complete(acb);
1673 }
1674
1675 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
1676 BlockCompletionFunc *cb, void *opaque)
1677 {
1678 return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
1679 }
1680
1681 int coroutine_fn blk_co_flush(BlockBackend *blk)
1682 {
1683 int ret;
1684
1685 blk_inc_in_flight(blk);
1686 ret = blk_co_do_flush(blk);
1687 blk_dec_in_flight(blk);
1688
1689 return ret;
1690 }
1691
1692 int blk_flush(BlockBackend *blk)
1693 {
1694 int ret;
1695
1696 blk_inc_in_flight(blk);
1697 ret = blk_do_flush(blk);
1698 blk_dec_in_flight(blk);
1699
1700 return ret;
1701 }
1702
1703 void blk_drain(BlockBackend *blk)
1704 {
1705 BlockDriverState *bs = blk_bs(blk);
1706
1707 if (bs) {
1708 bdrv_drained_begin(bs);
1709 }
1710
1711 /* We may have -ENOMEDIUM completions in flight */
1712 AIO_WAIT_WHILE(blk_get_aio_context(blk),
1713 qatomic_mb_read(&blk->in_flight) > 0);
1714
1715 if (bs) {
1716 bdrv_drained_end(bs);
1717 }
1718 }
1719
1720 void blk_drain_all(void)
1721 {
1722 BlockBackend *blk = NULL;
1723
1724 bdrv_drain_all_begin();
1725
1726 while ((blk = blk_all_next(blk)) != NULL) {
1727 AioContext *ctx = blk_get_aio_context(blk);
1728
1729 aio_context_acquire(ctx);
1730
1731 /* We may have -ENOMEDIUM completions in flight */
1732 AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0);
1733
1734 aio_context_release(ctx);
1735 }
1736
1737 bdrv_drain_all_end();
1738 }
1739
1740 void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
1741 BlockdevOnError on_write_error)
1742 {
1743 blk->on_read_error = on_read_error;
1744 blk->on_write_error = on_write_error;
1745 }
1746
1747 BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
1748 {
1749 return is_read ? blk->on_read_error : blk->on_write_error;
1750 }
1751
1752 BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
1753 int error)
1754 {
1755 BlockdevOnError on_err = blk_get_on_error(blk, is_read);
1756
1757 switch (on_err) {
1758 case BLOCKDEV_ON_ERROR_ENOSPC:
1759 return (error == ENOSPC) ?
1760 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
1761 case BLOCKDEV_ON_ERROR_STOP:
1762 return BLOCK_ERROR_ACTION_STOP;
1763 case BLOCKDEV_ON_ERROR_REPORT:
1764 return BLOCK_ERROR_ACTION_REPORT;
1765 case BLOCKDEV_ON_ERROR_IGNORE:
1766 return BLOCK_ERROR_ACTION_IGNORE;
1767 case BLOCKDEV_ON_ERROR_AUTO:
1768 default:
1769 abort();
1770 }
1771 }
1772
1773 static void send_qmp_error_event(BlockBackend *blk,
1774 BlockErrorAction action,
1775 bool is_read, int error)
1776 {
1777 IoOperationType optype;
1778 BlockDriverState *bs = blk_bs(blk);
1779
1780 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
1781 qapi_event_send_block_io_error(blk_name(blk), !!bs,
1782 bs ? bdrv_get_node_name(bs) : NULL, optype,
1783 action, blk_iostatus_is_enabled(blk),
1784 error == ENOSPC, strerror(error));
1785 }
1786
1787 /* This is done by device models because, while the block layer knows
1788 * about the error, it does not know whether an operation comes from
1789 * the device or the block layer (from a job, for example).
1790 */
1791 void blk_error_action(BlockBackend *blk, BlockErrorAction action,
1792 bool is_read, int error)
1793 {
1794 assert(error >= 0);
1795
1796 if (action == BLOCK_ERROR_ACTION_STOP) {
1797 /* First set the iostatus, so that "info block" returns an iostatus
1798 * that matches the events raised so far (an additional error iostatus
1799 * is fine, but not a lost one).
1800 */
1801 blk_iostatus_set_err(blk, error);
1802
1803 /* Then raise the request to stop the VM and the event.
1804 * qemu_system_vmstop_request_prepare has two effects. First,
1805 * it ensures that the STOP event always comes after the
1806 * BLOCK_IO_ERROR event. Second, it ensures that even if management
1807 * can observe the STOP event and do a "cont" before the STOP
1808 * event is issued, the VM will not stop. In this case, vm_start()
1809 * also ensures that the STOP/RESUME pair of events is emitted.
1810 */
1811 qemu_system_vmstop_request_prepare();
1812 send_qmp_error_event(blk, action, is_read, error);
1813 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
1814 } else {
1815 send_qmp_error_event(blk, action, is_read, error);
1816 }
1817 }
1818
1819 /*
1820 * Returns true if the BlockBackend can support taking write permissions
1821 * (because its root node is not read-only).
1822 */
1823 bool blk_supports_write_perm(BlockBackend *blk)
1824 {
1825 BlockDriverState *bs = blk_bs(blk);
1826
1827 if (bs) {
1828 return !bdrv_is_read_only(bs);
1829 } else {
1830 return blk->root_state.open_flags & BDRV_O_RDWR;
1831 }
1832 }
1833
1834 /*
1835 * Returns true if the BlockBackend can be written to in its current
1836 * configuration (i.e. if write permission have been requested)
1837 */
1838 bool blk_is_writable(BlockBackend *blk)
1839 {
1840 return blk->perm & BLK_PERM_WRITE;
1841 }
1842
1843 bool blk_is_sg(BlockBackend *blk)
1844 {
1845 BlockDriverState *bs = blk_bs(blk);
1846
1847 if (!bs) {
1848 return false;
1849 }
1850
1851 return bdrv_is_sg(bs);
1852 }
1853
1854 bool blk_enable_write_cache(BlockBackend *blk)
1855 {
1856 return blk->enable_write_cache;
1857 }
1858
1859 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
1860 {
1861 blk->enable_write_cache = wce;
1862 }
1863
1864 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
1865 {
1866 BlockDriverState *bs = blk_bs(blk);
1867
1868 if (!bs) {
1869 error_setg(errp, "Device '%s' has no medium", blk->name);
1870 return;
1871 }
1872
1873 bdrv_invalidate_cache(bs, errp);
1874 }
1875
1876 bool blk_is_inserted(BlockBackend *blk)
1877 {
1878 BlockDriverState *bs = blk_bs(blk);
1879
1880 return bs && bdrv_is_inserted(bs);
1881 }
1882
1883 bool blk_is_available(BlockBackend *blk)
1884 {
1885 return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
1886 }
1887
1888 void blk_lock_medium(BlockBackend *blk, bool locked)
1889 {
1890 BlockDriverState *bs = blk_bs(blk);
1891
1892 if (bs) {
1893 bdrv_lock_medium(bs, locked);
1894 }
1895 }
1896
1897 void blk_eject(BlockBackend *blk, bool eject_flag)
1898 {
1899 BlockDriverState *bs = blk_bs(blk);
1900 char *id;
1901
1902 if (bs) {
1903 bdrv_eject(bs, eject_flag);
1904 }
1905
1906 /* Whether or not we ejected on the backend,
1907 * the frontend experienced a tray event. */
1908 id = blk_get_attached_dev_id(blk);
1909 qapi_event_send_device_tray_moved(blk_name(blk), id,
1910 eject_flag);
1911 g_free(id);
1912 }
1913
1914 int blk_get_flags(BlockBackend *blk)
1915 {
1916 BlockDriverState *bs = blk_bs(blk);
1917
1918 if (bs) {
1919 return bdrv_get_flags(bs);
1920 } else {
1921 return blk->root_state.open_flags;
1922 }
1923 }
1924
1925 /* Returns the minimum request alignment, in bytes; guaranteed nonzero */
1926 uint32_t blk_get_request_alignment(BlockBackend *blk)
1927 {
1928 BlockDriverState *bs = blk_bs(blk);
1929 return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
1930 }
1931
1932 /* Returns the maximum hardware transfer length, in bytes; guaranteed nonzero */
1933 uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
1934 {
1935 BlockDriverState *bs = blk_bs(blk);
1936 uint64_t max = INT_MAX;
1937
1938 if (bs) {
1939 max = MIN_NON_ZERO(max, bs->bl.max_hw_transfer);
1940 max = MIN_NON_ZERO(max, bs->bl.max_transfer);
1941 }
1942 return ROUND_DOWN(max, blk_get_request_alignment(blk));
1943 }
1944
1945 /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
1946 uint32_t blk_get_max_transfer(BlockBackend *blk)
1947 {
1948 BlockDriverState *bs = blk_bs(blk);
1949 uint32_t max = INT_MAX;
1950
1951 if (bs) {
1952 max = MIN_NON_ZERO(max, bs->bl.max_transfer);
1953 }
1954 return ROUND_DOWN(max, blk_get_request_alignment(blk));
1955 }
1956
1957 int blk_get_max_hw_iov(BlockBackend *blk)
1958 {
1959 return MIN_NON_ZERO(blk->root->bs->bl.max_hw_iov,
1960 blk->root->bs->bl.max_iov);
1961 }
1962
1963 int blk_get_max_iov(BlockBackend *blk)
1964 {
1965 return blk->root->bs->bl.max_iov;
1966 }
1967
1968 void blk_set_guest_block_size(BlockBackend *blk, int align)
1969 {
1970 blk->guest_block_size = align;
1971 }
1972
1973 void *blk_try_blockalign(BlockBackend *blk, size_t size)
1974 {
1975 return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
1976 }
1977
1978 void *blk_blockalign(BlockBackend *blk, size_t size)
1979 {
1980 return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
1981 }
1982
1983 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
1984 {
1985 BlockDriverState *bs = blk_bs(blk);
1986
1987 if (!bs) {
1988 return false;
1989 }
1990
1991 return bdrv_op_is_blocked(bs, op, errp);
1992 }
1993
1994 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
1995 {
1996 BlockDriverState *bs = blk_bs(blk);
1997
1998 if (bs) {
1999 bdrv_op_unblock(bs, op, reason);
2000 }
2001 }
2002
2003 void blk_op_block_all(BlockBackend *blk, Error *reason)
2004 {
2005 BlockDriverState *bs = blk_bs(blk);
2006
2007 if (bs) {
2008 bdrv_op_block_all(bs, reason);
2009 }
2010 }
2011
2012 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
2013 {
2014 BlockDriverState *bs = blk_bs(blk);
2015
2016 if (bs) {
2017 bdrv_op_unblock_all(bs, reason);
2018 }
2019 }
2020
2021 AioContext *blk_get_aio_context(BlockBackend *blk)
2022 {
2023 BlockDriverState *bs = blk_bs(blk);
2024
2025 if (bs) {
2026 AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
2027 assert(ctx == blk->ctx);
2028 }
2029
2030 return blk->ctx;
2031 }
2032
2033 static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
2034 {
2035 BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
2036 return blk_get_aio_context(blk_acb->blk);
2037 }
2038
2039 static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
2040 bool update_root_node, Error **errp)
2041 {
2042 BlockDriverState *bs = blk_bs(blk);
2043 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2044 int ret;
2045
2046 if (bs) {
2047 if (update_root_node) {
2048 ret = bdrv_child_try_set_aio_context(bs, new_context, blk->root,
2049 errp);
2050 if (ret < 0) {
2051 return ret;
2052 }
2053 }
2054 if (tgm->throttle_state) {
2055 bdrv_drained_begin(bs);
2056 throttle_group_detach_aio_context(tgm);
2057 throttle_group_attach_aio_context(tgm, new_context);
2058 bdrv_drained_end(bs);
2059 }
2060 }
2061
2062 blk->ctx = new_context;
2063 return 0;
2064 }
2065
2066 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
2067 Error **errp)
2068 {
2069 return blk_do_set_aio_context(blk, new_context, true, errp);
2070 }
2071
2072 static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
2073 GSList **ignore, Error **errp)
2074 {
2075 BlockBackend *blk = child->opaque;
2076
2077 if (blk->allow_aio_context_change) {
2078 return true;
2079 }
2080
2081 /* Only manually created BlockBackends that are not attached to anything
2082 * can change their AioContext without updating their user. */
2083 if (!blk->name || blk->dev) {
2084 /* TODO Add BB name/QOM path */
2085 error_setg(errp, "Cannot change iothread of active block backend");
2086 return false;
2087 }
2088
2089 return true;
2090 }
2091
2092 static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
2093 GSList **ignore)
2094 {
2095 BlockBackend *blk = child->opaque;
2096 blk_do_set_aio_context(blk, ctx, false, &error_abort);
2097 }
2098
2099 void blk_add_aio_context_notifier(BlockBackend *blk,
2100 void (*attached_aio_context)(AioContext *new_context, void *opaque),
2101 void (*detach_aio_context)(void *opaque), void *opaque)
2102 {
2103 BlockBackendAioNotifier *notifier;
2104 BlockDriverState *bs = blk_bs(blk);
2105
2106 notifier = g_new(BlockBackendAioNotifier, 1);
2107 notifier->attached_aio_context = attached_aio_context;
2108 notifier->detach_aio_context = detach_aio_context;
2109 notifier->opaque = opaque;
2110 QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
2111
2112 if (bs) {
2113 bdrv_add_aio_context_notifier(bs, attached_aio_context,
2114 detach_aio_context, opaque);
2115 }
2116 }
2117
2118 void blk_remove_aio_context_notifier(BlockBackend *blk,
2119 void (*attached_aio_context)(AioContext *,
2120 void *),
2121 void (*detach_aio_context)(void *),
2122 void *opaque)
2123 {
2124 BlockBackendAioNotifier *notifier;
2125 BlockDriverState *bs = blk_bs(blk);
2126
2127 if (bs) {
2128 bdrv_remove_aio_context_notifier(bs, attached_aio_context,
2129 detach_aio_context, opaque);
2130 }
2131
2132 QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
2133 if (notifier->attached_aio_context == attached_aio_context &&
2134 notifier->detach_aio_context == detach_aio_context &&
2135 notifier->opaque == opaque) {
2136 QLIST_REMOVE(notifier, list);
2137 g_free(notifier);
2138 return;
2139 }
2140 }
2141
2142 abort();
2143 }
2144
2145 void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
2146 {
2147 notifier_list_add(&blk->remove_bs_notifiers, notify);
2148 }
2149
2150 void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
2151 {
2152 notifier_list_add(&blk->insert_bs_notifiers, notify);
2153 }
2154
2155 void blk_io_plug(BlockBackend *blk)
2156 {
2157 BlockDriverState *bs = blk_bs(blk);
2158
2159 if (bs) {
2160 bdrv_io_plug(bs);
2161 }
2162 }
2163
2164 void blk_io_unplug(BlockBackend *blk)
2165 {
2166 BlockDriverState *bs = blk_bs(blk);
2167
2168 if (bs) {
2169 bdrv_io_unplug(bs);
2170 }
2171 }
2172
2173 BlockAcctStats *blk_get_stats(BlockBackend *blk)
2174 {
2175 return &blk->stats;
2176 }
2177
2178 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
2179 BlockCompletionFunc *cb, void *opaque)
2180 {
2181 return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
2182 }
2183
2184 int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
2185 int64_t bytes, BdrvRequestFlags flags)
2186 {
2187 return blk_co_pwritev(blk, offset, bytes, NULL,
2188 flags | BDRV_REQ_ZERO_WRITE);
2189 }
2190
2191 int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
2192 int64_t bytes)
2193 {
2194 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
2195 return blk_pwritev_part(blk, offset, bytes, &qiov, 0,
2196 BDRV_REQ_WRITE_COMPRESSED);
2197 }
2198
2199 int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
2200 PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
2201 {
2202 if (!blk_is_available(blk)) {
2203 error_setg(errp, "No medium inserted");
2204 return -ENOMEDIUM;
2205 }
2206
2207 return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
2208 }
2209
2210 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
2211 int64_t pos, int size)
2212 {
2213 int ret;
2214
2215 if (!blk_is_available(blk)) {
2216 return -ENOMEDIUM;
2217 }
2218
2219 ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
2220 if (ret < 0) {
2221 return ret;
2222 }
2223
2224 if (ret == size && !blk->enable_write_cache) {
2225 ret = bdrv_flush(blk_bs(blk));
2226 }
2227
2228 return ret < 0 ? ret : size;
2229 }
2230
2231 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
2232 {
2233 if (!blk_is_available(blk)) {
2234 return -ENOMEDIUM;
2235 }
2236
2237 return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
2238 }
2239
2240 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
2241 {
2242 if (!blk_is_available(blk)) {
2243 return -ENOMEDIUM;
2244 }
2245
2246 return bdrv_probe_blocksizes(blk_bs(blk), bsz);
2247 }
2248
2249 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
2250 {
2251 if (!blk_is_available(blk)) {
2252 return -ENOMEDIUM;
2253 }
2254
2255 return bdrv_probe_geometry(blk_bs(blk), geo);
2256 }
2257
2258 /*
2259 * Updates the BlockBackendRootState object with data from the currently
2260 * attached BlockDriverState.
2261 */
2262 void blk_update_root_state(BlockBackend *blk)
2263 {
2264 assert(blk->root);
2265
2266 blk->root_state.open_flags = blk->root->bs->open_flags;
2267 blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
2268 }
2269
2270 /*
2271 * Returns the detect-zeroes setting to be used for bdrv_open() of a
2272 * BlockDriverState which is supposed to inherit the root state.
2273 */
2274 bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
2275 {
2276 return blk->root_state.detect_zeroes;
2277 }
2278
2279 /*
2280 * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
2281 * supposed to inherit the root state.
2282 */
2283 int blk_get_open_flags_from_root_state(BlockBackend *blk)
2284 {
2285 return blk->root_state.open_flags;
2286 }
2287
2288 BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
2289 {
2290 return &blk->root_state;
2291 }
2292
2293 int blk_commit_all(void)
2294 {
2295 BlockBackend *blk = NULL;
2296
2297 while ((blk = blk_all_next(blk)) != NULL) {
2298 AioContext *aio_context = blk_get_aio_context(blk);
2299 BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
2300
2301 aio_context_acquire(aio_context);
2302 if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
2303 int ret;
2304
2305 ret = bdrv_commit(unfiltered_bs);
2306 if (ret < 0) {
2307 aio_context_release(aio_context);
2308 return ret;
2309 }
2310 }
2311 aio_context_release(aio_context);
2312 }
2313 return 0;
2314 }
2315
2316
2317 /* throttling disk I/O limits */
2318 void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
2319 {
2320 throttle_group_config(&blk->public.throttle_group_member, cfg);
2321 }
2322
2323 void blk_io_limits_disable(BlockBackend *blk)
2324 {
2325 BlockDriverState *bs = blk_bs(blk);
2326 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2327 assert(tgm->throttle_state);
2328 if (bs) {
2329 bdrv_drained_begin(bs);
2330 }
2331 throttle_group_unregister_tgm(tgm);
2332 if (bs) {
2333 bdrv_drained_end(bs);
2334 }
2335 }
2336
2337 /* should be called before blk_set_io_limits if a limit is set */
2338 void blk_io_limits_enable(BlockBackend *blk, const char *group)
2339 {
2340 assert(!blk->public.throttle_group_member.throttle_state);
2341 throttle_group_register_tgm(&blk->public.throttle_group_member,
2342 group, blk_get_aio_context(blk));
2343 }
2344
2345 void blk_io_limits_update_group(BlockBackend *blk, const char *group)
2346 {
2347 /* this BB is not part of any group */
2348 if (!blk->public.throttle_group_member.throttle_state) {
2349 return;
2350 }
2351
2352 /* this BB is a part of the same group than the one we want */
2353 if (!g_strcmp0(throttle_group_get_name(&blk->public.throttle_group_member),
2354 group)) {
2355 return;
2356 }
2357
2358 /* need to change the group this bs belong to */
2359 blk_io_limits_disable(blk);
2360 blk_io_limits_enable(blk, group);
2361 }
2362
2363 static void blk_root_drained_begin(BdrvChild *child)
2364 {
2365 BlockBackend *blk = child->opaque;
2366 ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
2367
2368 if (++blk->quiesce_counter == 1) {
2369 if (blk->dev_ops && blk->dev_ops->drained_begin) {
2370 blk->dev_ops->drained_begin(blk->dev_opaque);
2371 }
2372 }
2373
2374 /* Note that blk->root may not be accessible here yet if we are just
2375 * attaching to a BlockDriverState that is drained. Use child instead. */
2376
2377 if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
2378 throttle_group_restart_tgm(tgm);
2379 }
2380 }
2381
2382 static bool blk_root_drained_poll(BdrvChild *child)
2383 {
2384 BlockBackend *blk = child->opaque;
2385 bool busy = false;
2386 assert(blk->quiesce_counter);
2387
2388 if (blk->dev_ops && blk->dev_ops->drained_poll) {
2389 busy = blk->dev_ops->drained_poll(blk->dev_opaque);
2390 }
2391 return busy || !!blk->in_flight;
2392 }
2393
2394 static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
2395 {
2396 BlockBackend *blk = child->opaque;
2397 assert(blk->quiesce_counter);
2398
2399 assert(blk->public.throttle_group_member.io_limits_disabled);
2400 qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
2401
2402 if (--blk->quiesce_counter == 0) {
2403 if (blk->dev_ops && blk->dev_ops->drained_end) {
2404 blk->dev_ops->drained_end(blk->dev_opaque);
2405 }
2406 while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
2407 /* Resume all queued requests */
2408 }
2409 }
2410 }
2411
2412 void blk_register_buf(BlockBackend *blk, void *host, size_t size)
2413 {
2414 bdrv_register_buf(blk_bs(blk), host, size);
2415 }
2416
2417 void blk_unregister_buf(BlockBackend *blk, void *host)
2418 {
2419 bdrv_unregister_buf(blk_bs(blk), host);
2420 }
2421
2422 int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
2423 BlockBackend *blk_out, int64_t off_out,
2424 int64_t bytes, BdrvRequestFlags read_flags,
2425 BdrvRequestFlags write_flags)
2426 {
2427 int r;
2428 r = blk_check_byte_request(blk_in, off_in, bytes);
2429 if (r) {
2430 return r;
2431 }
2432 r = blk_check_byte_request(blk_out, off_out, bytes);
2433 if (r) {
2434 return r;
2435 }
2436 return bdrv_co_copy_range(blk_in->root, off_in,
2437 blk_out->root, off_out,
2438 bytes, read_flags, write_flags);
2439 }
2440
2441 const BdrvChild *blk_root(BlockBackend *blk)
2442 {
2443 return blk->root;
2444 }
2445
2446 int blk_make_empty(BlockBackend *blk, Error **errp)
2447 {
2448 if (!blk_is_available(blk)) {
2449 error_setg(errp, "No medium inserted");
2450 return -ENOMEDIUM;
2451 }
2452
2453 return bdrv_make_empty(blk->root, errp);
2454 }