/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "block/block.h"
29 #include "block/blockjob_int.h"
30 #include "block/block_int.h"
31 #include "block/trace.h"
32 #include "sysemu/block-backend.h"
33 #include "qapi/error.h"
34 #include "qapi/qapi-events-block-core.h"
35 #include "qapi/qmp/qerror.h"
36 #include "qemu/coroutine.h"
37 #include "qemu/timer.h"
39 /* Transactional group of block jobs */
42 /* Is this txn being cancelled? */
46 QLIST_HEAD(, BlockJob
) jobs
;
/*
 * The block job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor.  The monitor is
 * peculiar in that it accesses the block job list with block_job_get, and
 * therefore needs consistency across block_job_get and the actual operation
 * (e.g. block_job_set_speed).  The consistency is achieved with
 * aio_context_acquire/release.  These functions are declared in blockjob.h.
 *
 * The second includes functions used by the block job drivers and sometimes
 * by the core block layer.  These do not care about locking, because the
 * whole coroutine runs under the AioContext lock, and are declared in
 * blockjob_int.h.
 */
67 static bool is_block_job(Job
*job
)
69 return job_type(job
) == JOB_TYPE_BACKUP
||
70 job_type(job
) == JOB_TYPE_COMMIT
||
71 job_type(job
) == JOB_TYPE_MIRROR
||
72 job_type(job
) == JOB_TYPE_STREAM
;
75 BlockJob
*block_job_next(BlockJob
*bjob
)
77 Job
*job
= bjob
? &bjob
->job
: NULL
;
81 } while (job
&& !is_block_job(job
));
83 return job
? container_of(job
, BlockJob
, job
) : NULL
;
86 BlockJob
*block_job_get(const char *id
)
88 Job
*job
= job_get(id
);
90 if (job
&& is_block_job(job
)) {
91 return container_of(job
, BlockJob
, job
);
97 BlockJobTxn
*block_job_txn_new(void)
99 BlockJobTxn
*txn
= g_new0(BlockJobTxn
, 1);
100 QLIST_INIT(&txn
->jobs
);
105 static void block_job_txn_ref(BlockJobTxn
*txn
)
110 void block_job_txn_unref(BlockJobTxn
*txn
)
112 if (txn
&& --txn
->refcnt
== 0) {
117 void block_job_txn_add_job(BlockJobTxn
*txn
, BlockJob
*job
)
126 QLIST_INSERT_HEAD(&txn
->jobs
, job
, txn_list
);
127 block_job_txn_ref(txn
);
130 void block_job_txn_del_job(BlockJob
*job
)
133 QLIST_REMOVE(job
, txn_list
);
134 block_job_txn_unref(job
->txn
);
139 static void block_job_attached_aio_context(AioContext
*new_context
,
141 static void block_job_detach_aio_context(void *opaque
);
143 void block_job_free(Job
*job
)
145 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
146 BlockDriverState
*bs
= blk_bs(bjob
->blk
);
151 block_job_remove_all_bdrv(bjob
);
152 blk_remove_aio_context_notifier(bjob
->blk
,
153 block_job_attached_aio_context
,
154 block_job_detach_aio_context
, bjob
);
155 blk_unref(bjob
->blk
);
156 error_free(bjob
->blocker
);
159 static void block_job_attached_aio_context(AioContext
*new_context
,
162 BlockJob
*job
= opaque
;
164 job
->job
.aio_context
= new_context
;
165 if (job
->driver
->attached_aio_context
) {
166 job
->driver
->attached_aio_context(job
, new_context
);
169 job_resume(&job
->job
);
172 void block_job_drain(Job
*job
)
174 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
176 blk_drain(bjob
->blk
);
177 if (bjob
->driver
->drain
) {
178 bjob
->driver
->drain(bjob
);
182 static void block_job_detach_aio_context(void *opaque
)
184 BlockJob
*job
= opaque
;
186 /* In case the job terminates during aio_poll()... */
189 job_pause(&job
->job
);
191 while (!job
->job
.paused
&& !job_is_completed(&job
->job
)) {
192 job_drain(&job
->job
);
195 job
->job
.aio_context
= NULL
;
196 job_unref(&job
->job
);
199 static char *child_job_get_parent_desc(BdrvChild
*c
)
201 BlockJob
*job
= c
->opaque
;
202 return g_strdup_printf("%s job '%s'", job_type_str(&job
->job
), job
->job
.id
);
205 static void child_job_drained_begin(BdrvChild
*c
)
207 BlockJob
*job
= c
->opaque
;
208 job_pause(&job
->job
);
211 static void child_job_drained_end(BdrvChild
*c
)
213 BlockJob
*job
= c
->opaque
;
214 job_resume(&job
->job
);
217 static const BdrvChildRole child_job
= {
218 .get_parent_desc
= child_job_get_parent_desc
,
219 .drained_begin
= child_job_drained_begin
,
220 .drained_end
= child_job_drained_end
,
221 .stay_at_node
= true,
224 void block_job_remove_all_bdrv(BlockJob
*job
)
227 for (l
= job
->nodes
; l
; l
= l
->next
) {
228 BdrvChild
*c
= l
->data
;
229 bdrv_op_unblock_all(c
->bs
, job
->blocker
);
230 bdrv_root_unref_child(c
);
232 g_slist_free(job
->nodes
);
236 int block_job_add_bdrv(BlockJob
*job
, const char *name
, BlockDriverState
*bs
,
237 uint64_t perm
, uint64_t shared_perm
, Error
**errp
)
241 c
= bdrv_root_attach_child(bs
, name
, &child_job
, perm
, shared_perm
,
247 job
->nodes
= g_slist_prepend(job
->nodes
, c
);
249 bdrv_op_block_all(bs
, job
->blocker
);
254 bool block_job_is_internal(BlockJob
*job
)
256 return (job
->job
.id
== NULL
);
259 const BlockJobDriver
*block_job_driver(BlockJob
*job
)
264 static int block_job_prepare(BlockJob
*job
)
266 if (job
->job
.ret
== 0 && job
->driver
->prepare
) {
267 job
->job
.ret
= job
->driver
->prepare(job
);
272 static void job_cancel_async(Job
*job
, bool force
)
274 if (job
->user_paused
) {
275 /* Do not call job_enter here, the caller will handle it. */
276 job
->user_paused
= false;
277 if (job
->driver
->user_resume
) {
278 job
->driver
->user_resume(job
);
280 assert(job
->pause_count
> 0);
283 job
->cancelled
= true;
284 /* To prevent 'force == false' overriding a previous 'force == true' */
285 job
->force_cancel
|= force
;
288 static int block_job_txn_apply(BlockJobTxn
*txn
, int fn(BlockJob
*), bool lock
)
291 BlockJob
*job
, *next
;
294 QLIST_FOREACH_SAFE(job
, &txn
->jobs
, txn_list
, next
) {
296 ctx
= blk_get_aio_context(job
->blk
);
297 aio_context_acquire(ctx
);
301 aio_context_release(ctx
);
310 static void block_job_completed_txn_abort(BlockJob
*job
)
313 BlockJobTxn
*txn
= job
->txn
;
318 * We are cancelled by another job, which will handle everything.
322 txn
->aborting
= true;
323 block_job_txn_ref(txn
);
325 /* We are the first failed job. Cancel other jobs. */
326 QLIST_FOREACH(other_job
, &txn
->jobs
, txn_list
) {
327 ctx
= blk_get_aio_context(other_job
->blk
);
328 aio_context_acquire(ctx
);
331 /* Other jobs are effectively cancelled by us, set the status for
332 * them; this job, however, may or may not be cancelled, depending
333 * on the caller, so leave it. */
334 QLIST_FOREACH(other_job
, &txn
->jobs
, txn_list
) {
335 if (other_job
!= job
) {
336 job_cancel_async(&other_job
->job
, false);
339 while (!QLIST_EMPTY(&txn
->jobs
)) {
340 other_job
= QLIST_FIRST(&txn
->jobs
);
341 ctx
= blk_get_aio_context(other_job
->blk
);
342 if (!job_is_completed(&other_job
->job
)) {
343 assert(job_is_cancelled(&other_job
->job
));
344 job_finish_sync(&other_job
->job
, NULL
, NULL
);
346 job_finalize_single(&other_job
->job
);
347 aio_context_release(ctx
);
350 block_job_txn_unref(txn
);
353 static int block_job_needs_finalize(BlockJob
*job
)
355 return !job
->job
.auto_finalize
;
358 static int block_job_finalize_single(BlockJob
*job
)
360 return job_finalize_single(&job
->job
);
363 static void block_job_do_finalize(BlockJob
*job
)
366 assert(job
&& job
->txn
);
368 /* prepare the transaction to complete */
369 rc
= block_job_txn_apply(job
->txn
, block_job_prepare
, true);
371 block_job_completed_txn_abort(job
);
373 block_job_txn_apply(job
->txn
, block_job_finalize_single
, true);
377 static int block_job_transition_to_pending(BlockJob
*job
)
379 job_state_transition(&job
->job
, JOB_STATUS_PENDING
);
380 if (!job
->job
.auto_finalize
) {
381 job_event_pending(&job
->job
);
386 static void block_job_completed_txn_success(BlockJob
*job
)
388 BlockJobTxn
*txn
= job
->txn
;
391 job_state_transition(&job
->job
, JOB_STATUS_WAITING
);
394 * Successful completion, see if there are other running jobs in this
397 QLIST_FOREACH(other_job
, &txn
->jobs
, txn_list
) {
398 if (!job_is_completed(&other_job
->job
)) {
401 assert(other_job
->job
.ret
== 0);
404 block_job_txn_apply(txn
, block_job_transition_to_pending
, false);
406 /* If no jobs need manual finalization, automatically do so */
407 if (block_job_txn_apply(txn
, block_job_needs_finalize
, false) == 0) {
408 block_job_do_finalize(job
);
412 /* Assumes the job_mutex is held */
413 static bool job_timer_pending(Job
*job
)
415 return timer_pending(&job
->sleep_timer
);
418 void block_job_set_speed(BlockJob
*job
, int64_t speed
, Error
**errp
)
420 int64_t old_speed
= job
->speed
;
422 if (job_apply_verb(&job
->job
, JOB_VERB_SET_SPEED
, errp
)) {
426 error_setg(errp
, QERR_INVALID_PARAMETER
, "speed");
430 ratelimit_set_speed(&job
->limit
, speed
, BLOCK_JOB_SLICE_TIME
);
433 if (speed
&& speed
<= old_speed
) {
437 /* kick only if a timer is pending */
438 job_enter_cond(&job
->job
, job_timer_pending
);
441 int64_t block_job_ratelimit_get_delay(BlockJob
*job
, uint64_t n
)
447 return ratelimit_calculate_delay(&job
->limit
, n
);
450 void block_job_finalize(BlockJob
*job
, Error
**errp
)
452 assert(job
&& job
->job
.id
);
453 if (job_apply_verb(&job
->job
, JOB_VERB_FINALIZE
, errp
)) {
456 block_job_do_finalize(job
);
459 void block_job_dismiss(BlockJob
**jobptr
, Error
**errp
)
461 BlockJob
*job
= *jobptr
;
462 /* similarly to _complete, this is QMP-interface only. */
464 if (job_apply_verb(&job
->job
, JOB_VERB_DISMISS
, errp
)) {
468 job_do_dismiss(&job
->job
);
472 void block_job_cancel(BlockJob
*job
, bool force
)
474 if (job
->job
.status
== JOB_STATUS_CONCLUDED
) {
475 job_do_dismiss(&job
->job
);
478 job_cancel_async(&job
->job
, force
);
479 if (!job_started(&job
->job
)) {
480 block_job_completed(job
, -ECANCELED
);
481 } else if (job
->job
.deferred_to_main_loop
) {
482 block_job_completed_txn_abort(job
);
484 block_job_enter(job
);
488 void block_job_user_cancel(BlockJob
*job
, bool force
, Error
**errp
)
490 if (job_apply_verb(&job
->job
, JOB_VERB_CANCEL
, errp
)) {
493 block_job_cancel(job
, force
);
496 /* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
497 * used with job_finish_sync() without the need for (rather nasty) function
498 * pointer casts there. */
499 static void block_job_cancel_err(Job
*job
, Error
**errp
)
501 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
502 assert(is_block_job(job
));
503 block_job_cancel(bjob
, false);
506 int block_job_cancel_sync(BlockJob
*job
)
508 return job_finish_sync(&job
->job
, &block_job_cancel_err
, NULL
);
511 void block_job_cancel_sync_all(void)
514 AioContext
*aio_context
;
516 while ((job
= block_job_next(NULL
))) {
517 aio_context
= blk_get_aio_context(job
->blk
);
518 aio_context_acquire(aio_context
);
519 block_job_cancel_sync(job
);
520 aio_context_release(aio_context
);
524 int block_job_complete_sync(BlockJob
*job
, Error
**errp
)
526 return job_finish_sync(&job
->job
, job_complete
, errp
);
529 void block_job_progress_update(BlockJob
*job
, uint64_t done
)
534 void block_job_progress_set_remaining(BlockJob
*job
, uint64_t remaining
)
536 job
->len
= job
->offset
+ remaining
;
539 BlockJobInfo
*block_job_query(BlockJob
*job
, Error
**errp
)
543 if (block_job_is_internal(job
)) {
544 error_setg(errp
, "Cannot query QEMU internal jobs");
547 info
= g_new0(BlockJobInfo
, 1);
548 info
->type
= g_strdup(job_type_str(&job
->job
));
549 info
->device
= g_strdup(job
->job
.id
);
550 info
->len
= job
->len
;
551 info
->busy
= atomic_read(&job
->job
.busy
);
552 info
->paused
= job
->job
.pause_count
> 0;
553 info
->offset
= job
->offset
;
554 info
->speed
= job
->speed
;
555 info
->io_status
= job
->iostatus
;
556 info
->ready
= job
->ready
;
557 info
->status
= job
->job
.status
;
558 info
->auto_finalize
= job
->job
.auto_finalize
;
559 info
->auto_dismiss
= job
->job
.auto_dismiss
;
560 info
->has_error
= job
->job
.ret
!= 0;
561 info
->error
= job
->job
.ret
? g_strdup(strerror(-job
->job
.ret
)) : NULL
;
565 static void block_job_iostatus_set_err(BlockJob
*job
, int error
)
567 if (job
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
568 job
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
569 BLOCK_DEVICE_IO_STATUS_FAILED
;
573 static void block_job_event_cancelled(Notifier
*n
, void *opaque
)
575 BlockJob
*job
= opaque
;
577 if (block_job_is_internal(job
)) {
581 qapi_event_send_block_job_cancelled(job_type(&job
->job
),
589 static void block_job_event_completed(Notifier
*n
, void *opaque
)
591 BlockJob
*job
= opaque
;
592 const char *msg
= NULL
;
594 if (block_job_is_internal(job
)) {
598 if (job
->job
.ret
< 0) {
599 msg
= strerror(-job
->job
.ret
);
602 qapi_event_send_block_job_completed(job_type(&job
->job
),
612 static void block_job_event_pending(Notifier
*n
, void *opaque
)
614 BlockJob
*job
= opaque
;
616 if (block_job_is_internal(job
)) {
620 qapi_event_send_block_job_pending(job_type(&job
->job
),
/*
 * API for block job drivers and the block layer.  These functions are
 * declared in blockjob_int.h.
 */
630 void *block_job_create(const char *job_id
, const BlockJobDriver
*driver
,
631 BlockJobTxn
*txn
, BlockDriverState
*bs
, uint64_t perm
,
632 uint64_t shared_perm
, int64_t speed
, int flags
,
633 BlockCompletionFunc
*cb
, void *opaque
, Error
**errp
)
640 error_setg(errp
, QERR_DEVICE_IN_USE
, bdrv_get_device_name(bs
));
644 if (job_id
== NULL
&& !(flags
& JOB_INTERNAL
)) {
645 job_id
= bdrv_get_device_name(bs
);
648 blk
= blk_new(perm
, shared_perm
);
649 ret
= blk_insert_bs(blk
, bs
, errp
);
655 job
= job_create(job_id
, &driver
->job_driver
, blk_get_aio_context(blk
),
656 flags
, cb
, opaque
, errp
);
662 assert(is_block_job(&job
->job
));
663 assert(job
->job
.driver
->free
== &block_job_free
);
664 assert(job
->job
.driver
->user_resume
== &block_job_user_resume
);
665 assert(job
->job
.driver
->drain
== &block_job_drain
);
667 job
->driver
= driver
;
670 job
->finalize_cancelled_notifier
.notify
= block_job_event_cancelled
;
671 job
->finalize_completed_notifier
.notify
= block_job_event_completed
;
672 job
->pending_notifier
.notify
= block_job_event_pending
;
674 notifier_list_add(&job
->job
.on_finalize_cancelled
,
675 &job
->finalize_cancelled_notifier
);
676 notifier_list_add(&job
->job
.on_finalize_completed
,
677 &job
->finalize_completed_notifier
);
678 notifier_list_add(&job
->job
.on_pending
, &job
->pending_notifier
);
680 error_setg(&job
->blocker
, "block device is in use by block job: %s",
681 job_type_str(&job
->job
));
682 block_job_add_bdrv(job
, "main node", bs
, 0, BLK_PERM_ALL
, &error_abort
);
685 bdrv_op_unblock(bs
, BLOCK_OP_TYPE_DATAPLANE
, job
->blocker
);
687 blk_add_aio_context_notifier(blk
, block_job_attached_aio_context
,
688 block_job_detach_aio_context
, job
);
690 /* Only set speed when necessary to avoid NotSupported error */
692 Error
*local_err
= NULL
;
694 block_job_set_speed(job
, speed
, &local_err
);
696 job_early_fail(&job
->job
);
697 error_propagate(errp
, local_err
);
702 /* Single jobs are modeled as single-job transactions for sake of
703 * consolidating the job management logic */
705 txn
= block_job_txn_new();
706 block_job_txn_add_job(txn
, job
);
707 block_job_txn_unref(txn
);
709 block_job_txn_add_job(txn
, job
);
715 void block_job_completed(BlockJob
*job
, int ret
)
717 assert(job
&& job
->txn
&& !job_is_completed(&job
->job
));
718 assert(blk_bs(job
->blk
)->job
== job
);
720 job_update_rc(&job
->job
);
721 trace_block_job_completed(job
, ret
, job
->job
.ret
);
723 block_job_completed_txn_abort(job
);
725 block_job_completed_txn_success(job
);
729 void block_job_enter(BlockJob
*job
)
731 job_enter_cond(&job
->job
, NULL
);
734 void block_job_yield(BlockJob
*job
)
736 assert(job
->job
.busy
);
738 /* Check cancellation *before* setting busy = false, too! */
739 if (job_is_cancelled(&job
->job
)) {
743 if (!job_should_pause(&job
->job
)) {
744 job_do_yield(&job
->job
, -1);
747 job_pause_point(&job
->job
);
750 void block_job_iostatus_reset(BlockJob
*job
)
752 if (job
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
755 assert(job
->job
.user_paused
&& job
->job
.pause_count
> 0);
756 job
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
759 void block_job_user_resume(Job
*job
)
761 BlockJob
*bjob
= container_of(job
, BlockJob
, job
);
762 block_job_iostatus_reset(bjob
);
765 void block_job_event_ready(BlockJob
*job
)
767 job_state_transition(&job
->job
, JOB_STATUS_READY
);
770 if (block_job_is_internal(job
)) {
774 qapi_event_send_block_job_ready(job_type(&job
->job
),
778 job
->speed
, &error_abort
);
781 BlockErrorAction
block_job_error_action(BlockJob
*job
, BlockdevOnError on_err
,
782 int is_read
, int error
)
784 BlockErrorAction action
;
787 case BLOCKDEV_ON_ERROR_ENOSPC
:
788 case BLOCKDEV_ON_ERROR_AUTO
:
789 action
= (error
== ENOSPC
) ?
790 BLOCK_ERROR_ACTION_STOP
: BLOCK_ERROR_ACTION_REPORT
;
792 case BLOCKDEV_ON_ERROR_STOP
:
793 action
= BLOCK_ERROR_ACTION_STOP
;
795 case BLOCKDEV_ON_ERROR_REPORT
:
796 action
= BLOCK_ERROR_ACTION_REPORT
;
798 case BLOCKDEV_ON_ERROR_IGNORE
:
799 action
= BLOCK_ERROR_ACTION_IGNORE
;
804 if (!block_job_is_internal(job
)) {
805 qapi_event_send_block_job_error(job
->job
.id
,
806 is_read
? IO_OPERATION_TYPE_READ
:
807 IO_OPERATION_TYPE_WRITE
,
808 action
, &error_abort
);
810 if (action
== BLOCK_ERROR_ACTION_STOP
) {
811 job_pause(&job
->job
);
812 /* make the pause user visible, which will be resumed from QMP. */
813 job
->job
.user_paused
= true;
814 block_job_iostatus_set_err(job
, error
);