X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=job.c;h=660ce22c56b6115155451f9b60c4f237ff99b8eb;hb=4d48c1bc079147a02ec1a3df9a41497c515d2e61;hp=3aaaebafe2c87cd83e16ea6bc9577210f1089fa9;hpb=5101d00d2f1138a73344dc4833587f76d7a5fa5c;p=mirror_qemu.git diff --git a/job.c b/job.c index 3aaaebafe2..660ce22c56 100644 --- a/job.c +++ b/job.c @@ -32,6 +32,27 @@ #include "trace/trace-root.h" #include "qapi/qapi-events-job.h" +/* + * The job API is composed of two categories of functions. + * + * The first includes functions used by the monitor. The monitor is + * peculiar in that it accesses the job list with job_get, and + * therefore needs consistency across job_get and the actual operation + * (e.g. job_user_cancel). To achieve this consistency, the caller + * calls job_lock/job_unlock itself around the whole operation. + * + * + * The second includes functions used by the job drivers and sometimes + * by the core block layer. These delegate the locking to the callee instead. + */ + +/* + * job_mutex protects the jobs list, but also makes the + * struct job fields thread-safe. + */ +QemuMutex job_mutex; + +/* Protected by job_mutex */ static QLIST_HEAD(, Job) jobs = QLIST_HEAD_INITIALIZER(jobs); /* Job State Transition Table */ @@ -56,9 +77,10 @@ bool JobVerbTable[JOB_VERB__MAX][JOB_STATUS__MAX] = { [JOB_VERB_PAUSE] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, [JOB_VERB_RESUME] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, [JOB_VERB_SET_SPEED] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, - [JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, + [JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0}, [JOB_VERB_FINALIZE] = {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, [JOB_VERB_DISMISS] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0}, + [JOB_VERB_CHANGE] = {0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}, }; /* Transactional group of jobs */ @@ -74,17 +96,12 @@ struct JobTxn { int refcnt; }; -/* Right now, this mutex is only needed to synchronize accesses to job->busy - * and job->sleep_timer, such as concurrent calls to job_do_yield and - * job_enter. */ -static QemuMutex job_mutex; - -static void job_lock(void) +void job_lock(void) { qemu_mutex_lock(&job_mutex); } -static void job_unlock(void) +void job_unlock(void) { qemu_mutex_unlock(&job_mutex); } @@ -102,19 +119,38 @@ JobTxn *job_txn_new(void) return txn; } -static void job_txn_ref(JobTxn *txn) +/* Called with job_mutex held. */ +static void job_txn_ref_locked(JobTxn *txn) { txn->refcnt++; } -void job_txn_unref(JobTxn *txn) +void job_txn_unref_locked(JobTxn *txn) { if (txn && --txn->refcnt == 0) { g_free(txn); } } -void job_txn_add_job(JobTxn *txn, Job *job) +void job_txn_unref(JobTxn *txn) +{ + JOB_LOCK_GUARD(); + job_txn_unref_locked(txn); +} + +/** + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either job_txn_unref() or job_completed() to release + * the reference that is automatically grabbed here. + * + * If @txn is NULL, the function does nothing. + * + * Called with job_mutex held. + */ +static void job_txn_add_job_locked(JobTxn *txn, Job *job) { if (!txn) { return; @@ -124,21 +160,22 @@ void job_txn_add_job(JobTxn *txn, Job *job) job->txn = txn; QLIST_INSERT_HEAD(&txn->jobs, job, txn_list); - job_txn_ref(txn); + job_txn_ref_locked(txn); } -static void job_txn_del_job(Job *job) +/* Called with job_mutex held. */ +static void job_txn_del_job_locked(Job *job) { if (job->txn) { QLIST_REMOVE(job, txn_list); - job_txn_unref(job->txn); + job_txn_unref_locked(job->txn); job->txn = NULL; } } -static int job_txn_apply(Job *job, int fn(Job *)) +/* Called with job_mutex held, but releases it temporarily. */ +static int job_txn_apply_locked(Job *job, int fn(Job *)) { - AioContext *inner_ctx; Job *other_job, *next; JobTxn *txn = job->txn; int rc = 0; @@ -149,25 +186,16 @@ static int job_txn_apply(Job *job, int fn(Job *)) * we need to release it here to avoid holding the lock twice - which would * break AIO_WAIT_WHILE from within fn. */ - job_ref(job); - aio_context_release(job->aio_context); + job_ref_locked(job); QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) { - inner_ctx = other_job->aio_context; - aio_context_acquire(inner_ctx); rc = fn(other_job); - aio_context_release(inner_ctx); if (rc) { break; } } - /* - * Note that job->aio_context might have been changed by calling fn, so we - * can't use a local variable to cache it. - */ - aio_context_acquire(job->aio_context); - job_unref(job); + job_unref_locked(job); return rc; } @@ -176,7 +204,8 @@ bool job_is_internal(Job *job) return (job->id == NULL); } -static void job_state_transition(Job *job, JobStatus s1) +/* Called with job_mutex held. */ +static void job_state_transition_locked(Job *job, JobStatus s1) { JobStatus s0 = job->status; assert(s1 >= 0 && s1 < JOB_STATUS__MAX); @@ -191,7 +220,7 @@ static void job_state_transition(Job *job, JobStatus s1) } } -int job_apply_verb(Job *job, JobVerb verb, Error **errp) +int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp) { JobStatus s0 = job->status; assert(verb >= 0 && verb < JOB_VERB__MAX); @@ -215,12 +244,32 @@ const char *job_type_str(const Job *job) return JobType_str(job_type(job)); } +bool job_is_cancelled_locked(Job *job) +{ + /* force_cancel may be true only if cancelled is true, too */ + assert(job->cancelled || !job->force_cancel); + return job->force_cancel; +} + bool job_is_cancelled(Job *job) +{ + JOB_LOCK_GUARD(); + return job_is_cancelled_locked(job); +} + +/* Called with job_mutex held. */ +static bool job_cancel_requested_locked(Job *job) { return job->cancelled; } -bool job_is_ready(Job *job) +bool job_cancel_requested(Job *job) +{ + JOB_LOCK_GUARD(); + return job_cancel_requested_locked(job); +} + +bool job_is_ready_locked(Job *job) { switch (job->status) { case JOB_STATUS_UNDEFINED: @@ -242,7 +291,13 @@ bool job_is_ready(Job *job) return false; } -bool job_is_completed(Job *job) +bool job_is_ready(Job *job) +{ + JOB_LOCK_GUARD(); + return job_is_ready_locked(job); +} + +bool job_is_completed_locked(Job *job) { switch (job->status) { case JOB_STATUS_UNDEFINED: @@ -264,17 +319,24 @@ bool job_is_completed(Job *job) return false; } -static bool job_started(Job *job) +static bool job_is_completed(Job *job) +{ + JOB_LOCK_GUARD(); + return job_is_completed_locked(job); +} + +static bool job_started_locked(Job *job) { return job->co; } -static bool job_should_pause(Job *job) +/* Called with job_mutex held. */ +static bool job_should_pause_locked(Job *job) { return job->pause_count > 0; } -Job *job_next(Job *job) +Job *job_next_locked(Job *job) { if (!job) { return QLIST_FIRST(&jobs); @@ -282,7 +344,13 @@ Job *job_next(Job *job) return QLIST_NEXT(job, job_list); } -Job *job_get(const char *id) +Job *job_next(Job *job) +{ + JOB_LOCK_GUARD(); + return job_next_locked(job); +} + +Job *job_get_locked(const char *id) { Job *job; @@ -295,6 +363,18 @@ Job *job_get(const char *id) return NULL; } +void job_set_aio_context(Job *job, AioContext *ctx) +{ + /* protect against read in job_finish_sync_locked and job_start */ + GLOBAL_STATE_CODE(); + /* protect against read in job_do_yield_locked */ + JOB_LOCK_GUARD(); + /* ensure the job is quiescent while the AioContext is changed */ + assert(job->paused || job_is_completed_locked(job)); + job->aio_context = ctx; +} + +/* Called with job_mutex *not* held. */ static void job_sleep_timer_cb(void *opaque) { Job *job = opaque; @@ -308,6 +388,8 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, { Job *job; + JOB_LOCK_GUARD(); + if (job_id) { if (flags & JOB_INTERNAL) { error_setg(errp, "Cannot specify job ID for internal job"); @@ -317,7 +399,7 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, error_setg(errp, "Invalid job ID '%s'", job_id); return NULL; } - if (job_get(job_id)) { + if (job_get_locked(job_id)) { error_setg(errp, "Job ID '%s' already in use", job_id); return NULL; } @@ -339,12 +421,15 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, job->cb = cb; job->opaque = opaque; + progress_init(&job->progress); + notifier_list_init(&job->on_finalize_cancelled); notifier_list_init(&job->on_finalize_completed); notifier_list_init(&job->on_pending); notifier_list_init(&job->on_ready); + notifier_list_init(&job->on_idle); - job_state_transition(job, JOB_STATUS_CREATED); + job_state_transition_locked(job, JOB_STATUS_CREATED); aio_timer_init(qemu_get_aio_context(), &job->sleep_timer, QEMU_CLOCK_REALTIME, SCALE_NS, job_sleep_timer_cb, job); @@ -355,33 +440,38 @@ void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn, * consolidating the job management logic */ if (!txn) { txn = job_txn_new(); - job_txn_add_job(txn, job); - job_txn_unref(txn); + job_txn_add_job_locked(txn, job); + job_txn_unref_locked(txn); } else { - job_txn_add_job(txn, job); + job_txn_add_job_locked(txn, job); } return job; } -void job_ref(Job *job) +void job_ref_locked(Job *job) { ++job->refcnt; } -void job_unref(Job *job) +void job_unref_locked(Job *job) { + GLOBAL_STATE_CODE(); + if (--job->refcnt == 0) { assert(job->status == JOB_STATUS_NULL); assert(!timer_pending(&job->sleep_timer)); assert(!job->txn); if (job->driver->free) { + job_unlock(); job->driver->free(job); + job_lock(); } QLIST_REMOVE(job, job_list); + progress_destroy(&job->progress); error_free(job->err); g_free(job->id); g_free(job); @@ -403,48 +493,56 @@ void job_progress_increase_remaining(Job *job, uint64_t delta) progress_increase_remaining(&job->progress, delta); } -void job_event_cancelled(Job *job) +/** + * To be called when a cancelled job is finalised. + * Called with job_mutex held. + */ +static void job_event_cancelled_locked(Job *job) { notifier_list_notify(&job->on_finalize_cancelled, job); } -void job_event_completed(Job *job) +/** + * To be called when a successfully completed job is finalised. + * Called with job_mutex held. + */ +static void job_event_completed_locked(Job *job) { notifier_list_notify(&job->on_finalize_completed, job); } -static void job_event_pending(Job *job) +/* Called with job_mutex held. */ +static void job_event_pending_locked(Job *job) { notifier_list_notify(&job->on_pending, job); } -static void job_event_ready(Job *job) +/* Called with job_mutex held. */ +static void job_event_ready_locked(Job *job) { notifier_list_notify(&job->on_ready, job); } -static void job_event_idle(Job *job) +/* Called with job_mutex held. */ +static void job_event_idle_locked(Job *job) { notifier_list_notify(&job->on_idle, job); } -void job_enter_cond(Job *job, bool(*fn)(Job *job)) +void job_enter_cond_locked(Job *job, bool(*fn)(Job *job)) { - if (!job_started(job)) { + if (!job_started_locked(job)) { return; } if (job->deferred_to_main_loop) { return; } - job_lock(); if (job->busy) { - job_unlock(); return; } if (fn && !fn(job)) { - job_unlock(); return; } @@ -452,12 +550,14 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job)) timer_del(&job->sleep_timer); job->busy = true; job_unlock(); - aio_co_enter(job->aio_context, job->co); + aio_co_wake(job->co); + job_lock(); } void job_enter(Job *job) { - job_enter_cond(job, NULL); + JOB_LOCK_GUARD(); + job_enter_cond_locked(job, NULL); } /* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds. @@ -465,100 +565,137 @@ void job_enter(Job *job) * is allowed and cancels the timer. * * If @ns is (uint64_t) -1, no timer is scheduled and job_enter() must be - * called explicitly. */ -static void coroutine_fn job_do_yield(Job *job, uint64_t ns) + * called explicitly. + * + * Called with job_mutex held, but releases it temporarily. + */ +static void coroutine_fn job_do_yield_locked(Job *job, uint64_t ns) { - job_lock(); + AioContext *next_aio_context; + if (ns != -1) { timer_mod(&job->sleep_timer, ns); } job->busy = false; - job_event_idle(job); + job_event_idle_locked(job); job_unlock(); qemu_coroutine_yield(); + job_lock(); + + next_aio_context = job->aio_context; + /* + * Coroutine has resumed, but in the meanwhile the job AioContext + * might have changed via bdrv_try_change_aio_context(), so we need to move + * the coroutine too in the new aiocontext. + */ + while (qemu_get_current_aio_context() != next_aio_context) { + job_unlock(); + aio_co_reschedule_self(next_aio_context); + job_lock(); + next_aio_context = job->aio_context; + } - /* Set by job_enter_cond() before re-entering the coroutine. */ + /* Set by job_enter_cond_locked() before re-entering the coroutine. */ assert(job->busy); } -void coroutine_fn job_pause_point(Job *job) +/* Called with job_mutex held, but releases it temporarily. */ +static void coroutine_fn job_pause_point_locked(Job *job) { - assert(job && job_started(job)); + assert(job && job_started_locked(job)); - if (!job_should_pause(job)) { + if (!job_should_pause_locked(job)) { return; } - if (job_is_cancelled(job)) { + if (job_is_cancelled_locked(job)) { return; } if (job->driver->pause) { + job_unlock(); job->driver->pause(job); + job_lock(); } - if (job_should_pause(job) && !job_is_cancelled(job)) { + if (job_should_pause_locked(job) && !job_is_cancelled_locked(job)) { JobStatus status = job->status; - job_state_transition(job, status == JOB_STATUS_READY - ? JOB_STATUS_STANDBY - : JOB_STATUS_PAUSED); + job_state_transition_locked(job, status == JOB_STATUS_READY + ? JOB_STATUS_STANDBY + : JOB_STATUS_PAUSED); job->paused = true; - job_do_yield(job, -1); + job_do_yield_locked(job, -1); job->paused = false; - job_state_transition(job, status); + job_state_transition_locked(job, status); } if (job->driver->resume) { + job_unlock(); job->driver->resume(job); + job_lock(); } } -void job_yield(Job *job) +void coroutine_fn job_pause_point(Job *job) { + JOB_LOCK_GUARD(); + job_pause_point_locked(job); +} + +void coroutine_fn job_yield(Job *job) +{ + JOB_LOCK_GUARD(); assert(job->busy); /* Check cancellation *before* setting busy = false, too! */ - if (job_is_cancelled(job)) { + if (job_is_cancelled_locked(job)) { return; } - if (!job_should_pause(job)) { - job_do_yield(job, -1); + if (!job_should_pause_locked(job)) { + job_do_yield_locked(job, -1); } - job_pause_point(job); + job_pause_point_locked(job); } void coroutine_fn job_sleep_ns(Job *job, int64_t ns) { + JOB_LOCK_GUARD(); assert(job->busy); /* Check cancellation *before* setting busy = false, too! */ - if (job_is_cancelled(job)) { + if (job_is_cancelled_locked(job)) { return; } - if (!job_should_pause(job)) { - job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns); + if (!job_should_pause_locked(job)) { + job_do_yield_locked(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns); } - job_pause_point(job); + job_pause_point_locked(job); } -/* Assumes the block_job_mutex is held */ -static bool job_timer_not_pending(Job *job) +/* Assumes the job_mutex is held */ +static bool job_timer_not_pending_locked(Job *job) { return !timer_pending(&job->sleep_timer); } -void job_pause(Job *job) +void job_pause_locked(Job *job) { job->pause_count++; if (!job->paused) { - job_enter(job); + job_enter_cond_locked(job, NULL); } } -void job_resume(Job *job) +void job_pause(Job *job) +{ + JOB_LOCK_GUARD(); + job_pause_locked(job); +} + +void job_resume_locked(Job *job) { assert(job->pause_count > 0); job->pause_count--; @@ -567,12 +704,18 @@ void job_resume(Job *job) } /* kick only if no timer is pending */ - job_enter_cond(job, job_timer_not_pending); + job_enter_cond_locked(job, job_timer_not_pending_locked); } -void job_user_pause(Job *job, Error **errp) +void job_resume(Job *job) { - if (job_apply_verb(job, JOB_VERB_PAUSE, errp)) { + JOB_LOCK_GUARD(); + job_resume_locked(job); +} + +void job_user_pause_locked(Job *job, Error **errp) +{ + if (job_apply_verb_locked(job, JOB_VERB_PAUSE, errp)) { return; } if (job->user_paused) { @@ -580,87 +723,95 @@ void job_user_pause(Job *job, Error **errp) return; } job->user_paused = true; - job_pause(job); + job_pause_locked(job); } -bool job_user_paused(Job *job) +bool job_user_paused_locked(Job *job) { return job->user_paused; } -void job_user_resume(Job *job, Error **errp) +void job_user_resume_locked(Job *job, Error **errp) { assert(job); + GLOBAL_STATE_CODE(); if (!job->user_paused || job->pause_count <= 0) { error_setg(errp, "Can't resume a job that was not paused"); return; } - if (job_apply_verb(job, JOB_VERB_RESUME, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_RESUME, errp)) { return; } if (job->driver->user_resume) { + job_unlock(); job->driver->user_resume(job); + job_lock(); } job->user_paused = false; - job_resume(job); + job_resume_locked(job); } -static void job_do_dismiss(Job *job) +/* Called with job_mutex held, but releases it temporarily. */ +static void job_do_dismiss_locked(Job *job) { assert(job); job->busy = false; job->paused = false; job->deferred_to_main_loop = true; - job_txn_del_job(job); + job_txn_del_job_locked(job); - job_state_transition(job, JOB_STATUS_NULL); - job_unref(job); + job_state_transition_locked(job, JOB_STATUS_NULL); + job_unref_locked(job); } -void job_dismiss(Job **jobptr, Error **errp) +void job_dismiss_locked(Job **jobptr, Error **errp) { Job *job = *jobptr; /* similarly to _complete, this is QMP-interface only. */ assert(job->id); - if (job_apply_verb(job, JOB_VERB_DISMISS, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_DISMISS, errp)) { return; } - job_do_dismiss(job); + job_do_dismiss_locked(job); *jobptr = NULL; } void job_early_fail(Job *job) { + JOB_LOCK_GUARD(); assert(job->status == JOB_STATUS_CREATED); - job_do_dismiss(job); + job_do_dismiss_locked(job); } -static void job_conclude(Job *job) +/* Called with job_mutex held. */ +static void job_conclude_locked(Job *job) { - job_state_transition(job, JOB_STATUS_CONCLUDED); - if (job->auto_dismiss || !job_started(job)) { - job_do_dismiss(job); + job_state_transition_locked(job, JOB_STATUS_CONCLUDED); + if (job->auto_dismiss || !job_started_locked(job)) { + job_do_dismiss_locked(job); } } -static void job_update_rc(Job *job) +/* Called with job_mutex held. */ +static void job_update_rc_locked(Job *job) { - if (!job->ret && job_is_cancelled(job)) { + if (!job->ret && job_is_cancelled_locked(job)) { job->ret = -ECANCELED; } if (job->ret) { if (!job->err) { error_setg(&job->err, "%s", strerror(-job->ret)); } - job_state_transition(job, JOB_STATUS_ABORTING); + job_state_transition_locked(job, JOB_STATUS_ABORTING); } } static void job_commit(Job *job) { assert(!job->ret); + GLOBAL_STATE_CODE(); if (job->driver->commit) { job->driver->commit(job); } @@ -669,6 +820,7 @@ static void job_commit(Job *job) static void job_abort(Job *job) { assert(job->ret); + GLOBAL_STATE_CODE(); if (job->driver->abort) { job->driver->abort(job); } @@ -676,19 +828,28 @@ static void job_abort(Job *job) static void job_clean(Job *job) { + GLOBAL_STATE_CODE(); if (job->driver->clean) { job->driver->clean(job); } } -static int job_finalize_single(Job *job) +/* + * Called with job_mutex held, but releases it temporarily. + */ +static int job_finalize_single_locked(Job *job) { - assert(job_is_completed(job)); + int job_ret; + + assert(job_is_completed_locked(job)); /* Ensure abort is called for late-transactional failures */ - job_update_rc(job); + job_update_rc_locked(job); - if (!job->ret) { + job_ret = job->ret; + job_unlock(); + + if (!job_ret) { job_commit(job); } else { job_abort(job); @@ -696,43 +857,71 @@ static int job_finalize_single(Job *job) job_clean(job); if (job->cb) { - job->cb(job->opaque, job->ret); + job->cb(job->opaque, job_ret); } + job_lock(); + /* Emit events only if we actually started */ - if (job_started(job)) { - if (job_is_cancelled(job)) { - job_event_cancelled(job); + if (job_started_locked(job)) { + if (job_is_cancelled_locked(job)) { + job_event_cancelled_locked(job); } else { - job_event_completed(job); + job_event_completed_locked(job); } } - job_txn_del_job(job); - job_conclude(job); + job_txn_del_job_locked(job); + job_conclude_locked(job); return 0; } -static void job_cancel_async(Job *job, bool force) +/* + * Called with job_mutex held, but releases it temporarily. + */ +static void job_cancel_async_locked(Job *job, bool force) { + GLOBAL_STATE_CODE(); + if (job->driver->cancel) { + job_unlock(); + force = job->driver->cancel(job, force); + job_lock(); + } else { + /* No .cancel() means the job will behave as if force-cancelled */ + force = true; + } + if (job->user_paused) { /* Do not call job_enter here, the caller will handle it. */ if (job->driver->user_resume) { + job_unlock(); job->driver->user_resume(job); + job_lock(); } job->user_paused = false; assert(job->pause_count > 0); job->pause_count--; } - job->cancelled = true; - /* To prevent 'force == false' overriding a previous 'force == true' */ - job->force_cancel |= force; + + /* + * Ignore soft cancel requests after the job is already done + * (We will still invoke job->driver->cancel() above, but if the + * job driver supports soft cancelling and the job is done, that + * should be a no-op, too. We still call it so it can override + * @force.) + */ + if (force || !job->deferred_to_main_loop) { + job->cancelled = true; + /* To prevent 'force == false' overriding a previous 'force == true' */ + job->force_cancel |= force; + } } -static void job_completed_txn_abort(Job *job) +/* + * Called with job_mutex held, but releases it temporarily. + */ +static void job_completed_txn_abort_locked(Job *job) { - AioContext *outer_ctx = job->aio_context; - AioContext *ctx; JobTxn *txn = job->txn; Job *other_job; @@ -743,159 +932,161 @@ static void job_completed_txn_abort(Job *job) return; } txn->aborting = true; - job_txn_ref(txn); + job_txn_ref_locked(txn); - /* We can only hold the single job's AioContext lock while calling - * job_finalize_single() because the finalization callbacks can involve - * calls of AIO_WAIT_WHILE(), which could deadlock otherwise. */ - aio_context_release(outer_ctx); + job_ref_locked(job); /* Other jobs are effectively cancelled by us, set the status for * them; this job, however, may or may not be cancelled, depending * on the caller, so leave it. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { if (other_job != job) { - ctx = other_job->aio_context; - aio_context_acquire(ctx); - job_cancel_async(other_job, false); - aio_context_release(ctx); + /* + * This is a transaction: If one job failed, no result will matter. + * Therefore, pass force=true to terminate all other jobs as quickly + * as possible. + */ + job_cancel_async_locked(other_job, true); } } while (!QLIST_EMPTY(&txn->jobs)) { other_job = QLIST_FIRST(&txn->jobs); - ctx = other_job->aio_context; - aio_context_acquire(ctx); - if (!job_is_completed(other_job)) { - assert(job_is_cancelled(other_job)); - job_finish_sync(other_job, NULL, NULL); + if (!job_is_completed_locked(other_job)) { + assert(job_cancel_requested_locked(other_job)); + job_finish_sync_locked(other_job, NULL, NULL); } - job_finalize_single(other_job); - aio_context_release(ctx); + job_finalize_single_locked(other_job); } - aio_context_acquire(outer_ctx); - - job_txn_unref(txn); + job_unref_locked(job); + job_txn_unref_locked(txn); } -static int job_prepare(Job *job) +/* Called with job_mutex held, but releases it temporarily */ +static int job_prepare_locked(Job *job) { + int ret; + + GLOBAL_STATE_CODE(); + if (job->ret == 0 && job->driver->prepare) { - job->ret = job->driver->prepare(job); - job_update_rc(job); + job_unlock(); + ret = job->driver->prepare(job); + job_lock(); + job->ret = ret; + job_update_rc_locked(job); } + return job->ret; } -static int job_needs_finalize(Job *job) +/* Called with job_mutex held */ +static int job_needs_finalize_locked(Job *job) { return !job->auto_finalize; } -static void job_do_finalize(Job *job) +/* Called with job_mutex held */ +static void job_do_finalize_locked(Job *job) { int rc; assert(job && job->txn); /* prepare the transaction to complete */ - rc = job_txn_apply(job, job_prepare); + rc = job_txn_apply_locked(job, job_prepare_locked); if (rc) { - job_completed_txn_abort(job); + job_completed_txn_abort_locked(job); } else { - job_txn_apply(job, job_finalize_single); + job_txn_apply_locked(job, job_finalize_single_locked); } } -void job_finalize(Job *job, Error **errp) +void job_finalize_locked(Job *job, Error **errp) { assert(job && job->id); - if (job_apply_verb(job, JOB_VERB_FINALIZE, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_FINALIZE, errp)) { return; } - job_do_finalize(job); + job_do_finalize_locked(job); } -static int job_transition_to_pending(Job *job) +/* Called with job_mutex held. */ +static int job_transition_to_pending_locked(Job *job) { - job_state_transition(job, JOB_STATUS_PENDING); + job_state_transition_locked(job, JOB_STATUS_PENDING); if (!job->auto_finalize) { - job_event_pending(job); + job_event_pending_locked(job); } return 0; } void job_transition_to_ready(Job *job) { - job_state_transition(job, JOB_STATUS_READY); - job_event_ready(job); + JOB_LOCK_GUARD(); + job_state_transition_locked(job, JOB_STATUS_READY); + job_event_ready_locked(job); } -static void job_completed_txn_success(Job *job) +/* Called with job_mutex held. */ +static void job_completed_txn_success_locked(Job *job) { JobTxn *txn = job->txn; Job *other_job; - job_state_transition(job, JOB_STATUS_WAITING); + job_state_transition_locked(job, JOB_STATUS_WAITING); /* * Successful completion, see if there are other running jobs in this * txn. */ QLIST_FOREACH(other_job, &txn->jobs, txn_list) { - if (!job_is_completed(other_job)) { + if (!job_is_completed_locked(other_job)) { return; } assert(other_job->ret == 0); } - job_txn_apply(job, job_transition_to_pending); + job_txn_apply_locked(job, job_transition_to_pending_locked); /* If no jobs need manual finalization, automatically do so */ - if (job_txn_apply(job, job_needs_finalize) == 0) { - job_do_finalize(job); + if (job_txn_apply_locked(job, job_needs_finalize_locked) == 0) { + job_do_finalize_locked(job); } } -static void job_completed(Job *job) +/* Called with job_mutex held. */ +static void job_completed_locked(Job *job) { - assert(job && job->txn && !job_is_completed(job)); + assert(job && job->txn && !job_is_completed_locked(job)); - job_update_rc(job); + job_update_rc_locked(job); trace_job_completed(job, job->ret); if (job->ret) { - job_completed_txn_abort(job); + job_completed_txn_abort_locked(job); } else { - job_completed_txn_success(job); + job_completed_txn_success_locked(job); } } -/** Useful only as a type shim for aio_bh_schedule_oneshot. */ +/** + * Useful only as a type shim for aio_bh_schedule_oneshot. + * Called with job_mutex *not* held. + */ static void job_exit(void *opaque) { Job *job = (Job *)opaque; - AioContext *ctx; - - job_ref(job); - aio_context_acquire(job->aio_context); + JOB_LOCK_GUARD(); + job_ref_locked(job); /* This is a lie, we're not quiescent, but still doing the completion * callbacks. However, completion callbacks tend to involve operations that * drain block nodes, and if .drained_poll still returned true, we would * deadlock. */ job->busy = false; - job_event_idle(job); + job_event_idle_locked(job); - job_completed(job); - - /* - * Note that calling job_completed can move the job to a different - * aio_context, so we cannot cache from above. job_txn_apply takes care of - * acquiring the new lock, and we ref/unref to avoid job_completed freeing - * the job underneath us. - */ - ctx = job->aio_context; - job_unref(job); - aio_context_release(ctx); + job_completed_locked(job); + job_unref_locked(job); } /** @@ -905,118 +1096,169 @@ static void job_exit(void *opaque) static void coroutine_fn job_co_entry(void *opaque) { Job *job = opaque; + int ret; assert(job && job->driver && job->driver->run); - job_pause_point(job); - job->ret = job->driver->run(job, &job->err); - job->deferred_to_main_loop = true; - job->busy = true; + WITH_JOB_LOCK_GUARD() { + assert(job->aio_context == qemu_get_current_aio_context()); + job_pause_point_locked(job); + } + ret = job->driver->run(job, &job->err); + WITH_JOB_LOCK_GUARD() { + job->ret = ret; + job->deferred_to_main_loop = true; + job->busy = true; + } aio_bh_schedule_oneshot(qemu_get_aio_context(), job_exit, job); } void job_start(Job *job) { - assert(job && !job_started(job) && job->paused && - job->driver && job->driver->run); - job->co = qemu_coroutine_create(job_co_entry, job); - job->pause_count--; - job->busy = true; - job->paused = false; - job_state_transition(job, JOB_STATUS_RUNNING); + assert(qemu_in_main_thread()); + + WITH_JOB_LOCK_GUARD() { + assert(job && !job_started_locked(job) && job->paused && + job->driver && job->driver->run); + job->co = qemu_coroutine_create(job_co_entry, job); + job->pause_count--; + job->busy = true; + job->paused = false; + job_state_transition_locked(job, JOB_STATUS_RUNNING); + } aio_co_enter(job->aio_context, job->co); } -void job_cancel(Job *job, bool force) +void job_cancel_locked(Job *job, bool force) { if (job->status == JOB_STATUS_CONCLUDED) { - job_do_dismiss(job); + job_do_dismiss_locked(job); return; } - job_cancel_async(job, force); - if (!job_started(job)) { - job_completed(job); + job_cancel_async_locked(job, force); + if (!job_started_locked(job)) { + job_completed_locked(job); } else if (job->deferred_to_main_loop) { - job_completed_txn_abort(job); + /* + * job_cancel_async() ignores soft-cancel requests for jobs + * that are already done (i.e. deferred to the main loop). We + * have to check again whether the job is really cancelled. + * (job_cancel_requested() and job_is_cancelled() are equivalent + * here, because job_cancel_async() will make soft-cancel + * requests no-ops when deferred_to_main_loop is true. We + * choose to call job_is_cancelled() to show that we invoke + * job_completed_txn_abort() only for force-cancelled jobs.) + */ + if (job_is_cancelled_locked(job)) { + job_completed_txn_abort_locked(job); + } } else { - job_enter(job); + job_enter_cond_locked(job, NULL); } } -void job_user_cancel(Job *job, bool force, Error **errp) +void job_user_cancel_locked(Job *job, bool force, Error **errp) { - if (job_apply_verb(job, JOB_VERB_CANCEL, errp)) { + if (job_apply_verb_locked(job, JOB_VERB_CANCEL, errp)) { return; } - job_cancel(job, force); + job_cancel_locked(job, force); +} + +/* A wrapper around job_cancel_locked() taking an Error ** parameter so it may + * be used with job_finish_sync_locked() without the need for (rather nasty) + * function pointer casts there. + * + * Called with job_mutex held. + */ +static void job_cancel_err_locked(Job *job, Error **errp) +{ + job_cancel_locked(job, false); +} + +/** + * Same as job_cancel_err(), but force-cancel. + * Called with job_mutex held. + */ +static void job_force_cancel_err_locked(Job *job, Error **errp) +{ + job_cancel_locked(job, true); } -/* A wrapper around job_cancel() taking an Error ** parameter so it may be - * used with job_finish_sync() without the need for (rather nasty) function - * pointer casts there. */ -static void job_cancel_err(Job *job, Error **errp) +int job_cancel_sync_locked(Job *job, bool force) { - job_cancel(job, false); + if (force) { + return job_finish_sync_locked(job, &job_force_cancel_err_locked, NULL); + } else { + return job_finish_sync_locked(job, &job_cancel_err_locked, NULL); + } } -int job_cancel_sync(Job *job) +int job_cancel_sync(Job *job, bool force) { - return job_finish_sync(job, &job_cancel_err, NULL); + JOB_LOCK_GUARD(); + return job_cancel_sync_locked(job, force); } void job_cancel_sync_all(void) { Job *job; - AioContext *aio_context; + JOB_LOCK_GUARD(); - while ((job = job_next(NULL))) { - aio_context = job->aio_context; - aio_context_acquire(aio_context); - job_cancel_sync(job); - aio_context_release(aio_context); + while ((job = job_next_locked(NULL))) { + job_cancel_sync_locked(job, true); } } -int job_complete_sync(Job *job, Error **errp) +int job_complete_sync_locked(Job *job, Error **errp) { - return job_finish_sync(job, job_complete, errp); + return job_finish_sync_locked(job, job_complete_locked, errp); } -void job_complete(Job *job, Error **errp) +void job_complete_locked(Job *job, Error **errp) { /* Should not be reachable via external interface for internal jobs */ assert(job->id); - if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) { + GLOBAL_STATE_CODE(); + if (job_apply_verb_locked(job, JOB_VERB_COMPLETE, errp)) { return; } - if (job->pause_count || job_is_cancelled(job) || !job->driver->complete) { + if (job_cancel_requested_locked(job) || !job->driver->complete) { error_setg(errp, "The active block job '%s' cannot be completed", job->id); return; } + job_unlock(); job->driver->complete(job, errp); + job_lock(); } -int job_finish_sync(Job *job, void (*finish)(Job *, Error **errp), Error **errp) +int job_finish_sync_locked(Job *job, + void (*finish)(Job *, Error **errp), + Error **errp) { Error *local_err = NULL; int ret; + GLOBAL_STATE_CODE(); - job_ref(job); + job_ref_locked(job); if (finish) { finish(job, &local_err); } if (local_err) { error_propagate(errp, local_err); - job_unref(job); + job_unref_locked(job); return -EBUSY; } - AIO_WAIT_WHILE(job->aio_context, - (job_enter(job), !job_is_completed(job))); + job_unlock(); + AIO_WAIT_WHILE_UNLOCKED(job->aio_context, + (job_enter(job), !job_is_completed(job))); + job_lock(); - ret = (job_is_cancelled(job) && job->ret == 0) ? -ECANCELED : job->ret; - job_unref(job); + ret = (job_is_cancelled_locked(job) && job->ret == 0) + ? -ECANCELED : job->ret; + job_unref_locked(job); return ret; }