From: Chris Wilson Date: Mon, 16 Jul 2018 13:40:09 +0000 (+0100) Subject: drm/i915/selftests: Exercise reset to break stuck GTT eviction X-Git-Tag: v5.15~8228^2~19^2~15 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=eb5f43d45b3b3c239b9ca0fde6eee11244a57346;p=mirror_ubuntu-kernels.git drm/i915/selftests: Exercise reset to break stuck GTT eviction We must be able to reset the GPU while we are waiting on it to perform an eviction (unbinding an active vma). So attach a spinning request to a target vma and try and it evict it from a thread to see if that blocks indefinitely. v2: Add a wait for the thread to start just in case that takes more than 10ms... v3: complete() not completion_done() to signal the completion. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180716134009.13143-1-chris@chris-wilson.co.uk --- diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 73462a65a330..65d66cdedd26 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -27,6 +27,7 @@ #include "../i915_selftest.h" #include "i915_random.h" #include "igt_flush_test.h" +#include "igt_wedge_me.h" #include "mock_context.h" #include "mock_drm.h" @@ -921,7 +922,7 @@ static u32 fake_hangcheck(struct i915_request *rq, u32 mask) return reset_count; } -static int igt_wait_reset(void *arg) +static int igt_reset_wait(void *arg) { struct drm_i915_private *i915 = arg; struct i915_request *rq; @@ -995,6 +996,170 @@ unlock: return err; } +struct evict_vma { + struct completion completion; + struct i915_vma *vma; +}; + +static int evict_vma(void *data) +{ + struct evict_vma *arg = data; + struct i915_address_space *vm = arg->vma->vm; + struct drm_i915_private *i915 = vm->i915; + struct drm_mm_node evict = arg->vma->node; + int err; + + complete(&arg->completion); + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_evict_for_node(vm, &evict, 0); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int __igt_reset_evict_vma(struct drm_i915_private *i915, + struct i915_address_space *vm) +{ + struct drm_i915_gem_object *obj; + struct task_struct *tsk = NULL; + struct i915_request *rq; + struct evict_vma arg; + struct hang h; + int err; + + if (!intel_engine_can_store_dword(i915->engine[RCS])) + return 0; + + /* Check that we can recover an unbind stuck on a hanging request */ + + global_reset_lock(i915); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto fini; + } + + arg.vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(arg.vma)) { + err = PTR_ERR(arg.vma); + goto out_obj; + } + + rq = hang_create_request(&h, i915->engine[RCS]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_obj; + } + + err = i915_vma_pin(arg.vma, 0, 0, + i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER); + if (err) + goto out_obj; + + err = i915_vma_move_to_active(arg.vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(arg.vma); + + i915_request_get(rq); + i915_request_add(rq); + if (err) + goto out_rq; + + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_until_running(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_gem_set_wedged(i915); + goto out_reset; + } + + init_completion(&arg.completion); + + tsk = kthread_run(evict_vma, &arg, "igt/evict_vma"); + if (IS_ERR(tsk)) { + err = PTR_ERR(tsk); + tsk = NULL; + goto out_reset; + } + + wait_for_completion(&arg.completion); + + if (wait_for(waitqueue_active(&rq->execute), 10)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("igt/evict_vma kthread did not wait\n"); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); + + i915_gem_set_wedged(i915); + goto out_reset; + } + +out_reset: + fake_hangcheck(rq, intel_engine_flag(rq->engine)); + + if (tsk) { + struct igt_wedge_me w; + + /* The reset, even indirectly, should take less than 10ms. */ + igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/) + err = kthread_stop(tsk); + } + + mutex_lock(&i915->drm.struct_mutex); +out_rq: + i915_request_put(rq); +out_obj: + i915_gem_object_put(obj); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + global_reset_unlock(i915); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +static int igt_reset_evict_ggtt(void *arg) +{ + struct drm_i915_private *i915 = arg; + + return __igt_reset_evict_vma(i915, &i915->ggtt.vm); +} + +static int igt_reset_evict_ppgtt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx; + int err; + + mutex_lock(&i915->drm.struct_mutex); + ctx = kernel_context(i915); + mutex_unlock(&i915->drm.struct_mutex); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = 0; + if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */ + err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm); + + kernel_context_close(ctx); + return err; +} + static int wait_for_others(struct drm_i915_private *i915, struct intel_engine_cs *exclude) { @@ -1240,8 +1405,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_active_engine), SUBTEST(igt_reset_engines), - SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), + SUBTEST(igt_reset_wait), + SUBTEST(igt_reset_evict_ggtt), + SUBTEST(igt_reset_evict_ppgtt), SUBTEST(igt_handle_error), }; bool saved_hangcheck;