]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
drm/i915/execlists: Enable coarse preemption boundaries for gen8
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 29 Mar 2019 13:40:24 +0000 (13:40 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 5 Apr 2019 10:00:28 +0000 (11:00 +0100)
When we introduced preemption, we chose to keep it disabled for gen8 as
supporting preemption inside GPGPU user batches required various w/a in
userspace. Since then, the desire to preempt long queues of requests
between batches (e.g. within busywaiting semaphores) has grown. So allow
arbitration within the busywaits and between requests, but disable
arbitration within user batches so that we can preempt between requests
and not risk breaking GPGPU.

However, since this preemption is much coarser and doesn't interfere
with userspace, we decline to include it amongst the scheduler
capabilities. (This is also required for us to skip over the preemption
selftests that expect to be able to preempt user batches.)

Michal suggested that we could perhaps allow preemption inside gen8
userspace batches if we can satisfy ourselves that the default
preemption settings are viable with existing userspace (principally
OpenCL which already should carry any known workaround). We could then
merge the two code paths back into one, even dropping the artifical
has-preemption device feature flag.

Testcase: igt/gem_exec_scheduler/semaphore-user
References: beecec901790 ("drm/i915/execlists: Preemption!")
Fixes: e88619646971 ("drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Michal Winiarski <michal.winiarski@intel.com> #irc
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190329134024.5254-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/selftests/intel_lrc.c

index fe7ddb1f59e111311f258aaf289dbc002bad5fa0..f8c94405670b039335bd4f54e105ebc711d54eb8 100644 (file)
@@ -562,7 +562,7 @@ static void init_contexts(struct drm_i915_private *i915)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-       return HAS_LOGICAL_RING_PREEMPTION(i915);
+       return HAS_EXECLISTS(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
index bec232acc8d7b1c344d9b62bd8450cfe6a0cb542..b662a054f22807093ca6b0fb159049365514cf84 100644 (file)
@@ -233,7 +233,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 {
        int last_prio;
 
-       if (!intel_engine_has_preemption(engine))
+       if (!engine->preempt_context)
                return false;
 
        if (i915_request_completed(rq))
@@ -2035,7 +2035,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
 {
        u32 *cs;
 
-       cs = intel_ring_begin(rq, 6);
+       cs = intel_ring_begin(rq, 4);
        if (IS_ERR(cs))
                return PTR_ERR(cs);
 
@@ -2046,19 +2046,37 @@ static int gen8_emit_bb_start(struct i915_request *rq,
         * particular all the gen that do not need the w/a at all!), if we
         * took care to make sure that on every switch into this context
         * (both ordinary and for preemption) that arbitrartion was enabled
-        * we would be fine. However, there doesn't seem to be a downside to
-        * being paranoid and making sure it is set before each batch and
-        * every context-switch.
-        *
-        * Note that if we fail to enable arbitration before the request
-        * is complete, then we do not see the context-switch interrupt and
-        * the engine hangs (with RING_HEAD == RING_TAIL).
-        *
-        * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
+        * we would be fine.  However, for gen8 there is another w/a that
+        * requires us to not preempt inside GPGPU execution, so we keep
+        * arbitration disabled for gen8 batches. Arbitration will be
+        * re-enabled before we close the request
+        * (engine->emit_fini_breadcrumb).
         */
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* FIXME(BDW+): Address space and security selectors. */
+       *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+               (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+       *cs++ = lower_32_bits(offset);
+       *cs++ = upper_32_bits(offset);
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+static int gen9_emit_bb_start(struct i915_request *rq,
+                             u64 offset, u32 len,
+                             const unsigned int flags)
+{
+       u32 *cs;
+
+       cs = intel_ring_begin(rq, 6);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
        *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 
-       /* FIXME(BDW): Address space and security selectors. */
        *cs++ = MI_BATCH_BUFFER_START_GEN8 |
                (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
        *cs++ = lower_32_bits(offset);
@@ -2316,7 +2334,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
        engine->flags |= I915_ENGINE_SUPPORTS_STATS;
        if (!intel_vgpu_active(engine->i915))
                engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
-       if (engine->preempt_context)
+       if (engine->preempt_context &&
+           HAS_LOGICAL_RING_PREEMPTION(engine->i915))
                engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 }
 
@@ -2350,7 +2369,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
                 * until a more refined solution exists.
                 */
        }
-       engine->emit_bb_start = gen8_emit_bb_start;
+       if (IS_GEN(engine->i915, 8))
+               engine->emit_bb_start = gen8_emit_bb_start;
+       else
+               engine->emit_bb_start = gen9_emit_bb_start;
 }
 
 static inline void
index 0d3cae564db87efa6c43860e1ebff715e0b5a53b..8bcd4e1d58eebfd1087ae18e78d15d8d56e56896 100644 (file)
@@ -76,6 +76,185 @@ err_unlock:
        return err;
 }
 
+static int live_busywait_preempt(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct i915_gem_context *ctx_hi, *ctx_lo;
+       struct intel_engine_cs *engine;
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       enum intel_engine_id id;
+       intel_wakeref_t wakeref;
+       int err = -ENOMEM;
+       u32 *map;
+
+       /*
+        * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
+        * preempt the busywaits used to synchronise between rings.
+        */
+
+       mutex_lock(&i915->drm.struct_mutex);
+       wakeref = intel_runtime_pm_get(i915);
+
+       ctx_hi = kernel_context(i915);
+       if (!ctx_hi)
+               goto err_unlock;
+       ctx_hi->sched.priority = INT_MAX;
+
+       ctx_lo = kernel_context(i915);
+       if (!ctx_lo)
+               goto err_ctx_hi;
+       ctx_lo->sched.priority = INT_MIN;
+
+       obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+       if (IS_ERR(obj)) {
+               err = PTR_ERR(obj);
+               goto err_ctx_lo;
+       }
+
+       map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+       if (IS_ERR(map)) {
+               err = PTR_ERR(map);
+               goto err_obj;
+       }
+
+       vma = i915_vma_instance(obj, &i915->ggtt.vm, 0);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err_map;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+       if (err)
+               goto err_map;
+
+       for_each_engine(engine, i915, id) {
+               struct i915_request *lo, *hi;
+               struct igt_live_test t;
+               u32 *cs;
+
+               if (!intel_engine_can_store_dword(engine))
+                       continue;
+
+               if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+                       err = -EIO;
+                       goto err_vma;
+               }
+
+               /*
+                * We create two requests. The low priority request
+                * busywaits on a semaphore (inside the ringbuffer where
+                * is should be preemptible) and the high priority requests
+                * uses a MI_STORE_DWORD_IMM to update the semaphore value
+                * allowing the first request to complete. If preemption
+                * fails, we hang instead.
+                */
+
+               lo = i915_request_alloc(engine, ctx_lo);
+               if (IS_ERR(lo)) {
+                       err = PTR_ERR(lo);
+                       goto err_vma;
+               }
+
+               cs = intel_ring_begin(lo, 8);
+               if (IS_ERR(cs)) {
+                       err = PTR_ERR(cs);
+                       i915_request_add(lo);
+                       goto err_vma;
+               }
+
+               *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+               *cs++ = i915_ggtt_offset(vma);
+               *cs++ = 0;
+               *cs++ = 1;
+
+               /* XXX Do we need a flush + invalidate here? */
+
+               *cs++ = MI_SEMAPHORE_WAIT |
+                       MI_SEMAPHORE_GLOBAL_GTT |
+                       MI_SEMAPHORE_POLL |
+                       MI_SEMAPHORE_SAD_EQ_SDD;
+               *cs++ = 0;
+               *cs++ = i915_ggtt_offset(vma);
+               *cs++ = 0;
+
+               intel_ring_advance(lo, cs);
+               i915_request_add(lo);
+
+               if (wait_for(READ_ONCE(*map), 10)) {
+                       err = -ETIMEDOUT;
+                       goto err_vma;
+               }
+
+               /* Low priority request should be busywaiting now */
+               if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
+                       pr_err("%s: Busywaiting request did not!\n",
+                              engine->name);
+                       err = -EIO;
+                       goto err_vma;
+               }
+
+               hi = i915_request_alloc(engine, ctx_hi);
+               if (IS_ERR(hi)) {
+                       err = PTR_ERR(hi);
+                       goto err_vma;
+               }
+
+               cs = intel_ring_begin(hi, 4);
+               if (IS_ERR(cs)) {
+                       err = PTR_ERR(cs);
+                       i915_request_add(hi);
+                       goto err_vma;
+               }
+
+               *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+               *cs++ = i915_ggtt_offset(vma);
+               *cs++ = 0;
+               *cs++ = 0;
+
+               intel_ring_advance(hi, cs);
+               i915_request_add(hi);
+
+               if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
+                       struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+                       pr_err("%s: Failed to preempt semaphore busywait!\n",
+                              engine->name);
+
+                       intel_engine_dump(engine, &p, "%s\n", engine->name);
+                       GEM_TRACE_DUMP();
+
+                       i915_gem_set_wedged(i915);
+                       err = -EIO;
+                       goto err_vma;
+               }
+               GEM_BUG_ON(READ_ONCE(*map));
+
+               if (igt_live_test_end(&t)) {
+                       err = -EIO;
+                       goto err_vma;
+               }
+       }
+
+       err = 0;
+err_vma:
+       i915_vma_unpin(vma);
+err_map:
+       i915_gem_object_unpin_map(obj);
+err_obj:
+       i915_gem_object_put(obj);
+err_ctx_lo:
+       kernel_context_close(ctx_lo);
+err_ctx_hi:
+       kernel_context_close(ctx_hi);
+err_unlock:
+       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+               err = -EIO;
+       intel_runtime_pm_put(i915, wakeref);
+       mutex_unlock(&i915->drm.struct_mutex);
+       return err;
+}
+
 static int live_preempt(void *arg)
 {
        struct drm_i915_private *i915 = arg;
@@ -1127,6 +1306,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(live_sanitycheck),
+               SUBTEST(live_busywait_preempt),
                SUBTEST(live_preempt),
                SUBTEST(live_late_preempt),
                SUBTEST(live_suppress_self_preempt),