drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)

author Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Tue, 5 Feb 2019 09:50:31 +0000 (09:50 +0000)

committer Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Tue, 5 Feb 2019 11:32:03 +0000 (11:32 +0000)
author Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Tue, 5 Feb 2019 09:50:31 +0000 (09:50 +0000)
committer Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Tue, 5 Feb 2019 11:32:03 +0000 (11:32 +0000)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c

index d3887c27c3bab32ebe8a4f8ff9a7fec94e6bba9d..2d3e1ce9cc76717d4c6a30aa4e89d2f4fdd37d40 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -89,6 +89,7 @@
  #include <drm/i915_drm.h>
  #include "i915_drv.h"
  #include "i915_trace.h"
+#include "intel_lrc_reg.h"
  #include "intel_workarounds.h"
  
  #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -321,6 +322,15 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
         return desc;
  }
  
+static void intel_context_retire(struct i915_gem_active *active,
+                                struct i915_request *rq)
+{
+       struct intel_context *ce =
+               container_of(active, typeof(*ce), active_tracker);
+
+       intel_context_unpin(ce);
+}
+
  void
  intel_context_init(struct intel_context *ce,
                    struct i915_gem_context *ctx,
@@ -333,6 +343,8 @@ intel_context_init(struct intel_context *ce,
  
         /* Use the whole device by default */
         ce->sseu = intel_device_default_sseu(ctx->i915);
+
+       init_request_active(&ce->active_tracker, intel_context_retire);
  }
  
  static struct i915_gem_context *
@@ -850,6 +862,56 @@ out:
         return 0;
  }
  
+static int get_sseu(struct i915_gem_context *ctx,
+                   struct drm_i915_gem_context_param *args)
+{
+       struct drm_i915_gem_context_param_sseu user_sseu;
+       struct intel_engine_cs *engine;
+       struct intel_context *ce;
+       int ret;
+
+       if (args->size == 0)
+               goto out;
+       else if (args->size < sizeof(user_sseu))
+               return -EINVAL;
+
+       if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+                          sizeof(user_sseu)))
+               return -EFAULT;
+
+       if (user_sseu.flags || user_sseu.rsvd)
+               return -EINVAL;
+
+       engine = intel_engine_lookup_user(ctx->i915,
+                                         user_sseu.engine_class,
+                                         user_sseu.engine_instance);
+       if (!engine)
+               return -EINVAL;
+
+       /* Only use for mutex here is to serialize get_param and set_param. */
+       ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+       if (ret)
+               return ret;
+
+       ce = to_intel_context(ctx, engine);
+
+       user_sseu.slice_mask = ce->sseu.slice_mask;
+       user_sseu.subslice_mask = ce->sseu.subslice_mask;
+       user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+       user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+       mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+       if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+                        sizeof(user_sseu)))
+               return -EFAULT;
+
+out:
+       args->size = sizeof(user_sseu);
+
+       return 0;
+}
+
  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
                                     struct drm_file *file)
  {
@@ -862,15 +924,17 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
         if (!ctx)
                 return -ENOENT;
  
-       args->size = 0;
         switch (args->param) {
         case I915_CONTEXT_PARAM_BAN_PERIOD:
                 ret = -EINVAL;
                 break;
         case I915_CONTEXT_PARAM_NO_ZEROMAP:
+               args->size = 0;
                 args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
                 break;
         case I915_CONTEXT_PARAM_GTT_SIZE:
+               args->size = 0;
+
                 if (ctx->ppgtt)
                         args->value = ctx->ppgtt->vm.total;
                 else if (to_i915(dev)->mm.aliasing_ppgtt)
@@ -879,14 +943,20 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
                         args->value = to_i915(dev)->ggtt.vm.total;
                 break;
         case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+               args->size = 0;
                 args->value = i915_gem_context_no_error_capture(ctx);
                 break;
         case I915_CONTEXT_PARAM_BANNABLE:
+               args->size = 0;
                 args->value = i915_gem_context_is_bannable(ctx);
                 break;
         case I915_CONTEXT_PARAM_PRIORITY:
+               args->size = 0;
                 args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
                 break;
+       case I915_CONTEXT_PARAM_SSEU:
+               ret = get_sseu(ctx, args);
+               break;
         default:
                 ret = -EINVAL;
                 break;
@@ -896,6 +966,270 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
         return ret;
  }
  
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+                                struct intel_context *ce,
+                                struct intel_sseu sseu)
+{
+       u64 offset;
+       u32 *cs;
+
+       cs = intel_ring_begin(rq, 4);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       offset = i915_ggtt_offset(ce->state) +
+                LRC_STATE_PN * PAGE_SIZE +
+                (CTX_R_PWR_CLK_STATE + 1) * 4;
+
+       *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+       *cs++ = lower_32_bits(offset);
+       *cs++ = upper_32_bits(offset);
+       *cs++ = gen8_make_rpcs(rq->i915, &sseu);
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+static int
+gen8_modify_rpcs_gpu(struct intel_context *ce,
+                    struct intel_engine_cs *engine,
+                    struct intel_sseu sseu)
+{
+       struct drm_i915_private *i915 = engine->i915;
+       struct i915_request *rq, *prev;
+       intel_wakeref_t wakeref;
+       int ret;
+
+       GEM_BUG_ON(!ce->pin_count);
+
+       lockdep_assert_held(&i915->drm.struct_mutex);
+
+       /* Submitting requests etc needs the hw awake. */
+       wakeref = intel_runtime_pm_get(i915);
+
+       rq = i915_request_alloc(engine, i915->kernel_context);
+       if (IS_ERR(rq)) {
+               ret = PTR_ERR(rq);
+               goto out_put;
+       }
+
+       /* Queue this switch after all other activity by this context. */
+       prev = i915_gem_active_raw(&ce->ring->timeline->last_request,
+                                  &i915->drm.struct_mutex);
+       if (prev && !i915_request_completed(prev)) {
+               ret = i915_request_await_dma_fence(rq, &prev->fence);
+               if (ret < 0)
+                       goto out_add;
+       }
+
+       /* Order all following requests to be after. */
+       ret = i915_timeline_set_barrier(ce->ring->timeline, rq);
+       if (ret)
+               goto out_add;
+
+       ret = gen8_emit_rpcs_config(rq, ce, sseu);
+       if (ret)
+               goto out_add;
+
+       /*
+        * Guarantee context image and the timeline remain pinned until the
+        * modifying request is retired by setting the ce activity tracker.
+        *
+        * But we only need to take one pin on the account of it. Or in other
+        * words transfer the pinned ce object to tracked active request.
+        */
+       if (!i915_gem_active_isset(&ce->active_tracker))
+               __intel_context_pin(ce);
+       i915_gem_active_set(&ce->active_tracker, rq);
+
+out_add:
+       i915_request_add(rq);
+out_put:
+       intel_runtime_pm_put(i915, wakeref);
+
+       return ret;
+}
+
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+                                 struct intel_engine_cs *engine,
+                                 struct intel_sseu sseu)
+{
+       struct intel_context *ce = to_intel_context(ctx, engine);
+       int ret;
+
+       GEM_BUG_ON(INTEL_GEN(ctx->i915) < 8);
+       GEM_BUG_ON(engine->id != RCS);
+
+       ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+       if (ret)
+               return ret;
+
+       /* Nothing to do if unmodified. */
+       if (!memcmp(&ce->sseu, &sseu, sizeof(sseu)))
+               goto out;
+
+       /*
+        * If context is not idle we have to submit an ordered request to modify
+        * its context image via the kernel context. Pristine and idle contexts
+        * will be configured on pinning.
+        */
+       if (ce->pin_count)
+               ret = gen8_modify_rpcs_gpu(ce, engine, sseu);
+
+       if (!ret)
+               ce->sseu = sseu;
+
+out:
+       mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+       return ret;
+}
+
+static int
+user_to_context_sseu(struct drm_i915_private *i915,
+                    const struct drm_i915_gem_context_param_sseu *user,
+                    struct intel_sseu *context)
+{
+       const struct sseu_dev_info *device = &RUNTIME_INFO(i915)->sseu;
+
+       /* No zeros in any field. */
+       if (!user->slice_mask || !user->subslice_mask ||
+           !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+               return -EINVAL;
+
+       /* Max >= min. */
+       if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+               return -EINVAL;
+
+       /*
+        * Some future proofing on the types since the uAPI is wider than the
+        * current internal implementation.
+        */
+       if (overflows_type(user->slice_mask, context->slice_mask) ||
+           overflows_type(user->subslice_mask, context->subslice_mask) ||
+           overflows_type(user->min_eus_per_subslice,
+                          context->min_eus_per_subslice) ||
+           overflows_type(user->max_eus_per_subslice,
+                          context->max_eus_per_subslice))
+               return -EINVAL;
+
+       /* Check validity against hardware. */
+       if (user->slice_mask & ~device->slice_mask)
+               return -EINVAL;
+
+       if (user->subslice_mask & ~device->subslice_mask[0])
+               return -EINVAL;
+
+       if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+               return -EINVAL;
+
+       context->slice_mask = user->slice_mask;
+       context->subslice_mask = user->subslice_mask;
+       context->min_eus_per_subslice = user->min_eus_per_subslice;
+       context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+       /* Part specific restrictions. */
+       if (IS_GEN(i915, 11)) {
+               unsigned int hw_s = hweight8(device->slice_mask);
+               unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+               unsigned int req_s = hweight8(context->slice_mask);
+               unsigned int req_ss = hweight8(context->subslice_mask);
+
+               /*
+                * Only full subslice enablement is possible if more than one
+                * slice is turned on.
+                */
+               if (req_s > 1 && req_ss != hw_ss_per_s)
+                       return -EINVAL;
+
+               /*
+                * If more than four (SScount bitfield limit) subslices are
+                * requested then the number has to be even.
+                */
+               if (req_ss > 4 && (req_ss & 1))
+                       return -EINVAL;
+
+               /*
+                * If only one slice is enabled and subslice count is below the
+                * device full enablement, it must be at most half of all the
+                * available subslices.
+                */
+               if (req_s == 1 && req_ss < hw_ss_per_s &&
+                   req_ss > (hw_ss_per_s / 2))
+                       return -EINVAL;
+
+               /* ABI restriction - VME use case only. */
+
+               /* All slices or one slice only. */
+               if (req_s != 1 && req_s != hw_s)
+                       return -EINVAL;
+
+               /*
+                * Half subslices or full enablement only when one slice is
+                * enabled.
+                */
+               if (req_s == 1 &&
+                   (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+                       return -EINVAL;
+
+               /* No EU configuration changes. */
+               if ((user->min_eus_per_subslice !=
+                    device->max_eus_per_subslice) ||
+                   (user->max_eus_per_subslice !=
+                    device->max_eus_per_subslice))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+                   struct drm_i915_gem_context_param *args)
+{
+       struct drm_i915_private *i915 = ctx->i915;
+       struct drm_i915_gem_context_param_sseu user_sseu;
+       struct intel_engine_cs *engine;
+       struct intel_sseu sseu;
+       int ret;
+
+       if (args->size < sizeof(user_sseu))
+               return -EINVAL;
+
+       if (!IS_GEN(i915, 11))
+               return -ENODEV;
+
+       if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+                          sizeof(user_sseu)))
+               return -EFAULT;
+
+       if (user_sseu.flags || user_sseu.rsvd)
+               return -EINVAL;
+
+       engine = intel_engine_lookup_user(i915,
+                                         user_sseu.engine_class,
+                                         user_sseu.engine_instance);
+       if (!engine)
+               return -EINVAL;
+
+       /* Only render engine supports RPCS configuration. */
+       if (engine->class != RENDER_CLASS)
+               return -ENODEV;
+
+       ret = user_to_context_sseu(i915, &user_sseu, &sseu);
+       if (ret)
+               return ret;
+
+       ret = i915_gem_context_reconfigure_sseu(ctx, engine, sseu);
+       if (ret)
+               return ret;
+
+       args->size = sizeof(user_sseu);
+
+       return 0;
+}
+
  int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
                                     struct drm_file *file)
  {
@@ -958,7 +1292,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
                                         I915_USER_PRIORITY(priority);
                 }
                 break;
-
+       case I915_CONTEXT_PARAM_SSEU:
+               ret = set_sseu(ctx, args);
+               break;
         default:
                 ret = -EINVAL;
                 break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h

index 919f6f0a0f7a772b7e923b81e37781c40d1ffadf..92ad5272e57ffa4e2743b7dba8b122066d4f0365 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -183,6 +183,12 @@ struct i915_gem_context {
                 u64 lrc_desc;
                 int pin_count;
  
+               /**
+                * active_tracker: Active tracker for the external rq activity
+                * on this intel_context object.
+                */
+               struct i915_gem_active active_tracker;
+
                 const struct intel_context_ops *ops;
  
                 /** sseu: Control eu/slice partitioning */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index d99c462e2c096a46fdcc661ff6ce45569de190ef..5e98fd79bd9df4a04950bdae683f8b67e2c4e7dd 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2498,7 +2498,9 @@ u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
          * subslices are enabled, or a count between one and four on the first
          * slice.
          */
-       if (IS_GEN(i915, 11) && slices == 1 && subslices >= 4) {
+       if (IS_GEN(i915, 11) &&
+           slices == 1 &&
+           subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
                 GEM_BUG_ON(subslices & 1);
  
                 subslice_pg = false;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h

index 298b2e197744bbc28782d1a853e1ee3577f02bee..397810fa2d33c95f69770bdf3563ea44213b40c6 100644 (file)
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
  #define   I915_CONTEXT_MAX_USER_PRIORITY       1023 /* inclusive */
  #define   I915_CONTEXT_DEFAULT_PRIORITY                0
  #define   I915_CONTEXT_MIN_USER_PRIORITY       -1023 /* inclusive */
+       /*
+        * When using the following param, value should be a pointer to
+        * drm_i915_gem_context_param_sseu.
+        */
+#define I915_CONTEXT_PARAM_SSEU                0x7
         __u64 value;
  };
  
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reasons to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by setting the SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting an
+ * unsupported combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+       /*
+        * Engine class & instance to be configured or queried.
+        */
+       __u16 engine_class;
+       __u16 engine_instance;
+
+       /*
+        * Unused for now. Must be cleared to zero.
+        */
+       __u32 flags;
+
+       /*
+        * Mask of slices to enable for the context. Valid values are a subset
+        * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+        */
+       __u64 slice_mask;
+
+       /*
+        * Mask of subslices to enable for the context. Valid values are a
+        * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK.
+        */
+       __u64 subslice_mask;
+
+       /*
+        * Minimum/Maximum number of EUs to enable per subslice for the
+        * context. min_eus_per_subslice must be less than or equal to
+        * max_eus_per_subslice.
+        */
+       __u16 min_eus_per_subslice;
+       __u16 max_eus_per_subslice;
+
+       /*
+        * Unused for now. Must be cleared to zero.
+        */
+       __u32 rsvd;
+};
+
  enum drm_i915_oa_format {
         I915_OA_FORMAT_A13 = 1,     /* HSW only */
         I915_OA_FORMAT_A29,         /* HSW only */
author	Tvrtko Ursulin <tvrtko.ursulin@intel.com>
	Tue, 5 Feb 2019 09:50:31 +0000 (09:50 +0000)
committer	Tvrtko Ursulin <tvrtko.ursulin@intel.com>
	Tue, 5 Feb 2019 11:32:03 +0000 (11:32 +0000)
drivers/gpu/drm/i915/i915_gem_context.c		patch \| blob \| blame \| history
drivers/gpu/drm/i915/i915_gem_context.h		patch \| blob \| blame \| history
drivers/gpu/drm/i915/intel_lrc.c		patch \| blob \| blame \| history
include/uapi/drm/i915_drm.h		patch \| blob \| blame \| history