git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
drm/i915: Allow specification of parallel execbuf
author: Chris Wilson <chris@chris-wilson.co.uk>
Tue, 21 May 2019 21:11:34 +0000 (22:11 +0100)
committer: Chris Wilson <chris@chris-wilson.co.uk>
Wed, 22 May 2019 07:40:50 +0000 (08:40 +0100)
There is a desire to split a task onto two engines and have them run at
the same time, e.g. scanline interleaving to spread the workload evenly.
Through the use of the out-fence from the first execbuf, we can
coordinate the secondary execbufs to only become ready simultaneously
with the first, so that with all things idle the secondary execbufs are
executed in parallel with the first. The key difference between the new
EXEC_FENCE_SUBMIT and the existing EXEC_FENCE_IN is that the in-fence
waits for the completion of the first request (so that all of its
rendering results are visible to the second execbuf, the more common
userspace fence requirement), whereas the submit-fence only holds the
secondary execbuf back until the first request becomes ready to execute.

Since we only have a single input fence slot, userspace cannot mix an
in-fence and a submit-fence. It has to use one or the other! This is not
such a harsh requirement, since by virtue of the submit-fence, the
secondary execbuf inherits all of the dependencies from the first
request, and for the application the dependencies should be common
between the primary and secondary execbuf.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Testcase: igt/gem_exec_fence/parallel
Link: https://github.com/intel/media-driver/pull/546
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190521211134.16117-10-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
include/uapi/drm/i915_drm.h

index 5061cb32856b1a7af0e64cd714a5f5e747a65ca3..83d2eb9e74cb72bb47198bd89bb8766a0b081c2a 100644 (file)
@@ -443,6 +443,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
        case I915_PARAM_HAS_EXEC_CAPTURE:
        case I915_PARAM_HAS_EXEC_BATCH_FIRST:
        case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
+       case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
                /* For the time being all of these are always true;
                 * if some supported hardware does not have one of these
                 * features this value needs to be provided from
index d6c5220addd002db1fbf5a2c9e11797fb8bfcd3d..7ce25b54c57be98d6827647937eabcd3f7923fe8 100644 (file)
@@ -2318,6 +2318,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 {
        struct i915_execbuffer eb;
        struct dma_fence *in_fence = NULL;
+       struct dma_fence *exec_fence = NULL;
        struct sync_file *out_fence = NULL;
        int out_fence_fd = -1;
        int err;
@@ -2360,11 +2361,24 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                        return -EINVAL;
        }
 
+       if (args->flags & I915_EXEC_FENCE_SUBMIT) {
+               if (in_fence) {
+                       err = -EINVAL;
+                       goto err_in_fence;
+               }
+
+               exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+               if (!exec_fence) {
+                       err = -EINVAL;
+                       goto err_in_fence;
+               }
+       }
+
        if (args->flags & I915_EXEC_FENCE_OUT) {
                out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
                if (out_fence_fd < 0) {
                        err = out_fence_fd;
-                       goto err_in_fence;
+                       goto err_exec_fence;
                }
        }
 
@@ -2494,6 +2508,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                        goto err_request;
        }
 
+       if (exec_fence) {
+               err = i915_request_await_execution(eb.request, exec_fence,
+                                                  eb.engine->bond_execute);
+               if (err < 0)
+                       goto err_request;
+       }
+
        if (fences) {
                err = await_fence_array(&eb, fences);
                if (err)
@@ -2555,6 +2576,8 @@ err_destroy:
 err_out_fence:
        if (out_fence_fd != -1)
                put_unused_fd(out_fence_fd);
+err_exec_fence:
+       dma_fence_put(exec_fence);
 err_in_fence:
        dma_fence_put(in_fence);
        return err;
index e2da9027bcdf8ba1f0e22ee1c0e1d32e40757a70..bdb00ec1f8be5046c5e7a78f744e24ae609be7f3 100644 (file)
@@ -604,6 +604,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT   52
 
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel
+ * execution through use of explicit fence support.
+ * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
+ */
+#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1126,7 +1132,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_ARRAY   (1<<19)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+/*
+ * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_SUBMIT         (1 << 20)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \