From 944397f04f24eaf05125896dcb601c0e1c917879 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 16:16:11 +0000 Subject: [PATCH] drm/i915: Store required fence size/alignment for GGTT vma The fence size/alignment is a combination of the vma size plus object tiling parameters. Those parameters are rarely changed, making the fence size/alignemnt roughly constant for the lifetime of the VMA. We can simplify subsequent calculations by precalculating the size/alignment required for GGTT vma taking fencing into account (with an update if we do change the tiling or stride). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170109161613.11881-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 7 ++- drivers/gpu/drm/i915/i915_gem.c | 27 ++++------ drivers/gpu/drm/i915/i915_gem_fence_reg.c | 20 ++++---- drivers/gpu/drm/i915/i915_gem_tiling.c | 36 +++++++------ drivers/gpu/drm/i915/i915_vma.c | 61 +++++++++++------------ drivers/gpu/drm/i915/i915_vma.h | 3 ++ 6 files changed, 73 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8185229f370f..5ee76628be98 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3360,11 +3360,10 @@ int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int i915_gem_open(struct drm_device *dev, struct drm_file *file); void i915_gem_release(struct drm_device *dev, struct drm_file *file); -u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u64 size, +u32 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, u32 size, int tiling_mode, unsigned int stride); -u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode, unsigned int stride, - bool fenced); +u32 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u32 size, + int tiling_mode, unsigned int stride); int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 07cc0d01915f..1f9496e587dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2026,10 +2026,10 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) * Return the required global GTT size for an object, taking into account * potential fence register mapping. */ -u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, - u64 size, int tiling_mode, unsigned int stride) +u32 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, + u32 size, int tiling_mode, unsigned int stride) { - u64 ggtt_size; + u32 ggtt_size; GEM_BUG_ON(!size); @@ -2062,14 +2062,12 @@ u64 i915_gem_get_ggtt_size(struct drm_i915_private *dev_priv, * @size: object size * @tiling_mode: tiling mode * @stride: tiling stride - * @fenced: is fenced alignment required or not * * Return the required global GTT alignment for an object, taking into account * potential fence register mapping. */ -u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, - int tiling_mode, unsigned int stride, - bool fenced) +u32 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u32 size, + int tiling_mode, unsigned int stride) { GEM_BUG_ON(!size); @@ -2077,9 +2075,7 @@ u64 i915_gem_get_ggtt_alignment(struct drm_i915_private *dev_priv, u64 size, * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ - if (INTEL_GEN(dev_priv) >= 4 || - (!fenced && (IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))) || - tiling_mode == I915_TILING_NONE) + if (INTEL_GEN(dev_priv) >= 4 || tiling_mode == I915_TILING_NONE) return 4096; /* @@ -3558,7 +3554,7 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) return; if (--vma->obj->pin_display == 0) - vma->display_alignment = 0; + vma->display_alignment = 4096; /* Bump the LRU to try and avoid premature eviction whilst flipping */ if (!i915_vma_is_active(vma)) @@ -3703,11 +3699,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOSPC); if (flags & PIN_MAPPABLE) { - u32 fence_size; - - fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); /* If the required space is larger than the available * aperture, we will not able to find a slot for the * object and unbinding the object now will be in @@ -3715,7 +3706,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, * the object in and out of the Global GTT and * waste a lot of cycles under the mutex. */ - if (fence_size > dev_priv->ggtt.mappable_end) + if (vma->fence_size > dev_priv->ggtt.mappable_end) return ERR_PTR(-E2BIG); /* If NONBLOCK is set the caller is optimistically @@ -3734,7 +3725,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, * we could try to minimise harm to others. */ if (flags & PIN_NONBLOCK && - fence_size > dev_priv->ggtt.mappable_end / 2) + vma->fence_size > dev_priv->ggtt.mappable_end / 2) return ERR_PTR(-ENOSPC); } diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index 26f242359fbd..8b37c4cb311a 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -79,11 +79,11 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, if (vma) { unsigned int stride = i915_gem_object_get_stride(vma->obj); u32 row_size = i915_gem_object_get_tile_row_size(vma->obj); - u32 size = rounddown((u32)vma->node.size, row_size); + u32 size = rounddown((u32)vma->fence_size, row_size); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & 4095); - GEM_BUG_ON(vma->node.size & 4095); + GEM_BUG_ON(vma->fence_size & 4095); GEM_BUG_ON(stride & 127); val = (vma->node.start + size - 4096) << 32; @@ -128,8 +128,8 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & ~I915_FENCE_START_MASK); - GEM_BUG_ON(!is_power_of_2(vma->node.size)); - GEM_BUG_ON(vma->node.start & (vma->node.size - 1)); + GEM_BUG_ON(!is_power_of_2(vma->fence_size)); + GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) stride /= 128; @@ -140,7 +140,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, val = vma->node.start; if (is_y_tiled) val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I915_FENCE_SIZE_BITS(vma->node.size); + val |= I915_FENCE_SIZE_BITS(vma->fence_size); val |= ilog2(stride) << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; @@ -162,20 +162,18 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, val = 0; if (vma) { - unsigned int tiling = i915_gem_object_get_tiling(vma->obj); - bool is_y_tiled = tiling == I915_TILING_Y; unsigned int stride = i915_gem_object_get_stride(vma->obj); GEM_BUG_ON(!i915_vma_is_map_and_fenceable(vma)); GEM_BUG_ON(vma->node.start & ~I830_FENCE_START_MASK); - GEM_BUG_ON(!is_power_of_2(vma->node.size)); + GEM_BUG_ON(!is_power_of_2(vma->fence_size)); GEM_BUG_ON(!is_power_of_2(stride / 128)); - GEM_BUG_ON(vma->node.start & (vma->node.size - 1)); + GEM_BUG_ON(vma->node.start & (vma->fence_size - 1)); val = vma->node.start; - if (is_y_tiled) + if (i915_gem_object_get_tiling(vma->obj) == I915_TILING_Y) val |= BIT(I830_FENCE_TILING_Y_SHIFT); - val |= I830_FENCE_SIZE_BITS(vma->node.size); + val |= I830_FENCE_SIZE_BITS(vma->fence_size); val |= ilog2(stride / 128) << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 51b8d71876b7..23a896cd934f 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -120,25 +120,18 @@ i915_tiling_ok(struct drm_i915_private *dev_priv, static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode, unsigned int stride) { - struct drm_i915_private *dev_priv = vma->vm->i915; - u32 size; + struct drm_i915_private *i915 = vma->vm->i915; + u32 size, alignment; if (!i915_vma_is_map_and_fenceable(vma)) return true; - if (INTEL_GEN(dev_priv) == 3) { - if (vma->node.start & ~I915_FENCE_START_MASK) - return false; - } else { - if (vma->node.start & ~I830_FENCE_START_MASK) - return false; - } - - size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode, stride); + size = i915_gem_get_ggtt_size(i915, vma->size, tiling_mode, stride); if (vma->node.size < size) return false; - if (vma->node.start & (size - 1)) + alignment = i915_gem_get_ggtt_alignment(i915, vma->size, tiling_mode, stride); + if (vma->node.start & (alignment - 1)) return false; return true; @@ -156,6 +149,9 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, return 0; list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + break; + if (i915_vma_fence_prepare(vma, tiling_mode, stride)) continue; @@ -277,10 +273,18 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, mutex_unlock(&obj->mm.lock); list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!vma->fence) - continue; - - vma->fence->dirty = true; + if (!i915_vma_is_ggtt(vma)) + break; + + vma->fence_size = i915_gem_get_ggtt_size(dev_priv, vma->size, + args->tiling_mode, + args->stride); + vma->fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, vma->size, + args->tiling_mode, + args->stride); + + if (vma->fence) + vma->fence->dirty = true; } obj->tiling_and_stride = args->stride | args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 734f77b7697f..a605d735662c 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,6 +91,7 @@ __i915_vma_create(struct drm_i915_gem_object *obj, vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; + vma->display_alignment = 4096; if (view) { vma->ggtt_view = *view; @@ -110,6 +111,17 @@ __i915_vma_create(struct drm_i915_gem_object *obj, } if (i915_is_ggtt(vm)) { + GEM_BUG_ON(overflows_type(vma->size, u32)); + vma->fence_size = i915_gem_get_ggtt_size(vm->i915, vma->size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); + GEM_BUG_ON(vma->fence_size & 4095); + + vma->fence_alignment = i915_gem_get_ggtt_alignment(vm->i915, vma->size, + i915_gem_object_get_tiling(obj), + i915_gem_object_get_stride(obj)); + GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { @@ -277,34 +289,24 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) { - struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool mappable, fenceable; - u32 fence_size, fence_alignment; - fence_size = i915_gem_get_ggtt_size(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj), - true); - GEM_BUG_ON(!is_power_of_2(fence_alignment)); - - fenceable = (vma->node.size == fence_size && - (vma->node.start & (fence_alignment - 1)) == 0); - - mappable = (vma->node.start + fence_size <= - dev_priv->ggtt.mappable_end); + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->fence_size); /* * Explicitly disable for rotated VMA since the display does not * need the fence and the VMA is not accessible to other users. */ - if (mappable && fenceable && - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) + if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + return; + + fenceable = (vma->node.size >= vma->fence_size && + (vma->node.start & (vma->fence_alignment - 1)) == 0); + + mappable = vma->node.start + vma->fence_size <= i915_vm_to_ggtt(vma->vm)->mappable_end; + + if (mappable && fenceable) vma->flags |= I915_VMA_CAN_FENCE; else vma->flags &= ~I915_VMA_CAN_FENCE; @@ -371,17 +373,12 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); size = max(size, vma->size); - if (flags & PIN_MAPPABLE) - size = i915_gem_get_ggtt_size(dev_priv, size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj)); - - alignment = max(max(alignment, vma->display_alignment), - i915_gem_get_ggtt_alignment(dev_priv, size, - i915_gem_object_get_tiling(obj), - i915_gem_object_get_stride(obj), - flags & PIN_MAPPABLE)); - GEM_BUG_ON(!is_power_of_2(alignment)); + alignment = max(alignment, vma->display_alignment); + if (flags & PIN_MAPPABLE) { + size = max_t(typeof(size), size, vma->fence_size); + alignment = max_t(typeof(alignment), + alignment, vma->fence_alignment); + } start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e3b2b3b1e056..a969bbb65871 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -55,6 +55,9 @@ struct i915_vma { u64 size; u64 display_alignment; + u32 fence_size; + u32 fence_alignment; + unsigned int flags; /** * How many users have pinned this object in GTT space. The following -- 2.39.5