]>
Commit | Line | Data |
---|---|---|
9d0a6fa6 MK |
1 | /* |
2 | * Copyright © 2014 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: | |
24 | * Mika Kuoppala <mika.kuoppala@intel.com> | |
25 | * | |
26 | */ | |
27 | ||
28 | #include "i915_drv.h" | |
29 | #include "intel_renderstate.h" | |
30 | ||
4e50f082 | 31 | struct intel_render_state { |
e40f9ee6 | 32 | const struct intel_renderstate_rodata *rodata; |
a5e85c8a | 33 | struct i915_vma *vma; |
4e50f082 CW |
34 | u32 batch_offset; |
35 | u32 batch_size; | |
36 | u32 aux_offset; | |
37 | u32 aux_size; | |
e40f9ee6 CW |
38 | }; |
39 | ||
9d0a6fa6 | 40 | static const struct intel_renderstate_rodata * |
4e50f082 | 41 | render_state_get_rodata(const struct intel_engine_cs *engine) |
9d0a6fa6 | 42 | { |
4e50f082 | 43 | switch (INTEL_GEN(engine->i915)) { |
9d0a6fa6 MK |
44 | case 6: |
45 | return &gen6_null_state; | |
46 | case 7: | |
47 | return &gen7_null_state; | |
48 | case 8: | |
49 | return &gen8_null_state; | |
ff7a60f2 AR |
50 | case 9: |
51 | return &gen9_null_state; | |
9d0a6fa6 MK |
52 | } |
53 | ||
54 | return NULL; | |
55 | } | |
56 | ||
84e81020 AS |
/*
 * Append one dword to the auxiliary batch.
 *
 * The only check made before storing the value is that the write index
 * still lies within the page; on overflow control jumps to the caller's
 * local "err" label. That check is sufficient because the null state
 * generator builds the final batch in two passes, commands and state
 * separately. Once both sizes are known it compacts them by relocating the
 * state right after the commands, taking care of alignment, so we should
 * have sufficient space below them for adding new commands.
 *
 * Note that (i) is evaluated more than once and is post-incremented.
 */
#define OUT_BATCH(batch, i, val)				\
	do {							\
		if ((i) >= PAGE_SIZE / sizeof(u32))		\
			goto err;				\
		(batch)[(i)++] = (val);				\
	} while (0)
72 | ||
4e50f082 CW |
73 | static int render_state_setup(struct intel_render_state *so, |
74 | struct drm_i915_private *i915) | |
1ce826d4 CW |
75 | { |
76 | const struct intel_renderstate_rodata *rodata = so->rodata; | |
4e50f082 | 77 | struct drm_i915_gem_object *obj = so->vma->obj; |
1ce826d4 | 78 | unsigned int i = 0, reloc_index = 0; |
4e50f082 | 79 | unsigned int needs_clflush; |
1ce826d4 CW |
80 | u32 *d; |
81 | int ret; | |
82 | ||
4e50f082 | 83 | ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); |
9d0a6fa6 MK |
84 | if (ret) |
85 | return ret; | |
86 | ||
4e50f082 | 87 | d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); |
1ce826d4 | 88 | |
9d0a6fa6 MK |
89 | while (i < rodata->batch_items) { |
90 | u32 s = rodata->batch[i]; | |
91 | ||
1ce826d4 | 92 | if (i * 4 == rodata->reloc[reloc_index]) { |
a5e85c8a | 93 | u64 r = s + so->vma->node.start; |
1ce826d4 | 94 | s = lower_32_bits(r); |
dfc5148f | 95 | if (HAS_64BIT_RELOC(i915)) { |
9d0a6fa6 | 96 | if (i + 1 >= rodata->batch_items || |
4e50f082 CW |
97 | rodata->batch[i + 1] != 0) |
98 | goto err; | |
9d0a6fa6 | 99 | |
1ce826d4 CW |
100 | d[i++] = s; |
101 | s = upper_32_bits(r); | |
9d0a6fa6 MK |
102 | } |
103 | ||
104 | reloc_index++; | |
105 | } | |
106 | ||
1ce826d4 | 107 | d[i++] = s; |
9d0a6fa6 | 108 | } |
84e81020 | 109 | |
4e50f082 CW |
110 | if (rodata->reloc[reloc_index] != -1) { |
111 | DRM_ERROR("only %d relocs resolved\n", reloc_index); | |
112 | goto err; | |
113 | } | |
114 | ||
115 | so->batch_offset = so->vma->node.start; | |
116 | so->batch_size = rodata->batch_items * sizeof(u32); | |
117 | ||
84e81020 AS |
118 | while (i % CACHELINE_DWORDS) |
119 | OUT_BATCH(d, i, MI_NOOP); | |
120 | ||
4e50f082 | 121 | so->aux_offset = i * sizeof(u32); |
84e81020 | 122 | |
4e50f082 | 123 | if (HAS_POOLED_EU(i915)) { |
33e141ed | 124 | /* |
125 | * We always program 3x6 pool config but depending upon which | |
126 | * subslice is disabled HW drops down to appropriate config | |
127 | * shown below. | |
128 | * | |
129 | * In the below table 2x6 config always refers to | |
130 | * fused-down version, native 2x6 is not available and can | |
131 | * be ignored | |
132 | * | |
133 | * SNo subslices config eu pool configuration | |
134 | * ----------------------------------------------------------- | |
135 | * 1 3 subslices enabled (3x6) - 0x00777000 (9+9) | |
136 | * 2 ss0 disabled (2x6) - 0x00777000 (3+9) | |
137 | * 3 ss1 disabled (2x6) - 0x00770000 (6+6) | |
138 | * 4 ss2 disabled (2x6) - 0x00007000 (9+3) | |
139 | */ | |
140 | u32 eu_pool_config = 0x00777000; | |
141 | ||
142 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE); | |
143 | OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE); | |
144 | OUT_BATCH(d, i, eu_pool_config); | |
145 | OUT_BATCH(d, i, 0); | |
146 | OUT_BATCH(d, i, 0); | |
147 | OUT_BATCH(d, i, 0); | |
148 | } | |
149 | ||
84e81020 | 150 | OUT_BATCH(d, i, MI_BATCH_BUFFER_END); |
4e50f082 CW |
151 | so->aux_size = i * sizeof(u32) - so->aux_offset; |
152 | so->aux_offset += so->batch_offset; | |
84e81020 AS |
153 | /* |
154 | * Since we are sending length, we need to strictly conform to | |
155 | * all requirements. For Gen2 this must be a multiple of 8. | |
156 | */ | |
4e50f082 | 157 | so->aux_size = ALIGN(so->aux_size, 8); |
9d0a6fa6 | 158 | |
4e50f082 CW |
159 | if (needs_clflush) |
160 | drm_clflush_virt_range(d, i * sizeof(u32)); | |
161 | kunmap_atomic(d); | |
dd72bde0 | 162 | |
4e50f082 CW |
163 | ret = i915_gem_object_set_to_gtt_domain(obj, false); |
164 | out: | |
165 | i915_gem_obj_finish_shmem_access(obj); | |
dd72bde0 | 166 | return ret; |
4e50f082 CW |
167 | |
168 | err: | |
169 | kunmap_atomic(d); | |
170 | ret = -EINVAL; | |
171 | goto out; | |
9d0a6fa6 MK |
172 | } |
173 | ||
84e81020 AS |
174 | #undef OUT_BATCH |
175 | ||
4e50f082 | 176 | int i915_gem_render_state_init(struct intel_engine_cs *engine) |
9d0a6fa6 | 177 | { |
4e50f082 CW |
178 | struct intel_render_state *so; |
179 | const struct intel_renderstate_rodata *rodata; | |
a5e85c8a | 180 | struct drm_i915_gem_object *obj; |
9d0a6fa6 MK |
181 | int ret; |
182 | ||
4e50f082 CW |
183 | if (engine->id != RCS) |
184 | return 0; | |
46470fc9 | 185 | |
4e50f082 CW |
186 | rodata = render_state_get_rodata(engine); |
187 | if (!rodata) | |
1ce826d4 | 188 | return 0; |
9d0a6fa6 | 189 | |
f51455d4 | 190 | if (rodata->batch_items * 4 > PAGE_SIZE) |
15d21db8 | 191 | return -EINVAL; |
564ddb2f | 192 | |
4e50f082 CW |
193 | so = kmalloc(sizeof(*so), GFP_KERNEL); |
194 | if (!so) | |
195 | return -ENOMEM; | |
564ddb2f | 196 | |
f51455d4 | 197 | obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); |
4e50f082 CW |
198 | if (IS_ERR(obj)) { |
199 | ret = PTR_ERR(obj); | |
200 | goto err_free; | |
a5e85c8a | 201 | } |
564ddb2f | 202 | |
a01cb37a | 203 | so->vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); |
4e50f082 CW |
204 | if (IS_ERR(so->vma)) { |
205 | ret = PTR_ERR(so->vma); | |
a5e85c8a | 206 | goto err_obj; |
4e50f082 CW |
207 | } |
208 | ||
209 | so->rodata = rodata; | |
210 | engine->render_state = so; | |
211 | return 0; | |
15d21db8 | 212 | |
4e50f082 CW |
213 | err_obj: |
214 | i915_gem_object_put(obj); | |
215 | err_free: | |
216 | kfree(so); | |
217 | return ret; | |
218 | } | |
219 | ||
220 | int i915_gem_render_state_emit(struct drm_i915_gem_request *req) | |
221 | { | |
222 | struct intel_render_state *so; | |
223 | int ret; | |
224 | ||
4c7d62c6 CW |
225 | lockdep_assert_held(&req->i915->drm.struct_mutex); |
226 | ||
4e50f082 CW |
227 | so = req->engine->render_state; |
228 | if (!so) | |
229 | return 0; | |
230 | ||
231 | /* Recreate the page after shrinking */ | |
a4f5ea64 | 232 | if (!so->vma->obj->mm.pages) |
4e50f082 CW |
233 | so->batch_offset = -1; |
234 | ||
235 | ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); | |
15d21db8 | 236 | if (ret) |
4e50f082 | 237 | return ret; |
9d0a6fa6 | 238 | |
4e50f082 CW |
239 | if (so->vma->node.start != so->batch_offset) { |
240 | ret = render_state_setup(so, req->i915); | |
241 | if (ret) | |
242 | goto err_unpin; | |
243 | } | |
244 | ||
a0125a93 CW |
245 | ret = req->engine->emit_flush(req, EMIT_INVALIDATE); |
246 | if (ret) | |
247 | goto err_unpin; | |
248 | ||
4e50f082 CW |
249 | ret = req->engine->emit_bb_start(req, |
250 | so->batch_offset, so->batch_size, | |
803688ba | 251 | I915_DISPATCH_SECURE); |
9d0a6fa6 | 252 | if (ret) |
15d21db8 | 253 | goto err_unpin; |
9d0a6fa6 | 254 | |
4e50f082 | 255 | if (so->aux_size > 8) { |
803688ba | 256 | ret = req->engine->emit_bb_start(req, |
4e50f082 | 257 | so->aux_offset, so->aux_size, |
803688ba | 258 | I915_DISPATCH_SECURE); |
84e81020 | 259 | if (ret) |
15d21db8 | 260 | goto err_unpin; |
84e81020 AS |
261 | } |
262 | ||
4e50f082 | 263 | i915_vma_move_to_active(so->vma, req, 0); |
15d21db8 | 264 | err_unpin: |
4e50f082 | 265 | i915_vma_unpin(so->vma); |
9d0a6fa6 MK |
266 | return ret; |
267 | } | |
4e50f082 CW |
268 | |
269 | void i915_gem_render_state_fini(struct intel_engine_cs *engine) | |
270 | { | |
271 | struct intel_render_state *so; | |
272 | struct drm_i915_gem_object *obj; | |
273 | ||
274 | so = fetch_and_zero(&engine->render_state); | |
275 | if (!so) | |
276 | return; | |
277 | ||
278 | obj = so->vma->obj; | |
279 | ||
280 | i915_vma_close(so->vma); | |
281 | __i915_gem_object_release_unless_active(obj); | |
282 | ||
283 | kfree(so); | |
284 | } |