]>
Commit | Line | Data |
---|---|---|
254f965c BW |
1 | /* |
2 | * Copyright © 2011-2012 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: | |
24 | * Ben Widawsky <ben@bwidawsk.net> | |
25 | * | |
26 | */ | |
27 | ||
28 | /* | |
29 | * This file implements HW context support. On gen5+ a HW context consists of an | |
30 | * opaque GPU object which is referenced at times of context saves and restores. | |
31 | * With RC6 enabled, the context is also referenced as the GPU enters and exits | |
32 | * from RC6 (GPU has its own internal power context, except on gen5). Though | |
33 | * something like a context does exist for the media ring, the code only | |
34 | * supports contexts for the render ring. | |
35 | * | |
36 | * In software, there is a distinction between contexts created by the user, | |
37 | * and the default HW context. The default HW context is used by GPU clients | |
38 | * that do not request setup of their own hardware context. The default | |
39 | * context's state is never restored to help prevent programming errors. This | |
40 | * would happen if a client ran and piggy-backed off another client's GPU state. | |
41 | * The default context only exists to give the GPU some offset to load as the | |
42 | * current to invoke a save of the context we actually care about. In fact, the | |
43 | * code could likely be constructed, albeit in a more complicated fashion, to | |
44 | * never use the default context, though that limits the driver's ability to | |
45 | * swap out, and/or destroy other contexts. | |
46 | * | |
47 | * All other contexts are created as a request by the GPU client. These contexts | |
48 | * store GPU state, and thus allow GPU clients to not re-emit state (and | |
49 | * potentially query certain state) at any time. The kernel driver makes | |
50 | * certain that the appropriate commands are inserted. | |
51 | * | |
52 | * The context life cycle is semi-complicated in that context BOs may live | |
53 | * longer than the context itself because of the way the hardware, and object | |
54 | * tracking works. Below is a very crude representation of the state machine | |
55 | * describing the context life. | |
56 | * refcount pincount active | |
57 | * S0: initial state 0 0 0 | |
58 | * S1: context created 1 0 0 | |
59 | * S2: context is currently running 2 1 X | |
60 | * S3: GPU referenced, but not current 2 0 1 | |
61 | * S4: context is current, but destroyed 1 1 0 | |
62 | * S5: like S3, but destroyed 1 0 1 | |
63 | * | |
64 | * The most common (but not all) transitions: | |
65 | * S0->S1: client creates a context | |
66 | * S1->S2: client submits execbuf with context | |
67 | * S2->S3: another client submits execbuf with context | |
68 | * S3->S1: context object was retired | |
69 | * S3->S2: client submits another execbuf | |
70 | * S2->S4: context destroy called with current context | |
71 | * S3->S5->S0: destroy path | |
72 | * S4->S5->S0: destroy path on current context | |
73 | * | |
74 | * There are two confusing terms used above: | |
75 | * The "current context" means the context which is currently running on the | |
76 | * GPU. The GPU has loaded its state already and has stored away the gtt | |
77 | * offset of the BO. The GPU is not actively referencing the data at this | |
78 | * offset, but it will on the next context switch. The only way to avoid this | |
79 | * is to do a GPU reset. | |
80 | * | |
81 | * An "active context" is one which was previously the "current context" and is | |
82 | * on the active list waiting for the next context switch to occur. Until this | |
83 | * happens, the object must remain at the same gtt offset. It is therefore | |
84 | * possible to destroy a context, but it is still active. | |
85 | * | |
86 | */ | |
87 | ||
88 | #include "drmP.h" | |
89 | #include "i915_drm.h" | |
90 | #include "i915_drv.h" | |
91 | ||
40521054 BW |
92 | /* This is a HW constraint. The value below is the largest known requirement |
93 | * I've seen in a spec to date, and that was a workaround for a non-shipping | |
94 | * part. It should be safe to decrease this, but it's more future proof as is. | |
95 | */ | |
96 | #define CONTEXT_ALIGN (64<<10) | |
97 | ||
98 | static struct i915_hw_context * | |
99 | i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); | |
100 | ||
254f965c BW |
101 | static int get_context_size(struct drm_device *dev) |
102 | { | |
103 | struct drm_i915_private *dev_priv = dev->dev_private; | |
104 | int ret; | |
105 | u32 reg; | |
106 | ||
107 | switch (INTEL_INFO(dev)->gen) { | |
108 | case 6: | |
109 | reg = I915_READ(CXT_SIZE); | |
110 | ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; | |
111 | break; | |
112 | case 7: | |
113 | reg = I915_READ(GEN7_CTX_SIZE); | |
114 | ret = GEN7_CTX_TOTAL_SIZE(reg) * 64; | |
115 | break; | |
116 | default: | |
117 | BUG(); | |
118 | } | |
119 | ||
120 | return ret; | |
121 | } | |
122 | ||
40521054 BW |
123 | static void do_destroy(struct i915_hw_context *ctx) |
124 | { | |
125 | struct drm_device *dev = ctx->obj->base.dev; | |
126 | struct drm_i915_private *dev_priv = dev->dev_private; | |
127 | ||
128 | if (ctx->file_priv) | |
129 | idr_remove(&ctx->file_priv->context_idr, ctx->id); | |
130 | else | |
131 | BUG_ON(ctx != dev_priv->ring[RCS].default_context); | |
132 | ||
133 | drm_gem_object_unreference(&ctx->obj->base); | |
134 | kfree(ctx); | |
135 | } | |
136 | ||
137 | static int | |
138 | create_hw_context(struct drm_device *dev, | |
139 | struct drm_i915_file_private *file_priv, | |
140 | struct i915_hw_context **ctx_out) | |
141 | { | |
142 | struct drm_i915_private *dev_priv = dev->dev_private; | |
143 | int ret, id; | |
144 | ||
145 | *ctx_out = kzalloc(sizeof(struct drm_i915_file_private), GFP_KERNEL); | |
146 | if (*ctx_out == NULL) | |
147 | return -ENOMEM; | |
148 | ||
149 | (*ctx_out)->obj = i915_gem_alloc_object(dev, | |
150 | dev_priv->hw_context_size); | |
151 | if ((*ctx_out)->obj == NULL) { | |
152 | kfree(*ctx_out); | |
153 | DRM_DEBUG_DRIVER("Context object allocated failed\n"); | |
154 | return -ENOMEM; | |
155 | } | |
156 | ||
157 | /* The ring associated with the context object is handled by the normal | |
158 | * object tracking code. We give an initial ring value simple to pass an | |
159 | * assertion in the context switch code. | |
160 | */ | |
161 | (*ctx_out)->ring = &dev_priv->ring[RCS]; | |
162 | ||
163 | /* Default context will never have a file_priv */ | |
164 | if (file_priv == NULL) | |
165 | return 0; | |
166 | ||
167 | (*ctx_out)->file_priv = file_priv; | |
168 | ||
169 | again: | |
170 | if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { | |
171 | ret = -ENOMEM; | |
172 | DRM_DEBUG_DRIVER("idr allocation failed\n"); | |
173 | goto err_out; | |
174 | } | |
175 | ||
176 | ret = idr_get_new_above(&file_priv->context_idr, *ctx_out, | |
177 | DEFAULT_CONTEXT_ID + 1, &id); | |
178 | if (ret == 0) | |
179 | (*ctx_out)->id = id; | |
180 | ||
181 | if (ret == -EAGAIN) | |
182 | goto again; | |
183 | else if (ret) | |
184 | goto err_out; | |
185 | ||
186 | return 0; | |
187 | ||
188 | err_out: | |
189 | do_destroy(*ctx_out); | |
190 | return ret; | |
191 | } | |
192 | ||
e0556841 BW |
193 | static inline bool is_default_context(struct i915_hw_context *ctx) |
194 | { | |
195 | return (ctx == ctx->ring->default_context); | |
196 | } | |
197 | ||
254f965c BW |
198 | /** |
199 | * The default context needs to exist per ring that uses contexts. It stores the | |
200 | * context state of the GPU for applications that don't utilize HW contexts, as | |
201 | * well as an idle case. | |
202 | */ | |
203 | static int create_default_context(struct drm_i915_private *dev_priv) | |
204 | { | |
40521054 BW |
205 | struct i915_hw_context *ctx; |
206 | int ret; | |
207 | ||
208 | BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); | |
209 | ||
210 | ret = create_hw_context(dev_priv->dev, NULL, | |
211 | &dev_priv->ring[RCS].default_context); | |
212 | if (ret) | |
213 | return ret; | |
214 | ||
215 | /* We may need to do things with the shrinker which require us to | |
216 | * immediately switch back to the default context. This can cause a | |
217 | * problem as pinning the default context also requires GTT space which | |
218 | * may not be available. To avoid this we always pin the | |
219 | * default context. | |
220 | */ | |
221 | ctx = dev_priv->ring[RCS].default_context; | |
222 | ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false); | |
223 | if (ret) { | |
224 | do_destroy(ctx); | |
225 | return ret; | |
226 | } | |
227 | ||
228 | return ret; | |
254f965c BW |
229 | } |
230 | ||
231 | void i915_gem_context_init(struct drm_device *dev) | |
232 | { | |
233 | struct drm_i915_private *dev_priv = dev->dev_private; | |
234 | uint32_t ctx_size; | |
235 | ||
236 | if (!HAS_HW_CONTEXTS(dev)) | |
237 | return; | |
238 | ||
239 | /* If called from reset, or thaw... we've been here already */ | |
40521054 BW |
240 | if (dev_priv->hw_contexts_disabled || |
241 | dev_priv->ring[RCS].default_context) | |
254f965c BW |
242 | return; |
243 | ||
244 | ctx_size = get_context_size(dev); | |
245 | dev_priv->hw_context_size = get_context_size(dev); | |
246 | dev_priv->hw_context_size = round_up(dev_priv->hw_context_size, 4096); | |
247 | ||
248 | if (ctx_size <= 0 || ctx_size > (1<<20)) { | |
249 | dev_priv->hw_contexts_disabled = true; | |
250 | return; | |
251 | } | |
252 | ||
253 | if (create_default_context(dev_priv)) { | |
254 | dev_priv->hw_contexts_disabled = true; | |
255 | return; | |
256 | } | |
257 | ||
258 | DRM_DEBUG_DRIVER("HW context support initialized\n"); | |
259 | } | |
260 | ||
261 | void i915_gem_context_fini(struct drm_device *dev) | |
262 | { | |
263 | struct drm_i915_private *dev_priv = dev->dev_private; | |
264 | ||
265 | if (dev_priv->hw_contexts_disabled) | |
266 | return; | |
40521054 BW |
267 | |
268 | i915_gem_object_unpin(dev_priv->ring[RCS].default_context->obj); | |
269 | ||
270 | do_destroy(dev_priv->ring[RCS].default_context); | |
254f965c BW |
271 | } |
272 | ||
273 | void i915_gem_context_open(struct drm_device *dev, struct drm_file *file) | |
274 | { | |
275 | struct drm_i915_private *dev_priv = dev->dev_private; | |
40521054 | 276 | struct drm_i915_file_private *file_priv = file->driver_priv; |
254f965c BW |
277 | |
278 | if (dev_priv->hw_contexts_disabled) | |
279 | return; | |
40521054 BW |
280 | |
281 | idr_init(&file_priv->context_idr); | |
282 | } | |
283 | ||
284 | static int context_idr_cleanup(int id, void *p, void *data) | |
285 | { | |
286 | struct drm_file *file = (struct drm_file *)data; | |
287 | struct drm_i915_file_private *file_priv = file->driver_priv; | |
288 | struct i915_hw_context *ctx; | |
289 | ||
290 | BUG_ON(id == DEFAULT_CONTEXT_ID); | |
291 | ctx = i915_gem_context_get(file_priv, id); | |
292 | if (WARN_ON(ctx == NULL)) | |
293 | return -ENXIO; | |
294 | ||
295 | do_destroy(ctx); | |
296 | ||
297 | return 0; | |
254f965c BW |
298 | } |
299 | ||
300 | void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) | |
301 | { | |
302 | struct drm_i915_private *dev_priv = dev->dev_private; | |
40521054 | 303 | struct drm_i915_file_private *file_priv = file->driver_priv; |
254f965c BW |
304 | |
305 | if (dev_priv->hw_contexts_disabled) | |
306 | return; | |
40521054 BW |
307 | |
308 | mutex_lock(&dev->struct_mutex); | |
309 | idr_for_each(&file_priv->context_idr, context_idr_cleanup, file); | |
310 | idr_destroy(&file_priv->context_idr); | |
311 | mutex_unlock(&dev->struct_mutex); | |
312 | } | |
313 | ||
e0556841 | 314 | static struct i915_hw_context * |
40521054 BW |
315 | i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) |
316 | { | |
317 | return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); | |
254f965c | 318 | } |
e0556841 BW |
319 | |
320 | static inline int | |
321 | mi_set_context(struct intel_ring_buffer *ring, | |
322 | struct i915_hw_context *new_context, | |
323 | u32 hw_flags) | |
324 | { | |
325 | int ret; | |
326 | ||
327 | ret = intel_ring_begin(ring, 4); | |
328 | if (ret) | |
329 | return ret; | |
330 | ||
331 | intel_ring_emit(ring, MI_NOOP); | |
332 | intel_ring_emit(ring, MI_SET_CONTEXT); | |
333 | intel_ring_emit(ring, new_context->obj->gtt_offset | | |
334 | MI_MM_SPACE_GTT | | |
335 | MI_SAVE_EXT_STATE_EN | | |
336 | MI_RESTORE_EXT_STATE_EN | | |
337 | hw_flags); | |
338 | /* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */ | |
339 | intel_ring_emit(ring, MI_NOOP); | |
340 | ||
341 | intel_ring_advance(ring); | |
342 | ||
343 | return ret; | |
344 | } | |
345 | ||
346 | static int do_switch(struct drm_i915_gem_object *from_obj, | |
347 | struct i915_hw_context *to, | |
348 | u32 seqno) | |
349 | { | |
350 | struct intel_ring_buffer *ring = NULL; | |
351 | u32 hw_flags = 0; | |
352 | int ret; | |
353 | ||
354 | BUG_ON(to == NULL); | |
355 | BUG_ON(from_obj != NULL && from_obj->pin_count == 0); | |
356 | ||
357 | ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false); | |
358 | if (ret) | |
359 | return ret; | |
360 | ||
361 | if (!to->is_initialized || is_default_context(to)) | |
362 | hw_flags |= MI_RESTORE_INHIBIT; | |
363 | else if (WARN_ON_ONCE(from_obj == to->obj)) /* not yet expected */ | |
364 | hw_flags |= MI_FORCE_RESTORE; | |
365 | ||
366 | ring = to->ring; | |
367 | ret = mi_set_context(ring, to, hw_flags); | |
368 | if (ret) { | |
369 | i915_gem_object_unpin(to->obj); | |
370 | return ret; | |
371 | } | |
372 | ||
373 | /* The backing object for the context is done after switching to the | |
374 | * *next* context. Therefore we cannot retire the previous context until | |
375 | * the next context has already started running. In fact, the below code | |
376 | * is a bit suboptimal because the retiring can occur simply after the | |
377 | * MI_SET_CONTEXT instead of when the next seqno has completed. | |
378 | */ | |
379 | if (from_obj != NULL) { | |
380 | from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; | |
381 | i915_gem_object_move_to_active(from_obj, ring, seqno); | |
382 | /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the | |
383 | * whole damn pipeline, we don't need to explicitly mark the | |
384 | * object dirty. The only exception is that the context must be | |
385 | * correct in case the object gets swapped out. Ideally we'd be | |
386 | * able to defer doing this until we know the object would be | |
387 | * swapped, but there is no way to do that yet. | |
388 | */ | |
389 | from_obj->dirty = 1; | |
390 | BUG_ON(from_obj->ring != to->ring); | |
391 | i915_gem_object_unpin(from_obj); | |
392 | } | |
393 | ||
394 | ring->last_context_obj = to->obj; | |
395 | to->is_initialized = true; | |
396 | ||
397 | return 0; | |
398 | } | |
399 | ||
400 | /** | |
401 | * i915_switch_context() - perform a GPU context switch. | |
402 | * @ring: ring for which we'll execute the context switch | |
403 | * @file_priv: file_priv associated with the context, may be NULL | |
404 | * @id: context id number | |
405 | * @seqno: sequence number by which the new context will be switched to | |
406 | * @flags: | |
407 | * | |
408 | * The context life cycle is simple. The context refcount is incremented and | |
409 | * decremented by 1 and create and destroy. If the context is in use by the GPU, | |
410 | * it will have a refoucnt > 1. This allows us to destroy the context abstract | |
411 | * object while letting the normal object tracking destroy the backing BO. | |
412 | */ | |
413 | int i915_switch_context(struct intel_ring_buffer *ring, | |
414 | struct drm_file *file, | |
415 | int to_id) | |
416 | { | |
417 | struct drm_i915_private *dev_priv = ring->dev->dev_private; | |
418 | struct drm_i915_file_private *file_priv = NULL; | |
419 | struct i915_hw_context *to; | |
420 | struct drm_i915_gem_object *from_obj = ring->last_context_obj; | |
421 | int ret; | |
422 | ||
423 | if (dev_priv->hw_contexts_disabled) | |
424 | return 0; | |
425 | ||
426 | if (ring != &dev_priv->ring[RCS]) | |
427 | return 0; | |
428 | ||
429 | if (file) | |
430 | file_priv = file->driver_priv; | |
431 | ||
432 | if (to_id == DEFAULT_CONTEXT_ID) { | |
433 | to = ring->default_context; | |
434 | } else { | |
435 | to = i915_gem_context_get(file_priv, to_id); | |
436 | if (to == NULL) | |
437 | return -EINVAL; | |
438 | } | |
439 | ||
440 | if (from_obj == to->obj) | |
441 | return 0; | |
442 | ||
443 | ret = do_switch(from_obj, to, i915_gem_next_request_seqno(to->ring)); | |
444 | if (ret) | |
445 | return ret; | |
446 | ||
447 | /* Just to make the code a little cleaner we take the object reference | |
448 | * after the switch was successful. It would be more intuitive to ref | |
449 | * the 'to' object before the switch but we know the refcount must be >0 | |
450 | * if context_get() succeeded, and we hold struct mutex. So it's safe to | |
451 | * do this here/now | |
452 | */ | |
453 | drm_gem_object_reference(&to->obj->base); | |
454 | if (from_obj != NULL) | |
455 | drm_gem_object_unreference(&from_obj->base); | |
456 | return ret; | |
457 | } |