]>
Commit | Line | Data |
---|---|---|
254f965c BW |
1 | /* |
2 | * Copyright © 2011-2012 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | * Authors: | |
24 | * Ben Widawsky <ben@bwidawsk.net> | |
25 | * | |
26 | */ | |
27 | ||
28 | /* | |
29 | * This file implements HW context support. On gen5+ a HW context consists of an | |
30 | * opaque GPU object which is referenced at times of context saves and restores. | |
31 | * With RC6 enabled, the context is also referenced as the GPU enters and exits | |
32 | * from RC6 (GPU has its own internal power context, except on gen5). Though | |
33 | * something like a context does exist for the media ring, the code only | |
34 | * supports contexts for the render ring. | |
35 | * | |
36 | * In software, there is a distinction between contexts created by the user, | |
37 | * and the default HW context. The default HW context is used by GPU clients | |
38 | * that do not request setup of their own hardware context. The default | |
39 | * context's state is never restored to help prevent programming errors. This | |
40 | * would happen if a client ran and piggy-backed off another client's GPU state. | |
41 | * The default context only exists to give the GPU some offset to load as the | |
42 | * current to invoke a save of the context we actually care about. In fact, the | |
43 | * code could likely be constructed, albeit in a more complicated fashion, to | |
44 | * never use the default context, though that limits the driver's ability to | |
45 | * swap out, and/or destroy other contexts. | |
46 | * | |
47 | * All other contexts are created as a request by the GPU client. These contexts | |
48 | * store GPU state, and thus allow GPU clients to not re-emit state (and | |
49 | * potentially query certain state) at any time. The kernel driver makes | |
50 | * certain that the appropriate commands are inserted. | |
51 | * | |
52 | * The context life cycle is semi-complicated in that context BOs may live | |
53 | * longer than the context itself because of the way the hardware, and object | |
54 | * tracking works. Below is a very crude representation of the state machine | |
55 | * describing the context life. | |
56 | * refcount pincount active | |
57 | * S0: initial state 0 0 0 | |
58 | * S1: context created 1 0 0 | |
59 | * S2: context is currently running 2 1 X | |
60 | * S3: GPU referenced, but not current 2 0 1 | |
61 | * S4: context is current, but destroyed 1 1 0 | |
62 | * S5: like S3, but destroyed 1 0 1 | |
63 | * | |
64 | * The most common (but not all) transitions: | |
65 | * S0->S1: client creates a context | |
66 | * S1->S2: client submits execbuf with context | |
67 | * S2->S3: other clients submits execbuf with context | |
68 | * S3->S1: context object was retired | |
69 | * S3->S2: client submits another execbuf | |
70 | * S2->S4: context destroy called with current context | |
71 | * S3->S5->S0: destroy path | |
72 | * S4->S5->S0: destroy path on current context | |
73 | * | |
74 | * There are two confusing terms used above: | |
75 | * The "current context" means the context which is currently running on the | |
76 | * GPU. The GPU has loaded its state already and has stored away the gtt | |
77 | * offset of the BO. The GPU is not actively referencing the data at this | |
78 | * offset, but it will on the next context switch. The only way to avoid this | |
79 | * is to do a GPU reset. | |
80 | * | |
81 | * An "active context" is one which was previously the "current context" and is | |
82 | * on the active list waiting for the next context switch to occur. Until this | |
83 | * happens, the object must remain at the same gtt offset. It is therefore | |
84 | * possible to destroy a context, but it is still active. | |
85 | * | |
86 | */ | |
87 | ||
88 | #include "drmP.h" | |
89 | #include "i915_drm.h" | |
90 | #include "i915_drv.h" | |
91 | ||
40521054 BW |
92 | /* This is a HW constraint. The value below is the largest known requirement |
93 | * I've seen in a spec to date, and that was a workaround for a non-shipping | |
94 | * part. It should be safe to decrease this, but it's more future proof as is. | |
95 | */ | |
96 | #define CONTEXT_ALIGN (64<<10) | |
97 | ||
98 | static struct i915_hw_context * | |
99 | i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); | |
dfabbcb4 BW |
100 | static int do_switch(struct drm_i915_gem_object *from_obj, |
101 | struct i915_hw_context *to, u32 seqno); | |
40521054 | 102 | |
254f965c BW |
103 | static int get_context_size(struct drm_device *dev) |
104 | { | |
105 | struct drm_i915_private *dev_priv = dev->dev_private; | |
106 | int ret; | |
107 | u32 reg; | |
108 | ||
109 | switch (INTEL_INFO(dev)->gen) { | |
110 | case 6: | |
111 | reg = I915_READ(CXT_SIZE); | |
112 | ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; | |
113 | break; | |
114 | case 7: | |
115 | reg = I915_READ(GEN7_CTX_SIZE); | |
116 | ret = GEN7_CTX_TOTAL_SIZE(reg) * 64; | |
117 | break; | |
118 | default: | |
119 | BUG(); | |
120 | } | |
121 | ||
122 | return ret; | |
123 | } | |
124 | ||
40521054 BW |
125 | static void do_destroy(struct i915_hw_context *ctx) |
126 | { | |
127 | struct drm_device *dev = ctx->obj->base.dev; | |
128 | struct drm_i915_private *dev_priv = dev->dev_private; | |
129 | ||
130 | if (ctx->file_priv) | |
131 | idr_remove(&ctx->file_priv->context_idr, ctx->id); | |
132 | else | |
133 | BUG_ON(ctx != dev_priv->ring[RCS].default_context); | |
134 | ||
135 | drm_gem_object_unreference(&ctx->obj->base); | |
136 | kfree(ctx); | |
137 | } | |
138 | ||
139 | static int | |
140 | create_hw_context(struct drm_device *dev, | |
141 | struct drm_i915_file_private *file_priv, | |
142 | struct i915_hw_context **ctx_out) | |
143 | { | |
144 | struct drm_i915_private *dev_priv = dev->dev_private; | |
145 | int ret, id; | |
146 | ||
147 | *ctx_out = kzalloc(sizeof(struct drm_i915_file_private), GFP_KERNEL); | |
148 | if (*ctx_out == NULL) | |
149 | return -ENOMEM; | |
150 | ||
151 | (*ctx_out)->obj = i915_gem_alloc_object(dev, | |
152 | dev_priv->hw_context_size); | |
153 | if ((*ctx_out)->obj == NULL) { | |
154 | kfree(*ctx_out); | |
155 | DRM_DEBUG_DRIVER("Context object allocated failed\n"); | |
156 | return -ENOMEM; | |
157 | } | |
158 | ||
159 | /* The ring associated with the context object is handled by the normal | |
160 | * object tracking code. We give an initial ring value simple to pass an | |
161 | * assertion in the context switch code. | |
162 | */ | |
163 | (*ctx_out)->ring = &dev_priv->ring[RCS]; | |
164 | ||
165 | /* Default context will never have a file_priv */ | |
166 | if (file_priv == NULL) | |
167 | return 0; | |
168 | ||
169 | (*ctx_out)->file_priv = file_priv; | |
170 | ||
171 | again: | |
172 | if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { | |
173 | ret = -ENOMEM; | |
174 | DRM_DEBUG_DRIVER("idr allocation failed\n"); | |
175 | goto err_out; | |
176 | } | |
177 | ||
178 | ret = idr_get_new_above(&file_priv->context_idr, *ctx_out, | |
179 | DEFAULT_CONTEXT_ID + 1, &id); | |
180 | if (ret == 0) | |
181 | (*ctx_out)->id = id; | |
182 | ||
183 | if (ret == -EAGAIN) | |
184 | goto again; | |
185 | else if (ret) | |
186 | goto err_out; | |
187 | ||
188 | return 0; | |
189 | ||
190 | err_out: | |
191 | do_destroy(*ctx_out); | |
192 | return ret; | |
193 | } | |
194 | ||
e0556841 BW |
195 | static inline bool is_default_context(struct i915_hw_context *ctx) |
196 | { | |
197 | return (ctx == ctx->ring->default_context); | |
198 | } | |
199 | ||
254f965c BW |
200 | /** |
201 | * The default context needs to exist per ring that uses contexts. It stores the | |
202 | * context state of the GPU for applications that don't utilize HW contexts, as | |
203 | * well as an idle case. | |
204 | */ | |
205 | static int create_default_context(struct drm_i915_private *dev_priv) | |
206 | { | |
40521054 BW |
207 | struct i915_hw_context *ctx; |
208 | int ret; | |
209 | ||
210 | BUG_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex)); | |
211 | ||
212 | ret = create_hw_context(dev_priv->dev, NULL, | |
213 | &dev_priv->ring[RCS].default_context); | |
214 | if (ret) | |
215 | return ret; | |
216 | ||
217 | /* We may need to do things with the shrinker which require us to | |
218 | * immediately switch back to the default context. This can cause a | |
219 | * problem as pinning the default context also requires GTT space which | |
220 | * may not be available. To avoid this we always pin the | |
221 | * default context. | |
222 | */ | |
223 | ctx = dev_priv->ring[RCS].default_context; | |
224 | ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false); | |
225 | if (ret) { | |
226 | do_destroy(ctx); | |
227 | return ret; | |
228 | } | |
229 | ||
dfabbcb4 BW |
230 | ret = do_switch(NULL, ctx, 0); |
231 | if (ret) { | |
232 | i915_gem_object_unpin(ctx->obj); | |
233 | do_destroy(ctx); | |
234 | } else { | |
235 | DRM_DEBUG_DRIVER("Default HW context loaded\n"); | |
236 | } | |
237 | ||
40521054 | 238 | return ret; |
254f965c BW |
239 | } |
240 | ||
241 | void i915_gem_context_init(struct drm_device *dev) | |
242 | { | |
243 | struct drm_i915_private *dev_priv = dev->dev_private; | |
244 | uint32_t ctx_size; | |
245 | ||
e158c5aa BW |
246 | if (!HAS_HW_CONTEXTS(dev)) { |
247 | dev_priv->hw_contexts_disabled = true; | |
254f965c | 248 | return; |
e158c5aa | 249 | } |
254f965c BW |
250 | |
251 | /* If called from reset, or thaw... we've been here already */ | |
40521054 BW |
252 | if (dev_priv->hw_contexts_disabled || |
253 | dev_priv->ring[RCS].default_context) | |
254f965c BW |
254 | return; |
255 | ||
256 | ctx_size = get_context_size(dev); | |
257 | dev_priv->hw_context_size = get_context_size(dev); | |
258 | dev_priv->hw_context_size = round_up(dev_priv->hw_context_size, 4096); | |
259 | ||
260 | if (ctx_size <= 0 || ctx_size > (1<<20)) { | |
261 | dev_priv->hw_contexts_disabled = true; | |
262 | return; | |
263 | } | |
264 | ||
265 | if (create_default_context(dev_priv)) { | |
266 | dev_priv->hw_contexts_disabled = true; | |
267 | return; | |
268 | } | |
269 | ||
270 | DRM_DEBUG_DRIVER("HW context support initialized\n"); | |
271 | } | |
272 | ||
273 | void i915_gem_context_fini(struct drm_device *dev) | |
274 | { | |
275 | struct drm_i915_private *dev_priv = dev->dev_private; | |
276 | ||
277 | if (dev_priv->hw_contexts_disabled) | |
278 | return; | |
40521054 | 279 | |
55a66628 DV |
280 | /* The only known way to stop the gpu from accessing the hw context is |
281 | * to reset it. Do this as the very last operation to avoid confusing | |
282 | * other code, leading to spurious errors. */ | |
283 | intel_gpu_reset(dev); | |
284 | ||
40521054 BW |
285 | i915_gem_object_unpin(dev_priv->ring[RCS].default_context->obj); |
286 | ||
287 | do_destroy(dev_priv->ring[RCS].default_context); | |
254f965c BW |
288 | } |
289 | ||
40521054 BW |
290 | static int context_idr_cleanup(int id, void *p, void *data) |
291 | { | |
73c273eb | 292 | struct i915_hw_context *ctx = p; |
40521054 BW |
293 | |
294 | BUG_ON(id == DEFAULT_CONTEXT_ID); | |
40521054 BW |
295 | |
296 | do_destroy(ctx); | |
297 | ||
298 | return 0; | |
254f965c BW |
299 | } |
300 | ||
301 | void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) | |
302 | { | |
40521054 | 303 | struct drm_i915_file_private *file_priv = file->driver_priv; |
254f965c | 304 | |
40521054 | 305 | mutex_lock(&dev->struct_mutex); |
73c273eb | 306 | idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL); |
40521054 BW |
307 | idr_destroy(&file_priv->context_idr); |
308 | mutex_unlock(&dev->struct_mutex); | |
309 | } | |
310 | ||
e0556841 | 311 | static struct i915_hw_context * |
40521054 BW |
312 | i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) |
313 | { | |
314 | return (struct i915_hw_context *)idr_find(&file_priv->context_idr, id); | |
254f965c | 315 | } |
e0556841 BW |
316 | |
317 | static inline int | |
318 | mi_set_context(struct intel_ring_buffer *ring, | |
319 | struct i915_hw_context *new_context, | |
320 | u32 hw_flags) | |
321 | { | |
322 | int ret; | |
323 | ||
12b0286f BW |
324 | /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB |
325 | * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value | |
326 | * explicitly, so we rely on the value at ring init, stored in | |
327 | * itlb_before_ctx_switch. | |
328 | */ | |
329 | if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) { | |
330 | ret = ring->flush(ring, 0, 0); | |
331 | if (ret) | |
332 | return ret; | |
333 | } | |
334 | ||
e37ec39b | 335 | ret = intel_ring_begin(ring, 6); |
e0556841 BW |
336 | if (ret) |
337 | return ret; | |
338 | ||
e37ec39b BW |
339 | if (IS_GEN7(ring->dev)) |
340 | intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); | |
341 | else | |
342 | intel_ring_emit(ring, MI_NOOP); | |
343 | ||
e0556841 BW |
344 | intel_ring_emit(ring, MI_NOOP); |
345 | intel_ring_emit(ring, MI_SET_CONTEXT); | |
346 | intel_ring_emit(ring, new_context->obj->gtt_offset | | |
347 | MI_MM_SPACE_GTT | | |
348 | MI_SAVE_EXT_STATE_EN | | |
349 | MI_RESTORE_EXT_STATE_EN | | |
350 | hw_flags); | |
351 | /* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */ | |
352 | intel_ring_emit(ring, MI_NOOP); | |
353 | ||
e37ec39b BW |
354 | if (IS_GEN7(ring->dev)) |
355 | intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); | |
356 | else | |
357 | intel_ring_emit(ring, MI_NOOP); | |
358 | ||
e0556841 BW |
359 | intel_ring_advance(ring); |
360 | ||
361 | return ret; | |
362 | } | |
363 | ||
364 | static int do_switch(struct drm_i915_gem_object *from_obj, | |
365 | struct i915_hw_context *to, | |
366 | u32 seqno) | |
367 | { | |
368 | struct intel_ring_buffer *ring = NULL; | |
369 | u32 hw_flags = 0; | |
370 | int ret; | |
371 | ||
372 | BUG_ON(to == NULL); | |
373 | BUG_ON(from_obj != NULL && from_obj->pin_count == 0); | |
374 | ||
375 | ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false); | |
376 | if (ret) | |
377 | return ret; | |
378 | ||
3af7b857 DV |
379 | if (!to->obj->has_global_gtt_mapping) |
380 | i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); | |
381 | ||
e0556841 BW |
382 | if (!to->is_initialized || is_default_context(to)) |
383 | hw_flags |= MI_RESTORE_INHIBIT; | |
384 | else if (WARN_ON_ONCE(from_obj == to->obj)) /* not yet expected */ | |
385 | hw_flags |= MI_FORCE_RESTORE; | |
386 | ||
387 | ring = to->ring; | |
388 | ret = mi_set_context(ring, to, hw_flags); | |
389 | if (ret) { | |
390 | i915_gem_object_unpin(to->obj); | |
391 | return ret; | |
392 | } | |
393 | ||
394 | /* The backing object for the context is done after switching to the | |
395 | * *next* context. Therefore we cannot retire the previous context until | |
396 | * the next context has already started running. In fact, the below code | |
397 | * is a bit suboptimal because the retiring can occur simply after the | |
398 | * MI_SET_CONTEXT instead of when the next seqno has completed. | |
399 | */ | |
400 | if (from_obj != NULL) { | |
401 | from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; | |
402 | i915_gem_object_move_to_active(from_obj, ring, seqno); | |
403 | /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the | |
404 | * whole damn pipeline, we don't need to explicitly mark the | |
405 | * object dirty. The only exception is that the context must be | |
406 | * correct in case the object gets swapped out. Ideally we'd be | |
407 | * able to defer doing this until we know the object would be | |
408 | * swapped, but there is no way to do that yet. | |
409 | */ | |
410 | from_obj->dirty = 1; | |
411 | BUG_ON(from_obj->ring != to->ring); | |
412 | i915_gem_object_unpin(from_obj); | |
413 | } | |
414 | ||
415 | ring->last_context_obj = to->obj; | |
416 | to->is_initialized = true; | |
417 | ||
418 | return 0; | |
419 | } | |
420 | ||
421 | /** | |
422 | * i915_switch_context() - perform a GPU context switch. | |
423 | * @ring: ring for which we'll execute the context switch | |
424 | * @file_priv: file_priv associated with the context, may be NULL | |
425 | * @id: context id number | |
426 | * @seqno: sequence number by which the new context will be switched to | |
427 | * @flags: | |
428 | * | |
429 | * The context life cycle is simple. The context refcount is incremented and | |
430 | * decremented by 1 and create and destroy. If the context is in use by the GPU, | |
431 | * it will have a refoucnt > 1. This allows us to destroy the context abstract | |
432 | * object while letting the normal object tracking destroy the backing BO. | |
433 | */ | |
434 | int i915_switch_context(struct intel_ring_buffer *ring, | |
435 | struct drm_file *file, | |
436 | int to_id) | |
437 | { | |
438 | struct drm_i915_private *dev_priv = ring->dev->dev_private; | |
439 | struct drm_i915_file_private *file_priv = NULL; | |
440 | struct i915_hw_context *to; | |
441 | struct drm_i915_gem_object *from_obj = ring->last_context_obj; | |
442 | int ret; | |
443 | ||
444 | if (dev_priv->hw_contexts_disabled) | |
445 | return 0; | |
446 | ||
447 | if (ring != &dev_priv->ring[RCS]) | |
448 | return 0; | |
449 | ||
450 | if (file) | |
451 | file_priv = file->driver_priv; | |
452 | ||
453 | if (to_id == DEFAULT_CONTEXT_ID) { | |
454 | to = ring->default_context; | |
455 | } else { | |
456 | to = i915_gem_context_get(file_priv, to_id); | |
457 | if (to == NULL) | |
0d326013 | 458 | return -ENOENT; |
e0556841 BW |
459 | } |
460 | ||
461 | if (from_obj == to->obj) | |
462 | return 0; | |
463 | ||
464 | ret = do_switch(from_obj, to, i915_gem_next_request_seqno(to->ring)); | |
465 | if (ret) | |
466 | return ret; | |
467 | ||
468 | /* Just to make the code a little cleaner we take the object reference | |
469 | * after the switch was successful. It would be more intuitive to ref | |
470 | * the 'to' object before the switch but we know the refcount must be >0 | |
471 | * if context_get() succeeded, and we hold struct mutex. So it's safe to | |
472 | * do this here/now | |
473 | */ | |
474 | drm_gem_object_reference(&to->obj->base); | |
475 | if (from_obj != NULL) | |
476 | drm_gem_object_unreference(&from_obj->base); | |
477 | return ret; | |
478 | } | |
84624813 BW |
479 | |
480 | int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, | |
481 | struct drm_file *file) | |
482 | { | |
84624813 BW |
483 | struct drm_i915_gem_context_create *args = data; |
484 | struct drm_i915_file_private *file_priv = file->driver_priv; | |
485 | struct i915_hw_context *ctx; | |
486 | int ret; | |
487 | ||
488 | if (!(dev->driver->driver_features & DRIVER_GEM)) | |
489 | return -ENODEV; | |
490 | ||
491 | ret = i915_mutex_lock_interruptible(dev); | |
492 | if (ret) | |
493 | return ret; | |
494 | ||
495 | ret = create_hw_context(dev, file_priv, &ctx); | |
496 | mutex_unlock(&dev->struct_mutex); | |
497 | ||
498 | args->ctx_id = ctx->id; | |
499 | DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id); | |
500 | ||
501 | return ret; | |
502 | } | |
503 | ||
504 | int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, | |
505 | struct drm_file *file) | |
506 | { | |
507 | struct drm_i915_gem_context_destroy *args = data; | |
508 | struct drm_i915_file_private *file_priv = file->driver_priv; | |
84624813 BW |
509 | struct i915_hw_context *ctx; |
510 | int ret; | |
511 | ||
512 | if (!(dev->driver->driver_features & DRIVER_GEM)) | |
513 | return -ENODEV; | |
514 | ||
515 | ret = i915_mutex_lock_interruptible(dev); | |
516 | if (ret) | |
517 | return ret; | |
518 | ||
519 | ctx = i915_gem_context_get(file_priv, args->ctx_id); | |
520 | if (!ctx) { | |
521 | mutex_unlock(&dev->struct_mutex); | |
0d326013 | 522 | return -ENOENT; |
84624813 BW |
523 | } |
524 | ||
525 | do_destroy(ctx); | |
526 | ||
527 | mutex_unlock(&dev->struct_mutex); | |
528 | ||
529 | DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); | |
530 | return 0; | |
531 | } |