/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "../i915_selftest.h"
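/*
 * Fixture shared by the hangcheck selftests below: "hws" is a scratch page
 * used as a software status page (one u32 slot per fence context), and "obj"
 * holds the self-referencing batch buffer that simulates a hung engine.
 */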
struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};
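/*
 * hang_init() allocates the two internal objects used by the tests and maps
 * them for CPU access: the status page is kept write-back (LLC), while the
 * batch is mapped WC on platforms without an LLC.
 */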
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}
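/*
 * Each fence context gets its own u32 slot in the status page:
 * hws_address() gives the GPU address of that slot, hws_seqno() (further
 * below) reads it back through the CPU mapping.
 */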
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}
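/*
 * emit_recurse_batch() builds the "hanging" batch: it stores the request's
 * seqno into the status page (so wait_for_hang() can tell the batch has
 * started executing) and then issues a MI_BATCH_BUFFER_START pointing back
 * at the start of the batch, so the engine spins forever until the first
 * dword is overwritten with MI_BATCH_BUFFER_END.
 */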
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ?
		&rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		goto unpin_hws;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}
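/*
 * hang_create_request() allocates a request on the given engine and attaches
 * a fresh copy of the hanging batch. If the current batch object is still
 * active on the GPU, it is first replaced with a new one so that any earlier
 * spinner is left undisturbed.
 */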
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}
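/*
 * hang_fini() terminates any spinner still running by rewriting the first
 * batch dword to MI_BATCH_BUFFER_END, then releases the fixture objects and
 * waits for the GPU to idle.
 */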
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}
static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	if (!igt_can_mi_store_dword_imm(i915))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

		*h.batch = MI_BATCH_BUFFER_END;
		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915); /* assumed single-argument form of the reset entry point */

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}
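/*
 * fake_hangcheck() marks the request's engine as stalled at its current
 * seqno (mimicking what the real hangcheck timer would detect), signals a
 * reset handoff and wakes anyone waiting on the GPU error state. It returns
 * the reset count sampled before the handoff so callers can verify that a
 * reset actually happened.
 */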
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}
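/*
 * wait_for_hang() polls the status page (first quickly, then with a longer
 * timeout) until the hanging batch has written its seqno, i.e. until the
 * spinner is actually running on the engine.
 */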
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	/* 10us quick poll, then up to 1s for the spinner to start (assumed timeouts) */
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}
static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	/* Check that we detect a stuck waiter and issue a reset */

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto out_rq;
	}

	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

	err = 0;
out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}
static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	if (!igt_can_mi_store_dword_imm(i915))
		return 0;

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				pr_err("Failed to start request %x\n",
				       prev->fence.seqno);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915); /* assumed single-argument form of the reset entry point */

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

		*h.batch = MI_BATCH_BUFFER_END;

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}
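/*
 * Live selftest entry point; the whole suite is skipped on platforms without
 * GPU reset support.
 */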
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_global_reset),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
	};

	if (!intel_has_gpu_reset(i915))
		return 0;

	return i915_subtests(tests, i915);
}