/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "../i915_selftest.h"

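/*
 * The hang fixture: a batch buffer that deliberately never completes. The
 * batch stores its request's seqno into a per-context slot of a "hardware
 * status" page (hws) and then branches back to its own start, spinning
 * until the loop is overwritten with MI_BATCH_BUFFER_END from the CPU.
 */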
struct hang {
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *hws;
	struct drm_i915_gem_object *obj;
	u32 *seqno;
	u32 *batch;
};

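/*
 * Allocate the two backing objects (one page each) and keep both mapped:
 * h->seqno for reading back the HWS writes, and h->batch for (re)writing
 * the spinning batch from the CPU.
 */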
static int hang_init(struct hang *h, struct drm_i915_private *i915)
{
	void *vaddr;
	int err;

	memset(h, 0, sizeof(*h));
	h->i915 = i915;

	h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->hws))
		return PTR_ERR(h->hws);

	h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(h->obj)) {
		err = PTR_ERR(h->obj);
		goto err_hws;
	}

	i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC);
	vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_obj;
	}
	h->seqno = memset(vaddr, 0xff, PAGE_SIZE);

	vaddr = i915_gem_object_pin_map(h->obj,
					HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto err_unpin_hws;
	}
	h->batch = vaddr;

	return 0;

err_unpin_hws:
	i915_gem_object_unpin_map(h->hws);
err_obj:
	i915_gem_object_put(h->obj);
err_hws:
	i915_gem_object_put(h->hws);
	return err;
}

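/* Each context gets its own u32 slot in the HWS page (wrapping at a page). */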
static u64 hws_address(const struct i915_vma *hws,
		       const struct drm_i915_gem_request *rq)
{
	return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context);
}

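/*
 * Bind the batch and HWS objects into the request's address space, mark
 * them active for this request, and emit the self-referencing batch.
 */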
static int emit_recurse_batch(struct hang *h,
			      struct drm_i915_gem_request *rq)
{
	struct drm_i915_private *i915 = h->i915;
	struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base;
	struct i915_vma *hws, *vma;
	unsigned int flags;
	u32 *batch;
	int err;

	vma = i915_vma_instance(h->obj, vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	hws = i915_vma_instance(h->hws, vm, NULL);
	if (IS_ERR(hws))
		return PTR_ERR(hws);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	err = i915_vma_pin(hws, 0, 0, PIN_USER);
	if (err)
		goto unpin_vma;

	err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
	if (err)
		goto unpin_hws;

	err = i915_switch_context(rq);
	if (err)
		goto unpin_hws;

	i915_vma_move_to_active(vma, rq, 0);
	if (!i915_gem_object_has_active_reference(vma->obj)) {
		i915_gem_object_get(vma->obj);
		i915_gem_object_set_active_reference(vma->obj);
	}

	i915_vma_move_to_active(hws, rq, 0);
	if (!i915_gem_object_has_active_reference(hws->obj)) {
		i915_gem_object_get(hws->obj);
		i915_gem_object_set_active_reference(hws->obj);
	}

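	/*
	 * The batch itself: store the request seqno into this context's HWS
	 * slot, then MI_BATCH_BUFFER_START back to our own start so the
	 * batch spins until the loop is clobbered with MI_BATCH_BUFFER_END.
	 * The command encodings differ per generation.
	 */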
	batch = h->batch;
	if (INTEL_GEN(i915) >= 8) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = upper_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*batch++ = lower_32_bits(vma->node.start);
		*batch++ = upper_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 6) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
		*batch++ = lower_32_bits(vma->node.start);
	} else if (INTEL_GEN(i915) >= 4) {
		*batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*batch++ = 0;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
		*batch++ = lower_32_bits(vma->node.start);
	} else {
		*batch++ = MI_STORE_DWORD_IMM;
		*batch++ = lower_32_bits(hws_address(hws, rq));
		*batch++ = rq->fence.seqno;
		*batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1;
		*batch++ = lower_32_bits(vma->node.start);
	}
	*batch++ = MI_BATCH_BUFFER_END; /* not reached */

	flags = 0;
	if (INTEL_GEN(vm->i915) <= 5)
		flags |= I915_DISPATCH_SECURE;

	err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);

unpin_hws:
	i915_vma_unpin(hws);
unpin_vma:
	i915_vma_unpin(vma);
	return err;
}

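/*
 * Build a request whose payload is the spinning batch. If the previous
 * batch object is still busy on the GPU, swap in a fresh one so we do not
 * rewrite a buffer another engine may still be executing.
 */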
static struct drm_i915_gem_request *
hang_create_request(struct hang *h,
		    struct intel_engine_cs *engine,
		    struct i915_gem_context *ctx)
{
	struct drm_i915_gem_request *rq;
	int err;

	if (i915_gem_object_is_active(h->obj)) {
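		/* The old batch is still busy; allocate and map a new page. */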
		struct drm_i915_gem_object *obj;
		void *vaddr;

		obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
		if (IS_ERR(obj))
			return ERR_CAST(obj);

		vaddr = i915_gem_object_pin_map(obj,
						HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			i915_gem_object_put(obj);
			return ERR_CAST(vaddr);
		}

		i915_gem_object_unpin_map(h->obj);
		i915_gem_object_put(h->obj);

		h->obj = obj;
		h->batch = vaddr;
	}

	rq = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(rq))
		return rq;

	err = emit_recurse_batch(h, rq);
	if (err) {
		__i915_add_request(rq, false);
		return ERR_PTR(err);
	}

	return rq;
}

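/* Sample the seqno last written to this context's HWS slot by the batch. */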
static u32 hws_seqno(const struct hang *h,
		     const struct drm_i915_gem_request *rq)
{
	return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]);
}

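/*
 * Tear down the fixture: terminate any still-spinning batch, release both
 * objects and wait for the GPU to idle before returning.
 */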
static void hang_fini(struct hang *h)
{
	*h->batch = MI_BATCH_BUFFER_END;
	wmb();

	i915_gem_object_unpin_map(h->obj);
	i915_gem_object_put(h->obj);

	i915_gem_object_unpin_map(h->hws);
	i915_gem_object_put(h->hws);

	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
}

static int igt_hang_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Basic check that we can execute our hanging batch */

	if (!igt_can_mi_store_dword_imm(i915))
		return 0;

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		long timeout;

		rq = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			pr_err("Failed to create request for %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}

		i915_gem_request_get(rq);

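		/* Complete the spinner at once by closing the batch loop. */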
		*h.batch = MI_BATCH_BUFFER_END;
		__i915_add_request(rq, true);

		timeout = i915_wait_request(rq,
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		i915_gem_request_put(rq);

		if (timeout < 0) {
			err = timeout;
			pr_err("Wait for request failed on %s, err=%d\n",
			       engine->name, err);
			goto fini;
		}
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_global_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int reset_count;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	reset_count = i915_reset_count(&i915->gpu_error);

	i915_reset(i915);

	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}
	mutex_unlock(&i915->drm.struct_mutex);

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	if (i915_terminally_wedged(&i915->gpu_error))
		err = -EIO;

	return err;
}

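/*
 * Pretend hangcheck found the engine stuck: mark it stalled at its current
 * seqno and set RESET_HANDOFF so that a waiter performs the actual reset.
 */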
static u32 fake_hangcheck(struct drm_i915_gem_request *rq)
{
	u32 reset_count;

	rq->engine->hangcheck.stalled = true;
	rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine);

	reset_count = i915_reset_count(&rq->i915->gpu_error);

	set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags);
	wake_up_all(&rq->i915->gpu_error.wait_queue);

	return reset_count;
}

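/*
 * Wait for the spinner to start running on the GPU, i.e. for its seqno to
 * appear in the HWS slot: busy-wait briefly, then sleep-wait for up to 1s.
 */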
static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq)
{
	return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq),
					       rq->fence.seqno),
			     10) &&
		 wait_for(i915_seqno_passed(hws_seqno(h, rq),
					    rq->fence.seqno),
			  1000));
}

static int igt_wait_reset(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_request *rq;
	unsigned int reset_count;
	struct hang h;
	long timeout;
	int err;

	/* Check that we detect a stuck waiter and issue a reset */

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);

	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto fini;
	}

	i915_gem_request_get(rq);
	__i915_add_request(rq, true);

	if (!wait_for_hang(&h, rq)) {
		pr_err("Failed to start request %x\n", rq->fence.seqno);
		err = -EIO;
		goto out_rq;
	}

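	/*
	 * Flag the hang and hand the reset off to a waiter: the locked wait
	 * below should perform the reset on our behalf before timing out.
	 */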
	reset_count = fake_hangcheck(rq);

	timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10);
	if (timeout < 0) {
		pr_err("i915_wait_request failed on a stuck request: err=%ld\n",
		       timeout);
		err = timeout;
		goto out_rq;
	}

	GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
	if (i915_reset_count(&i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
		goto out_rq;
	}

out_rq:
	i915_gem_request_put(rq);
fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

static int igt_reset_queue(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct hang h;
	int err;

	/* Check that we replay pending requests following a hang */

	if (!igt_can_mi_store_dword_imm(i915))
		return 0;

	set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);
	mutex_lock(&i915->drm.struct_mutex);
	err = hang_init(&h, i915);
	if (err)
		goto unlock;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_request *prev;
		IGT_TIMEOUT(end_time);
		unsigned int count;

		prev = hang_create_request(&h, engine, i915->kernel_context);
		if (IS_ERR(prev)) {
			err = PTR_ERR(prev);
			goto fini;
		}

		i915_gem_request_get(prev);
		__i915_add_request(prev, true);

		count = 0;
		do {
			struct drm_i915_gem_request *rq;
			unsigned int reset_count;

			rq = hang_create_request(&h,
						 engine,
						 i915->kernel_context);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto fini;
			}

			i915_gem_request_get(rq);
			__i915_add_request(rq, true);

			if (!wait_for_hang(&h, prev)) {
				pr_err("Failed to start request %x\n",
				       prev->fence.seqno);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EIO;
				goto fini;
			}

			reset_count = fake_hangcheck(prev);

			i915_reset(i915);

			GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
					    &i915->gpu_error.flags));

			if (prev->fence.error != -EIO) {
				pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
				       prev->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (rq->fence.error) {
				pr_err("Fence error status not zero [%d] after unrelated reset\n",
				       rq->fence.error);
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			if (i915_reset_count(&i915->gpu_error) == reset_count) {
				pr_err("No GPU reset recorded!\n");
				i915_gem_request_put(rq);
				i915_gem_request_put(prev);
				err = -EINVAL;
				goto fini;
			}

			i915_gem_request_put(prev);
			prev = rq;
			count++;
		} while (time_before(jiffies, end_time));
		pr_info("%s: Completed %d resets\n", engine->name, count);

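		/* Terminate the final spinner and drop our reference. */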
		*h.batch = MI_BATCH_BUFFER_END;
		wmb();

		i915_gem_request_put(prev);
	}

fini:
	hang_fini(&h);
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	clear_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags);

	if (i915_terminally_wedged(&i915->gpu_error))
		return -EIO;

	return err;
}

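/*
 * Entry point for the live hangcheck selftests; skipped entirely on
 * platforms without GPU reset support.
 */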
int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_hang_sanitycheck),
		SUBTEST(igt_global_reset),
		SUBTEST(igt_wait_reset),
		SUBTEST(igt_reset_queue),
	};

	if (!intel_has_gpu_reset(i915))
		return 0;

	return i915_subtests(tests, i915);
}