/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/kthread.h>

#include "i915_drv.h"

static void intel_breadcrumbs_fake_irq(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;

	/*
	 * The timer persists in case we cannot enable interrupts,
	 * or if we have previously seen seqno/interrupt incoherency
	 * ("missed interrupt" syndrome). Here the worker will wake up
	 * every jiffie in order to kick the oldest waiter to do the
	 * coherent seqno check.
	 */
	rcu_read_lock();
	if (intel_engine_wakeup(engine))
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
	rcu_read_unlock();
}

static void irq_enable(struct intel_engine_cs *engine)
{
	/* Enabling the IRQ may miss the generation of the interrupt, but
	 * we still need to force the barrier before reading the seqno,
	 * just in case.
	 */
	engine->breadcrumbs.irq_posted = true;

	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_enable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);
}

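/* Counterpart to irq_enable(): mask the user interrupt again and mark that
 * no breadcrumb interrupt is pending.
 */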
static void irq_disable(struct intel_engine_cs *engine)
{
	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_disable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);

	engine->breadcrumbs.irq_posted = false;
}

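/* Caller must hold b->lock. Takes a no-resume runtime-pm reference and
 * unmasks the user interrupt for this engine (unless it is suppressed for
 * testing via test_irq_rings). If the interrupt cannot be enabled, or it has
 * previously proven unreliable (missed_irq_rings), we fall back to polling
 * with the fake-irq timer.
 */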
static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);
	struct drm_i915_private *i915 = engine->i915;

	assert_spin_locked(&b->lock);
	if (b->rpm_wakelock)
		return;

	/* Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference. For completeness,
	 * record an rpm reference for ourselves to cover the
	 * interrupt we unmask.
	 */
	intel_runtime_pm_get_noresume(i915);
	b->rpm_wakelock = true;

	/* No interrupts? Kick the waiter every jiffie! */
	if (intel_irqs_enabled(i915)) {
		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
			irq_enable(engine);
		b->irq_enabled = true;
	}

	if (!b->irq_enabled ||
	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
		mod_timer(&b->fake_irq, jiffies + 1);
}

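/* Caller must hold b->lock. Undoes __intel_breadcrumbs_enable_irq(): masks
 * the user interrupt if we enabled it and drops our runtime-pm reference.
 */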
static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	assert_spin_locked(&b->lock);
	if (!b->rpm_wakelock)
		return;

	if (b->irq_enabled) {
		irq_disable(engine);
		b->irq_enabled = false;
	}

	intel_runtime_pm_put(engine->i915);
	b->rpm_wakelock = false;
}

static inline struct intel_wait *to_wait(struct rb_node *node)
{
	return container_of(node, struct intel_wait, node);
}

static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
					      struct intel_wait *wait)
{
	assert_spin_locked(&b->lock);

	/* This request is completed, so remove it from the tree, mark it as
	 * complete, and *then* wake up the associated task.
	 */
	rb_erase(&wait->node, &b->waiters);
	RB_CLEAR_NODE(&wait->node);

	wake_up_process(wait->tsk); /* implicit smp_wmb() */
}

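/* Add a waiter to the per-engine rbtree of waiters, ordered by seqno.
 * Completed waiters encountered while walking the tree are removed and woken
 * along the way. Returns true if the caller should act as the bottom-half
 * and perform the coherent seqno check itself, either because it is now the
 * oldest waiter or because its seqno has already passed. Must be called with
 * b->lock held.
 */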
static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, we prune completed branches. Since we hold
	 * the spinlock, we know that the first_waiter must be delayed, and we
	 * can reduce some of the sequential wake up latency by taking action
	 * ourselves and waking up the completed tasks in parallel. Also, by
	 * removing stale elements in the tree, we may be able to reduce the
	 * ping-pong between the old bottom-half and ourselves as first-waiter.
	 */
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first;
	}

	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			p = &parent->rb_right;
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);
	GEM_BUG_ON(!first && !b->irq_seqno_bh);

	if (completed) {
		struct rb_node *next = rb_next(completed);

		GEM_BUG_ON(!next && !first);
		if (next && next != &wait->node) {
			GEM_BUG_ON(first);
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			/* As there is a delay between reading the current
			 * seqno, processing the completed tasks and selecting
			 * the next waiter, we may have missed the interrupt
			 * and so we need the next bottom-half to wake up.
			 *
			 * Also, as we enable the IRQ, we may miss the
			 * interrupt for that seqno, so we have to wake up
			 * the next bottom-half in order to do a coherent check
			 * in case the seqno passed.
			 */
			__intel_breadcrumbs_enable_irq(b);
			if (READ_ONCE(b->irq_posted))
				wake_up_process(to_wait(next)->tsk);
		}

		do {
			struct intel_wait *crumb = to_wait(completed);
			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	if (first) {
		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
		b->first_wait = wait;
		smp_store_mb(b->irq_seqno_bh, wait->tsk);
		/* After assigning ourselves as the new bottom-half, we must
		 * perform a cursory check to prevent a missed interrupt.
		 * Either we miss the interrupt whilst programming the hardware,
		 * or if there was a previous waiter (for a later seqno) they
		 * may be woken instead of us (due to the inherent race
		 * in the unlocked read of b->irq_seqno_bh in the irq handler)
		 * and so we miss the wake up.
		 */
		__intel_breadcrumbs_enable_irq(b);
	}
	GEM_BUG_ON(!b->irq_seqno_bh);
	GEM_BUG_ON(!b->first_wait);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

	return first;
}

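/* Locked wrapper around __intel_engine_add_wait(). A minimal sketch of how a
 * waiter might use this interface (illustrative only, not lifted from the
 * actual callers):
 *
 *	struct intel_wait wait = { .tsk = current, .seqno = seqno };
 *
 *	if (intel_engine_add_wait(engine, &wait)) {
 *		// we are the bottom-half; the seqno may already have passed,
 *		// so do the coherent check before sleeping
 *	}
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (i915_seqno_passed(intel_engine_get_seqno(engine),
 *				      wait.seqno))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	intel_engine_remove_wait(engine, &wait);
 */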
bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool first;

	spin_lock(&b->lock);
	first = __intel_engine_add_wait(engine, wait);
	spin_unlock(&b->lock);

	return first;
}

void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
{
	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}

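/* Returns true if the waiter at @rb is at least as important (has an equal
 * or smaller task->prio value) as the given priority threshold.
 */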
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
	return rb && to_wait(rb)->tsk->prio <= priority;
}

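/* The signaler thread is treated as the most important waiter (INT_MIN, the
 * smallest possible task->prio value); all other waiters are ranked by their
 * normal task priority.
 */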
static inline int wakeup_priority(struct intel_breadcrumbs *b,
				  struct task_struct *tsk)
{
	if (tsk == b->signaler)
		return INT_MIN;
	else
		return tsk->prio;
}

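/* Remove a waiter previously added with intel_engine_add_wait(). If the
 * waiter was the current bottom-half, responsibility is handed over to the
 * next waiter in the tree (waking any already-completed waiters on the way),
 * or the interrupt is disabled again if no waiters remain.
 */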
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		return;

	spin_lock(&b->lock);

	if (RB_EMPTY_NODE(&wait->node))
		goto out_unlock;

	if (b->first_wait == wait) {
		const int priority = wakeup_priority(b, wait->tsk);
		struct rb_node *next;

		GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue onto the next waiter. So
			 * if we have a small herd, they will wake up in
			 * parallel rather than sequentially, which should
			 * reduce the overall latency in waking all the
			 * completed clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);

				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		if (next) {
			/* In our haste, we may have completed the first waiter
			 * before we enabled the interrupt. Do so now as we
			 * have a second waiter for a future seqno. Afterwards,
			 * we have to wake up that waiter in case we missed
			 * the interrupt, or if we have to handle an
			 * exception rather than a seqno completion.
			 */
			b->first_wait = to_wait(next);
			smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
			if (b->first_wait->seqno != wait->seqno)
				__intel_breadcrumbs_enable_irq(b);
			wake_up_process(b->irq_seqno_bh);
		} else {
			b->first_wait = NULL;
			WRITE_ONCE(b->irq_seqno_bh, NULL);
			__intel_breadcrumbs_disable_irq(b);
		}
	} else {
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);

out_unlock:
	GEM_BUG_ON(b->first_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->first_wait ? &b->first_wait->node : NULL));
	GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
	spin_unlock(&b->lock);
}

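/* Check whether the oldest signal tracked by the signaler thread is ready,
 * either because its wait has already been completed by another bottom-half
 * or because the request itself has completed (or the GPU hung).
 */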
static bool signal_complete(struct drm_i915_gem_request *request)
{
	if (!request)
		return false;

	/* If another process served as the bottom-half it may have already
	 * signalled that this wait is already completed.
	 */
	if (intel_wait_complete(&request->signaling.wait))
		return true;

	/* Carefully check if the request is complete, giving time for the
	 * seqno to be visible or if the GPU hung.
	 */
	if (__i915_request_irq_complete(request))
		return true;

	return false;
}

static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
{
	return container_of(rb, struct drm_i915_gem_request, signaling.node);
}

static void signaler_set_rtpriority(void)
{
	struct sched_param param = { .sched_priority = 1 };

	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}

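/* Main loop of the per-engine signaler kthread: wait for the oldest tracked
 * request to complete, then remove its waiter, advance to the next oldest
 * signal and drop the request reference taken in
 * intel_engine_enable_signaling().
 */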
static int intel_breadcrumbs_signaler(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_gem_request *request;

	/* Install ourselves with high priority to reduce signalling latency */
	signaler_set_rtpriority();

	do {
		set_current_state(TASK_INTERRUPTIBLE);

		/* We are either woken up by the interrupt bottom-half,
		 * or by a client adding a new signaller. In both cases,
		 * the GPU seqno may have advanced beyond our oldest signal.
		 * If it has, propagate the signal, remove the waiter and
		 * check again with the next oldest signal. Otherwise we
		 * need to wait for a new interrupt from the GPU or for
		 * a new client.
		 */
		request = READ_ONCE(b->first_signal);
		if (signal_complete(request)) {
			/* Wake up all other completed waiters and select the
			 * next bottom-half for the next user interrupt.
			 */
			intel_engine_remove_wait(engine,
						 &request->signaling.wait);

			/* Find the next oldest signal. Note that as we have
			 * not been holding the lock, another client may
			 * have installed an even older signal than the one
			 * we just completed - so double check we are still
			 * the oldest before picking the next one.
			 */
			spin_lock(&b->lock);
			if (request == b->first_signal) {
				struct rb_node *rb =
					rb_next(&request->signaling.node);
				b->first_signal = rb ? to_signaler(rb) : NULL;
			}
			rb_erase(&request->signaling.node, &b->signals);
			spin_unlock(&b->lock);

			i915_gem_request_unreference(request);
		} else {
			if (kthread_should_stop())
				break;

			schedule();
		}
	} while (1);
	__set_current_state(TASK_RUNNING);

	return 0;
}

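/* Ask the signaler kthread to notice when @request completes. The request is
 * added both to the engine's waiter rbtree (with the signaler registered as
 * the waiting task) and to the retirement ordered tree of pending signals,
 * holding a reference on the request until the signaler has processed it.
 */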
void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node *parent, **p;
	bool first, wakeup;

	if (unlikely(READ_ONCE(request->signaling.wait.tsk)))
		return;

	spin_lock(&b->lock);
	if (unlikely(request->signaling.wait.tsk)) {
		wakeup = false;
		goto unlock;
	}

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.seqno = request->seqno;
	i915_gem_request_reference(request);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

	/* Now insert ourselves into the retirement ordered list of signals
	 * on this engine. We track the oldest seqno as that will be the
	 * first signal to complete.
	 */
	parent = NULL;
	first = true;
	p = &b->signals.rb_node;
	while (*p) {
		parent = *p;
		if (i915_seqno_passed(request->seqno,
				      to_signaler(parent)->seqno)) {
			p = &parent->rb_right;
			first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&request->signaling.node, parent, p);
	rb_insert_color(&request->signaling.node, &b->signals);
	if (first)
		smp_store_mb(b->first_signal, request);

unlock:
	spin_unlock(&b->lock);

	if (wakeup)
		wake_up_process(b->signaler);
}

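/* Per-engine initialisation: set up the lock, arm-able fake-irq timer and
 * spawn the signaler kthread.
 */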
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct task_struct *tsk;

	spin_lock_init(&b->lock);
	setup_timer(&b->fake_irq,
		    intel_breadcrumbs_fake_irq,
		    (unsigned long)engine);

	/* Spawn a thread to provide a common bottom-half for all signals.
	 * As this is an asynchronous interface we cannot steal the current
	 * task for handling the bottom-half to the user interrupt, therefore
	 * we create a thread to do the coherent seqno dance after the
	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
	 */
	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
			  "i915/signal:%d", engine->id);
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	b->signaler = tsk;

	return 0;
}

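/* Tear down what intel_engine_init_breadcrumbs() created: stop the signaler
 * kthread and cancel the fake-irq timer.
 */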
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	if (!IS_ERR_OR_NULL(b->signaler))
		kthread_stop(b->signaler);

	del_timer_sync(&b->fake_irq);
}

unsigned int intel_kick_waiters(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	/* To avoid the task_struct disappearing beneath us as we wake up
	 * the process, we must first inspect the task_struct->state under the
	 * RCU lock, i.e. as we call wake_up_process() we must be holding the
	 * rcu_read_lock().
	 */
	rcu_read_lock();
	for_each_engine(engine, i915)
		if (unlikely(intel_engine_wakeup(engine)))
			mask |= intel_engine_flag(engine);
	rcu_read_unlock();

	return mask;
}

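/* Wake the signaler kthread on every engine that still has pending signals,
 * returning a mask of the engines kicked.
 */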
unsigned int intel_kick_signalers(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	for_each_engine(engine, i915) {
		if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
			wake_up_process(engine->breadcrumbs.signaler);
			mask |= intel_engine_flag(engine);
		}
	}

	return mask;
}