/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"

#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>


/*
 * Power Management:
 */

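/*
 * devfreq .target callback: pick the nearest supported OPP for the
 * requested frequency and program the GPU core clock to it.
 */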
static int msm_devfreq_target(struct device *dev, unsigned long *freq,
		u32 flags)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	struct dev_pm_opp *opp;

	opp = devfreq_recommended_opp(dev, freq, flags);

	if (IS_ERR(opp))
		return PTR_ERR(opp);

	clk_set_rate(gpu->core_clk, *freq);
	dev_pm_opp_put(opp);

	return 0;
}

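/*
 * devfreq .get_dev_status callback: report busy vs. total time since the
 * last poll. Busy time is derived from the GPU busy-cycle counter divided
 * by the frequency in MHz, so both values end up in microseconds.
 */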
static int msm_devfreq_get_dev_status(struct device *dev,
		struct devfreq_dev_status *status)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
	u64 cycles;
	u32 freq = ((u32) status->current_frequency) / 1000000;
	ktime_t time;

	status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk);
	gpu->funcs->gpu_busy(gpu, &cycles);

	status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq;

	gpu->devfreq.busy_cycles = cycles;

	time = ktime_get();
	status->total_time = ktime_us_delta(time, gpu->devfreq.time);
	gpu->devfreq.time = time;

	return 0;
}

static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));

	*freq = (unsigned long) clk_get_rate(gpu->core_clk);

	return 0;
}

static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};

static void msm_devfreq_init(struct msm_gpu *gpu)
{
	/* We need target support to do devfreq */
	if (!gpu->funcs->gpu_busy)
		return;

	msm_devfreq_profile.initial_freq = gpu->fast_rate;

	/*
	 * Don't set the freq_table or max_state and let devfreq build the table
	 * from OPP
	 */

	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
			&msm_devfreq_profile, "simple_ondemand", NULL);

	if (IS_ERR(gpu->devfreq.devfreq)) {
		dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
		gpu->devfreq.devfreq = NULL;
	}
}

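/*
 * Power rail handling: the main 'vdd' supply and (when present) the
 * 'vddcx' domain supply are enabled before clocks on resume and
 * disabled after clocks on suspend.
 */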
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

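/*
 * Clock handling: bump the core clock to the fast rate and the RBBM timer
 * clock to its fixed 19.2MHz before preparing/enabling the whole clock group.
 */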
static int enable_clk(struct msm_gpu *gpu)
{
	int i;

	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_prepare(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}

static int disable_clk(struct msm_gpu *gpu)
{
	int i;

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_disable(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	return 0;
}

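/*
 * Full power up/down sequence: rails, then clocks, then the AXI (ebi1) bus
 * clock come up, and go down in the reverse order. devfreq is resumed and
 * suspended alongside, and a resume marks the GPU as needing hw_init again.
 */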
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	if (gpu->devfreq.devfreq) {
		gpu->devfreq.busy_cycles = 0;
		gpu->devfreq.time = ktime_get();

		devfreq_resume_device(gpu->devfreq.devfreq);
	}

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	if (gpu->devfreq.devfreq)
		devfreq_suspend_device(gpu->devfreq.devfreq);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}

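/*
 * Re-run the per-GPU hw_init with the IRQ masked; this is a no-op unless a
 * preceding resume (or init) marked the hardware as needing it. Caller must
 * hold struct_mutex.
 */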
int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}

/*
 * Hangcheck detection for locked gpu:
 */

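/*
 * Signal the fences of all submits on @ring with a seqno up to and including
 * the given value; retire_worker and recover_worker use this to sync fence
 * state with what the GPU (or the recovery path) has completed.
 */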
static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

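/*
 * GPU recovery, run from the workqueue when hangcheck declares a lockup:
 * identify the submit that was executing (and, when possible, the comm and
 * cmdline of the task that queued it), dump it to the hang rd node, advance
 * the fences past the faulting submit, reset the GPU via funcs->recover(),
 * and then replay every submit that was still pending on each ring.
 */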
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_ATOMIC);

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline(). But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
			cmd = kstrdup_quotable_cmdline(task, GFP_ATOMIC);
			mutex_lock(&dev->struct_mutex);
		}
		rcu_read_unlock();

		if (comm && cmd) {
			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

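/*
 * Periodic hangcheck: if the completed fence hasn't moved since the last
 * check but submitted work is still outstanding, declare a lockup and queue
 * recover_work. The timer is re-armed while any work remains pending.
 */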
static void hangcheck_handler(struct timer_list *t)
{
	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

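/*
 * Accumulate active vs. total time between samples while the performance
 * counters are enabled; called on submit and on retire.
 */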
static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

/*
 * Cmdstream submission/retirement:
 */

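/*
 * Drop the references a submit took on its buffers and on runtime PM, move
 * the buffers back to the inactive list, and free the submit.
 */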
static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	int i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->aspace);
		drm_gem_object_put(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, submit);
		}
	}
}

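/*
 * retire_worker runs from the driver's in-order workqueue: signal fences up
 * to what each ring has completed, then retire the signaled submits under
 * struct_mutex.
 */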
static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_iova(&msm_obj->base,
			submit->gpu->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

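/*
 * Clocks named in the "clock-names" DT property are optional: a clock that
 * can't be acquired is stored as NULL and silently skipped, while the
 * "core"/"core_clk" and "rbbmtimer"/"rbbmtimer_clk" entries are remembered
 * separately so they can be rate-controlled later.
 */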
static struct clk *get_clock(struct device *dev, const char *name)
{
	struct clk *clk = devm_clk_get(dev, name);

	return IS_ERR(clk) ? NULL : clk;
}

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	struct device *dev = &pdev->dev;
	struct property *prop;
	const char *name;
	int i = 0;

	gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
	if (gpu->nr_clocks < 1) {
		gpu->nr_clocks = 0;
		return 0;
	}

	gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks,
		GFP_KERNEL);
	if (!gpu->grp_clks) {
		gpu->nr_clocks = 0;
		return -ENOMEM;
	}

	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
		gpu->grp_clks[i] = get_clock(dev, name);

		/* Remember the key clocks that we need to control later */
		if (!strcmp(name, "core") || !strcmp(name, "core_clk"))
			gpu->core_clk = gpu->grp_clks[i];
		else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk"))
			gpu->rbbmtimer_clk = gpu->grp_clks[i];

		++i;
	}

	return 0;
}

static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct iommu_domain *iommu;
	struct msm_gem_address_space *aspace;
	int ret;

	/*
	 * Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context. For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (!iommu)
		return NULL;

	iommu->geometry.aperture_start = va_start;
	iommu->geometry.aperture_end = va_end;

	dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
	if (IS_ERR(aspace)) {
		dev_err(gpu->dev->dev, "failed to init iommu: %ld\n",
			PTR_ERR(aspace));
		iommu_domain_free(iommu);
		return ERR_CAST(aspace);
	}

	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
	if (ret) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(ret);
	}

	return aspace;
}

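/*
 * msm_gpu_init() wires up one GPU instance: map registers, request the IRQ,
 * look up clocks and regulators, register with devfreq, create the GPU
 * address space, allocate the shared memptrs buffer, and create the
 * ringbuffer(s). The ringbuffers and memptrs buffer are released again on
 * failure.
 */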
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);


	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);


	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			dev_err(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	platform_set_drvdata(pdev, NULL);
	return ret;
}

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
			NULL, 0);
		msm_gem_address_space_put(gpu->aspace);
	}
}