2 * Copyright (C) 2013 Red Hat
3 * Author: Rob Clark <robdclark@gmail.com>
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published by
7 * the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * You should have received a copy of the GNU General Public License along with
15 * this program. If not, see <http://www.gnu.org/licenses/>.
#include "msm_fence.h"
#include "msm_gpu_trace.h"
#include "adreno/adreno_gpu.h"

#include <generated/utsrelease.h>
#include <linux/string_helpers.h>
#include <linux/pm_opp.h>
#include <linux/devfreq.h>
#include <linux/devcoredump.h>
#include <linux/math64.h>
35 static int msm_devfreq_target(struct device
*dev
, unsigned long *freq
,
38 struct msm_gpu
*gpu
= platform_get_drvdata(to_platform_device(dev
));
39 struct dev_pm_opp
*opp
;
41 opp
= devfreq_recommended_opp(dev
, freq
, flags
);
46 if (gpu
->funcs
->gpu_set_freq
)
47 gpu
->funcs
->gpu_set_freq(gpu
, (u64
)*freq
);
49 clk_set_rate(gpu
->core_clk
, *freq
);
56 static int msm_devfreq_get_dev_status(struct device
*dev
,
57 struct devfreq_dev_status
*status
)
59 struct msm_gpu
*gpu
= platform_get_drvdata(to_platform_device(dev
));
62 if (gpu
->funcs
->gpu_get_freq
)
63 status
->current_frequency
= gpu
->funcs
->gpu_get_freq(gpu
);
65 status
->current_frequency
= clk_get_rate(gpu
->core_clk
);
67 status
->busy_time
= gpu
->funcs
->gpu_busy(gpu
);
70 status
->total_time
= ktime_us_delta(time
, gpu
->devfreq
.time
);
71 gpu
->devfreq
.time
= time
;
76 static int msm_devfreq_get_cur_freq(struct device
*dev
, unsigned long *freq
)
78 struct msm_gpu
*gpu
= platform_get_drvdata(to_platform_device(dev
));
80 if (gpu
->funcs
->gpu_get_freq
)
81 *freq
= gpu
->funcs
->gpu_get_freq(gpu
);
83 *freq
= clk_get_rate(gpu
->core_clk
);
88 static struct devfreq_dev_profile msm_devfreq_profile
= {
90 .target
= msm_devfreq_target
,
91 .get_dev_status
= msm_devfreq_get_dev_status
,
92 .get_cur_freq
= msm_devfreq_get_cur_freq
,
95 static void msm_devfreq_init(struct msm_gpu
*gpu
)
97 /* We need target support to do devfreq */
98 if (!gpu
->funcs
->gpu_busy
)
101 msm_devfreq_profile
.initial_freq
= gpu
->fast_rate
;
104 * Don't set the freq_table or max_state and let devfreq build the table
108 gpu
->devfreq
.devfreq
= devm_devfreq_add_device(&gpu
->pdev
->dev
,
109 &msm_devfreq_profile
, "simple_ondemand", NULL
);
111 if (IS_ERR(gpu
->devfreq
.devfreq
)) {
112 DRM_DEV_ERROR(&gpu
->pdev
->dev
, "Couldn't initialize GPU devfreq\n");
113 gpu
->devfreq
.devfreq
= NULL
;
116 devfreq_suspend_device(gpu
->devfreq
.devfreq
);
119 static int enable_pwrrail(struct msm_gpu
*gpu
)
121 struct drm_device
*dev
= gpu
->dev
;
125 ret
= regulator_enable(gpu
->gpu_reg
);
127 DRM_DEV_ERROR(dev
->dev
, "failed to enable 'gpu_reg': %d\n", ret
);
133 ret
= regulator_enable(gpu
->gpu_cx
);
135 DRM_DEV_ERROR(dev
->dev
, "failed to enable 'gpu_cx': %d\n", ret
);
143 static int disable_pwrrail(struct msm_gpu
*gpu
)
146 regulator_disable(gpu
->gpu_cx
);
148 regulator_disable(gpu
->gpu_reg
);
152 static int enable_clk(struct msm_gpu
*gpu
)
154 if (gpu
->core_clk
&& gpu
->fast_rate
)
155 clk_set_rate(gpu
->core_clk
, gpu
->fast_rate
);
157 /* Set the RBBM timer rate to 19.2Mhz */
158 if (gpu
->rbbmtimer_clk
)
159 clk_set_rate(gpu
->rbbmtimer_clk
, 19200000);
161 return clk_bulk_prepare_enable(gpu
->nr_clocks
, gpu
->grp_clks
);
164 static int disable_clk(struct msm_gpu
*gpu
)
166 clk_bulk_disable_unprepare(gpu
->nr_clocks
, gpu
->grp_clks
);
169 * Set the clock to a deliberately low rate. On older targets the clock
170 * speed had to be non zero to avoid problems. On newer targets this
171 * will be rounded down to zero anyway so it all works out.
174 clk_set_rate(gpu
->core_clk
, 27000000);
176 if (gpu
->rbbmtimer_clk
)
177 clk_set_rate(gpu
->rbbmtimer_clk
, 0);
182 static int enable_axi(struct msm_gpu
*gpu
)
185 clk_prepare_enable(gpu
->ebi1_clk
);
189 static int disable_axi(struct msm_gpu
*gpu
)
192 clk_disable_unprepare(gpu
->ebi1_clk
);
196 void msm_gpu_resume_devfreq(struct msm_gpu
*gpu
)
198 gpu
->devfreq
.busy_cycles
= 0;
199 gpu
->devfreq
.time
= ktime_get();
201 devfreq_resume_device(gpu
->devfreq
.devfreq
);
204 int msm_gpu_pm_resume(struct msm_gpu
*gpu
)
208 DBG("%s", gpu
->name
);
210 ret
= enable_pwrrail(gpu
);
214 ret
= enable_clk(gpu
);
218 ret
= enable_axi(gpu
);
222 msm_gpu_resume_devfreq(gpu
);
224 gpu
->needs_hw_init
= true;
229 int msm_gpu_pm_suspend(struct msm_gpu
*gpu
)
233 DBG("%s", gpu
->name
);
235 devfreq_suspend_device(gpu
->devfreq
.devfreq
);
237 ret
= disable_axi(gpu
);
241 ret
= disable_clk(gpu
);
245 ret
= disable_pwrrail(gpu
);
252 int msm_gpu_hw_init(struct msm_gpu
*gpu
)
256 WARN_ON(!mutex_is_locked(&gpu
->dev
->struct_mutex
));
258 if (!gpu
->needs_hw_init
)
261 disable_irq(gpu
->irq
);
262 ret
= gpu
->funcs
->hw_init(gpu
);
264 gpu
->needs_hw_init
= false;
265 enable_irq(gpu
->irq
);
270 #ifdef CONFIG_DEV_COREDUMP
271 static ssize_t
msm_gpu_devcoredump_read(char *buffer
, loff_t offset
,
272 size_t count
, void *data
, size_t datalen
)
274 struct msm_gpu
*gpu
= data
;
275 struct drm_print_iterator iter
;
276 struct drm_printer p
;
277 struct msm_gpu_state
*state
;
279 state
= msm_gpu_crashstate_get(gpu
);
288 p
= drm_coredump_printer(&iter
);
290 drm_printf(&p
, "---\n");
291 drm_printf(&p
, "kernel: " UTS_RELEASE
"\n");
292 drm_printf(&p
, "module: " KBUILD_MODNAME
"\n");
293 drm_printf(&p
, "time: %lld.%09ld\n",
294 state
->time
.tv_sec
, state
->time
.tv_nsec
);
296 drm_printf(&p
, "comm: %s\n", state
->comm
);
298 drm_printf(&p
, "cmdline: %s\n", state
->cmd
);
300 gpu
->funcs
->show(gpu
, state
, &p
);
302 msm_gpu_crashstate_put(gpu
);
304 return count
- iter
.remain
;
/*
 * devcoredump free callback: drop the crash state reference of the GPU
 * that was passed as @data to dev_coredumpm().
 */
static void msm_gpu_devcoredump_free(void *data)
{
	msm_gpu_crashstate_put((struct msm_gpu *)data);
}
314 static void msm_gpu_crashstate_get_bo(struct msm_gpu_state
*state
,
315 struct msm_gem_object
*obj
, u64 iova
, u32 flags
)
317 struct msm_gpu_state_bo
*state_bo
= &state
->bos
[state
->nr_bos
];
319 /* Don't record write only objects */
320 state_bo
->size
= obj
->base
.size
;
321 state_bo
->iova
= iova
;
323 /* Only store data for non imported buffer objects marked for read */
324 if ((flags
& MSM_SUBMIT_BO_READ
) && !obj
->base
.import_attach
) {
327 state_bo
->data
= kvmalloc(obj
->base
.size
, GFP_KERNEL
);
331 ptr
= msm_gem_get_vaddr_active(&obj
->base
);
333 kvfree(state_bo
->data
);
334 state_bo
->data
= NULL
;
338 memcpy(state_bo
->data
, ptr
, obj
->base
.size
);
339 msm_gem_put_vaddr(&obj
->base
);
345 static void msm_gpu_crashstate_capture(struct msm_gpu
*gpu
,
346 struct msm_gem_submit
*submit
, char *comm
, char *cmd
)
348 struct msm_gpu_state
*state
;
350 /* Check if the target supports capturing crash state */
351 if (!gpu
->funcs
->gpu_state_get
)
354 /* Only save one crash state at a time */
358 state
= gpu
->funcs
->gpu_state_get(gpu
);
359 if (IS_ERR_OR_NULL(state
))
362 /* Fill in the additional crash state information */
363 state
->comm
= kstrdup(comm
, GFP_KERNEL
);
364 state
->cmd
= kstrdup(cmd
, GFP_KERNEL
);
369 state
->bos
= kcalloc(submit
->nr_cmds
,
370 sizeof(struct msm_gpu_state_bo
), GFP_KERNEL
);
372 for (i
= 0; state
->bos
&& i
< submit
->nr_cmds
; i
++) {
373 int idx
= submit
->cmd
[i
].idx
;
375 msm_gpu_crashstate_get_bo(state
, submit
->bos
[idx
].obj
,
376 submit
->bos
[idx
].iova
, submit
->bos
[idx
].flags
);
380 /* Set the active crash state to be dumped on failure */
381 gpu
->crashstate
= state
;
383 /* FIXME: Release the crashstate if this errors out? */
384 dev_coredumpm(gpu
->dev
->dev
, THIS_MODULE
, gpu
, 0, GFP_KERNEL
,
385 msm_gpu_devcoredump_read
, msm_gpu_devcoredump_free
);
388 static void msm_gpu_crashstate_capture(struct msm_gpu
*gpu
,
389 struct msm_gem_submit
*submit
, char *comm
, char *cmd
)
/*
 * Hangcheck detection for locked gpu:
 */
398 static void update_fences(struct msm_gpu
*gpu
, struct msm_ringbuffer
*ring
,
401 struct msm_gem_submit
*submit
;
403 list_for_each_entry(submit
, &ring
->submits
, node
) {
404 if (submit
->seqno
> fence
)
407 msm_update_fence(submit
->ring
->fctx
,
408 submit
->fence
->seqno
);
412 static struct msm_gem_submit
*
413 find_submit(struct msm_ringbuffer
*ring
, uint32_t fence
)
415 struct msm_gem_submit
*submit
;
417 WARN_ON(!mutex_is_locked(&ring
->gpu
->dev
->struct_mutex
));
419 list_for_each_entry(submit
, &ring
->submits
, node
)
420 if (submit
->seqno
== fence
)
426 static void retire_submits(struct msm_gpu
*gpu
);
428 static void recover_worker(struct work_struct
*work
)
430 struct msm_gpu
*gpu
= container_of(work
, struct msm_gpu
, recover_work
);
431 struct drm_device
*dev
= gpu
->dev
;
432 struct msm_drm_private
*priv
= dev
->dev_private
;
433 struct msm_gem_submit
*submit
;
434 struct msm_ringbuffer
*cur_ring
= gpu
->funcs
->active_ring(gpu
);
435 char *comm
= NULL
, *cmd
= NULL
;
438 mutex_lock(&dev
->struct_mutex
);
440 DRM_DEV_ERROR(dev
->dev
, "%s: hangcheck recover!\n", gpu
->name
);
442 submit
= find_submit(cur_ring
, cur_ring
->memptrs
->fence
+ 1);
444 struct task_struct
*task
;
446 task
= get_pid_task(submit
->pid
, PIDTYPE_PID
);
448 comm
= kstrdup(task
->comm
, GFP_KERNEL
);
451 * So slightly annoying, in other paths like
452 * mmap'ing gem buffers, mmap_sem is acquired
453 * before struct_mutex, which means we can't
454 * hold struct_mutex across the call to
455 * get_cmdline(). But submits are retired
456 * from the same in-order workqueue, so we can
457 * safely drop the lock here without worrying
458 * about the submit going away.
460 mutex_unlock(&dev
->struct_mutex
);
461 cmd
= kstrdup_quotable_cmdline(task
, GFP_KERNEL
);
462 put_task_struct(task
);
463 mutex_lock(&dev
->struct_mutex
);
467 DRM_DEV_ERROR(dev
->dev
, "%s: offending task: %s (%s)\n",
468 gpu
->name
, comm
, cmd
);
470 msm_rd_dump_submit(priv
->hangrd
, submit
,
471 "offending task: %s (%s)", comm
, cmd
);
473 msm_rd_dump_submit(priv
->hangrd
, submit
, NULL
);
476 /* Record the crash state */
477 pm_runtime_get_sync(&gpu
->pdev
->dev
);
478 msm_gpu_crashstate_capture(gpu
, submit
, comm
, cmd
);
479 pm_runtime_put_sync(&gpu
->pdev
->dev
);
485 * Update all the rings with the latest and greatest fence.. this
486 * needs to happen after msm_rd_dump_submit() to ensure that the
487 * bo's referenced by the offending submit are still around.
489 for (i
= 0; i
< gpu
->nr_rings
; i
++) {
490 struct msm_ringbuffer
*ring
= gpu
->rb
[i
];
492 uint32_t fence
= ring
->memptrs
->fence
;
495 * For the current (faulting?) ring/submit advance the fence by
496 * one more to clear the faulting submit
498 if (ring
== cur_ring
)
501 update_fences(gpu
, ring
, fence
);
504 if (msm_gpu_active(gpu
)) {
505 /* retire completed submits, plus the one that hung: */
508 pm_runtime_get_sync(&gpu
->pdev
->dev
);
509 gpu
->funcs
->recover(gpu
);
510 pm_runtime_put_sync(&gpu
->pdev
->dev
);
513 * Replay all remaining submits starting with highest priority
516 for (i
= 0; i
< gpu
->nr_rings
; i
++) {
517 struct msm_ringbuffer
*ring
= gpu
->rb
[i
];
519 list_for_each_entry(submit
, &ring
->submits
, node
)
520 gpu
->funcs
->submit(gpu
, submit
, NULL
);
524 mutex_unlock(&dev
->struct_mutex
);
529 static void hangcheck_timer_reset(struct msm_gpu
*gpu
)
531 DBG("%s", gpu
->name
);
532 mod_timer(&gpu
->hangcheck_timer
,
533 round_jiffies_up(jiffies
+ DRM_MSM_HANGCHECK_JIFFIES
));
536 static void hangcheck_handler(struct timer_list
*t
)
538 struct msm_gpu
*gpu
= from_timer(gpu
, t
, hangcheck_timer
);
539 struct drm_device
*dev
= gpu
->dev
;
540 struct msm_drm_private
*priv
= dev
->dev_private
;
541 struct msm_ringbuffer
*ring
= gpu
->funcs
->active_ring(gpu
);
542 uint32_t fence
= ring
->memptrs
->fence
;
544 if (fence
!= ring
->hangcheck_fence
) {
545 /* some progress has been made.. ya! */
546 ring
->hangcheck_fence
= fence
;
547 } else if (fence
< ring
->seqno
) {
548 /* no progress and not done.. hung! */
549 ring
->hangcheck_fence
= fence
;
550 DRM_DEV_ERROR(dev
->dev
, "%s: hangcheck detected gpu lockup rb %d!\n",
551 gpu
->name
, ring
->id
);
552 DRM_DEV_ERROR(dev
->dev
, "%s: completed fence: %u\n",
554 DRM_DEV_ERROR(dev
->dev
, "%s: submitted fence: %u\n",
555 gpu
->name
, ring
->seqno
);
557 queue_work(priv
->wq
, &gpu
->recover_work
);
560 /* if still more pending work, reset the hangcheck timer: */
561 if (ring
->seqno
> ring
->hangcheck_fence
)
562 hangcheck_timer_reset(gpu
);
564 /* workaround for missing irq: */
565 queue_work(priv
->wq
, &gpu
->retire_work
);
/*
 * Performance Counters:
 */
572 /* called under perf_lock */
573 static int update_hw_cntrs(struct msm_gpu
*gpu
, uint32_t ncntrs
, uint32_t *cntrs
)
575 uint32_t current_cntrs
[ARRAY_SIZE(gpu
->last_cntrs
)];
576 int i
, n
= min(ncntrs
, gpu
->num_perfcntrs
);
578 /* read current values: */
579 for (i
= 0; i
< gpu
->num_perfcntrs
; i
++)
580 current_cntrs
[i
] = gpu_read(gpu
, gpu
->perfcntrs
[i
].sample_reg
);
583 for (i
= 0; i
< n
; i
++)
584 cntrs
[i
] = current_cntrs
[i
] - gpu
->last_cntrs
[i
];
586 /* save current values: */
587 for (i
= 0; i
< gpu
->num_perfcntrs
; i
++)
588 gpu
->last_cntrs
[i
] = current_cntrs
[i
];
593 static void update_sw_cntrs(struct msm_gpu
*gpu
)
599 spin_lock_irqsave(&gpu
->perf_lock
, flags
);
600 if (!gpu
->perfcntr_active
)
604 elapsed
= ktime_to_us(ktime_sub(time
, gpu
->last_sample
.time
));
606 gpu
->totaltime
+= elapsed
;
607 if (gpu
->last_sample
.active
)
608 gpu
->activetime
+= elapsed
;
610 gpu
->last_sample
.active
= msm_gpu_active(gpu
);
611 gpu
->last_sample
.time
= time
;
614 spin_unlock_irqrestore(&gpu
->perf_lock
, flags
);
617 void msm_gpu_perfcntr_start(struct msm_gpu
*gpu
)
621 pm_runtime_get_sync(&gpu
->pdev
->dev
);
623 spin_lock_irqsave(&gpu
->perf_lock
, flags
);
624 /* we could dynamically enable/disable perfcntr registers too.. */
625 gpu
->last_sample
.active
= msm_gpu_active(gpu
);
626 gpu
->last_sample
.time
= ktime_get();
627 gpu
->activetime
= gpu
->totaltime
= 0;
628 gpu
->perfcntr_active
= true;
629 update_hw_cntrs(gpu
, 0, NULL
);
630 spin_unlock_irqrestore(&gpu
->perf_lock
, flags
);
633 void msm_gpu_perfcntr_stop(struct msm_gpu
*gpu
)
635 gpu
->perfcntr_active
= false;
636 pm_runtime_put_sync(&gpu
->pdev
->dev
);
639 /* returns -errno or # of cntrs sampled */
640 int msm_gpu_perfcntr_sample(struct msm_gpu
*gpu
, uint32_t *activetime
,
641 uint32_t *totaltime
, uint32_t ncntrs
, uint32_t *cntrs
)
646 spin_lock_irqsave(&gpu
->perf_lock
, flags
);
648 if (!gpu
->perfcntr_active
) {
653 *activetime
= gpu
->activetime
;
654 *totaltime
= gpu
->totaltime
;
656 gpu
->activetime
= gpu
->totaltime
= 0;
658 ret
= update_hw_cntrs(gpu
, ncntrs
, cntrs
);
661 spin_unlock_irqrestore(&gpu
->perf_lock
, flags
);
/*
 * Cmdstream submission/retirement:
 */
670 static void retire_submit(struct msm_gpu
*gpu
, struct msm_ringbuffer
*ring
,
671 struct msm_gem_submit
*submit
)
673 int index
= submit
->seqno
% MSM_GPU_SUBMIT_STATS_COUNT
;
674 volatile struct msm_gpu_submit_stats
*stats
;
675 u64 elapsed
, clock
= 0;
678 stats
= &ring
->memptrs
->stats
[index
];
679 /* Convert 19.2Mhz alwayson ticks to nanoseconds for elapsed time */
680 elapsed
= (stats
->alwayson_end
- stats
->alwayson_start
) * 10000;
681 do_div(elapsed
, 192);
683 /* Calculate the clock frequency from the number of CP cycles */
685 clock
= (stats
->cpcycles_end
- stats
->cpcycles_start
) * 1000;
686 do_div(clock
, elapsed
);
689 trace_msm_gpu_submit_retired(submit
, elapsed
, clock
,
690 stats
->alwayson_start
, stats
->alwayson_end
);
692 for (i
= 0; i
< submit
->nr_bos
; i
++) {
693 struct msm_gem_object
*msm_obj
= submit
->bos
[i
].obj
;
694 /* move to inactive: */
695 msm_gem_move_to_inactive(&msm_obj
->base
);
696 msm_gem_unpin_iova(&msm_obj
->base
, gpu
->aspace
);
697 drm_gem_object_put(&msm_obj
->base
);
700 pm_runtime_mark_last_busy(&gpu
->pdev
->dev
);
701 pm_runtime_put_autosuspend(&gpu
->pdev
->dev
);
702 msm_gem_submit_free(submit
);
705 static void retire_submits(struct msm_gpu
*gpu
)
707 struct drm_device
*dev
= gpu
->dev
;
708 struct msm_gem_submit
*submit
, *tmp
;
711 WARN_ON(!mutex_is_locked(&dev
->struct_mutex
));
713 /* Retire the commits starting with highest priority */
714 for (i
= 0; i
< gpu
->nr_rings
; i
++) {
715 struct msm_ringbuffer
*ring
= gpu
->rb
[i
];
717 list_for_each_entry_safe(submit
, tmp
, &ring
->submits
, node
) {
718 if (dma_fence_is_signaled(submit
->fence
))
719 retire_submit(gpu
, ring
, submit
);
724 static void retire_worker(struct work_struct
*work
)
726 struct msm_gpu
*gpu
= container_of(work
, struct msm_gpu
, retire_work
);
727 struct drm_device
*dev
= gpu
->dev
;
730 for (i
= 0; i
< gpu
->nr_rings
; i
++)
731 update_fences(gpu
, gpu
->rb
[i
], gpu
->rb
[i
]->memptrs
->fence
);
733 mutex_lock(&dev
->struct_mutex
);
735 mutex_unlock(&dev
->struct_mutex
);
738 /* call from irq handler to schedule work to retire bo's */
739 void msm_gpu_retire(struct msm_gpu
*gpu
)
741 struct msm_drm_private
*priv
= gpu
->dev
->dev_private
;
742 queue_work(priv
->wq
, &gpu
->retire_work
);
743 update_sw_cntrs(gpu
);
746 /* add bo's to gpu's ring, and kick gpu: */
747 void msm_gpu_submit(struct msm_gpu
*gpu
, struct msm_gem_submit
*submit
,
748 struct msm_file_private
*ctx
)
750 struct drm_device
*dev
= gpu
->dev
;
751 struct msm_drm_private
*priv
= dev
->dev_private
;
752 struct msm_ringbuffer
*ring
= submit
->ring
;
755 WARN_ON(!mutex_is_locked(&dev
->struct_mutex
));
757 pm_runtime_get_sync(&gpu
->pdev
->dev
);
759 msm_gpu_hw_init(gpu
);
761 submit
->seqno
= ++ring
->seqno
;
763 list_add_tail(&submit
->node
, &ring
->submits
);
765 msm_rd_dump_submit(priv
->rd
, submit
, NULL
);
767 update_sw_cntrs(gpu
);
769 for (i
= 0; i
< submit
->nr_bos
; i
++) {
770 struct msm_gem_object
*msm_obj
= submit
->bos
[i
].obj
;
773 /* can't happen yet.. but when we add 2d support we'll have
774 * to deal w/ cross-ring synchronization:
776 WARN_ON(is_active(msm_obj
) && (msm_obj
->gpu
!= gpu
));
778 /* submit takes a reference to the bo and iova until retired: */
779 drm_gem_object_get(&msm_obj
->base
);
780 msm_gem_get_and_pin_iova(&msm_obj
->base
,
781 submit
->gpu
->aspace
, &iova
);
783 if (submit
->bos
[i
].flags
& MSM_SUBMIT_BO_WRITE
)
784 msm_gem_move_to_active(&msm_obj
->base
, gpu
, true, submit
->fence
);
785 else if (submit
->bos
[i
].flags
& MSM_SUBMIT_BO_READ
)
786 msm_gem_move_to_active(&msm_obj
->base
, gpu
, false, submit
->fence
);
789 gpu
->funcs
->submit(gpu
, submit
, ctx
);
792 hangcheck_timer_reset(gpu
);
799 static irqreturn_t
irq_handler(int irq
, void *data
)
801 struct msm_gpu
*gpu
= data
;
802 return gpu
->funcs
->irq(gpu
);
805 static int get_clocks(struct platform_device
*pdev
, struct msm_gpu
*gpu
)
807 int ret
= msm_clk_bulk_get(&pdev
->dev
, &gpu
->grp_clks
);
814 gpu
->nr_clocks
= ret
;
816 gpu
->core_clk
= msm_clk_bulk_get_clock(gpu
->grp_clks
,
817 gpu
->nr_clocks
, "core");
819 gpu
->rbbmtimer_clk
= msm_clk_bulk_get_clock(gpu
->grp_clks
,
820 gpu
->nr_clocks
, "rbbmtimer");
825 static struct msm_gem_address_space
*
826 msm_gpu_create_address_space(struct msm_gpu
*gpu
, struct platform_device
*pdev
,
827 uint64_t va_start
, uint64_t va_end
)
829 struct msm_gem_address_space
*aspace
;
833 * Setup IOMMU.. eventually we will (I think) do this once per context
834 * and have separate page tables per context. For now, to keep things
835 * simple and to get something working, just use a single address space:
837 if (!adreno_is_a2xx(to_adreno_gpu(gpu
))) {
838 struct iommu_domain
*iommu
= iommu_domain_alloc(&platform_bus_type
);
842 iommu
->geometry
.aperture_start
= va_start
;
843 iommu
->geometry
.aperture_end
= va_end
;
845 DRM_DEV_INFO(gpu
->dev
->dev
, "%s: using IOMMU\n", gpu
->name
);
847 aspace
= msm_gem_address_space_create(&pdev
->dev
, iommu
, "gpu");
849 iommu_domain_free(iommu
);
851 aspace
= msm_gem_address_space_create_a2xx(&pdev
->dev
, gpu
, "gpu",
855 if (IS_ERR(aspace
)) {
856 DRM_DEV_ERROR(gpu
->dev
->dev
, "failed to init mmu: %ld\n",
858 return ERR_CAST(aspace
);
861 ret
= aspace
->mmu
->funcs
->attach(aspace
->mmu
, NULL
, 0);
863 msm_gem_address_space_put(aspace
);
870 int msm_gpu_init(struct drm_device
*drm
, struct platform_device
*pdev
,
871 struct msm_gpu
*gpu
, const struct msm_gpu_funcs
*funcs
,
872 const char *name
, struct msm_gpu_config
*config
)
874 int i
, ret
, nr_rings
= config
->nr_rings
;
876 uint64_t memptrs_iova
;
878 if (WARN_ON(gpu
->num_perfcntrs
> ARRAY_SIZE(gpu
->last_cntrs
)))
879 gpu
->num_perfcntrs
= ARRAY_SIZE(gpu
->last_cntrs
);
885 INIT_LIST_HEAD(&gpu
->active_list
);
886 INIT_WORK(&gpu
->retire_work
, retire_worker
);
887 INIT_WORK(&gpu
->recover_work
, recover_worker
);
890 timer_setup(&gpu
->hangcheck_timer
, hangcheck_handler
, 0);
892 spin_lock_init(&gpu
->perf_lock
);
896 gpu
->mmio
= msm_ioremap(pdev
, config
->ioname
, name
);
897 if (IS_ERR(gpu
->mmio
)) {
898 ret
= PTR_ERR(gpu
->mmio
);
903 gpu
->irq
= platform_get_irq(pdev
, 0);
906 DRM_DEV_ERROR(drm
->dev
, "failed to get irq: %d\n", ret
);
910 ret
= devm_request_irq(&pdev
->dev
, gpu
->irq
, irq_handler
,
911 IRQF_TRIGGER_HIGH
, gpu
->name
, gpu
);
913 DRM_DEV_ERROR(drm
->dev
, "failed to request IRQ%u: %d\n", gpu
->irq
, ret
);
917 ret
= get_clocks(pdev
, gpu
);
921 gpu
->ebi1_clk
= msm_clk_get(pdev
, "bus");
922 DBG("ebi1_clk: %p", gpu
->ebi1_clk
);
923 if (IS_ERR(gpu
->ebi1_clk
))
924 gpu
->ebi1_clk
= NULL
;
926 /* Acquire regulators: */
927 gpu
->gpu_reg
= devm_regulator_get(&pdev
->dev
, "vdd");
928 DBG("gpu_reg: %p", gpu
->gpu_reg
);
929 if (IS_ERR(gpu
->gpu_reg
))
932 gpu
->gpu_cx
= devm_regulator_get(&pdev
->dev
, "vddcx");
933 DBG("gpu_cx: %p", gpu
->gpu_cx
);
934 if (IS_ERR(gpu
->gpu_cx
))
938 platform_set_drvdata(pdev
, gpu
);
940 msm_devfreq_init(gpu
);
942 gpu
->aspace
= msm_gpu_create_address_space(gpu
, pdev
,
943 config
->va_start
, config
->va_end
);
945 if (gpu
->aspace
== NULL
)
946 DRM_DEV_INFO(drm
->dev
, "%s: no IOMMU, fallback to VRAM carveout!\n", name
);
947 else if (IS_ERR(gpu
->aspace
)) {
948 ret
= PTR_ERR(gpu
->aspace
);
952 memptrs
= msm_gem_kernel_new(drm
,
953 sizeof(struct msm_rbmemptrs
) * nr_rings
,
954 MSM_BO_UNCACHED
, gpu
->aspace
, &gpu
->memptrs_bo
,
957 if (IS_ERR(memptrs
)) {
958 ret
= PTR_ERR(memptrs
);
959 DRM_DEV_ERROR(drm
->dev
, "could not allocate memptrs: %d\n", ret
);
963 msm_gem_object_set_name(gpu
->memptrs_bo
, "memptrs");
965 if (nr_rings
> ARRAY_SIZE(gpu
->rb
)) {
966 DRM_DEV_INFO_ONCE(drm
->dev
, "Only creating %zu ringbuffers\n",
967 ARRAY_SIZE(gpu
->rb
));
968 nr_rings
= ARRAY_SIZE(gpu
->rb
);
971 /* Create ringbuffer(s): */
972 for (i
= 0; i
< nr_rings
; i
++) {
973 gpu
->rb
[i
] = msm_ringbuffer_new(gpu
, i
, memptrs
, memptrs_iova
);
975 if (IS_ERR(gpu
->rb
[i
])) {
976 ret
= PTR_ERR(gpu
->rb
[i
]);
977 DRM_DEV_ERROR(drm
->dev
,
978 "could not create ringbuffer %d: %d\n", i
, ret
);
982 memptrs
+= sizeof(struct msm_rbmemptrs
);
983 memptrs_iova
+= sizeof(struct msm_rbmemptrs
);
986 gpu
->nr_rings
= nr_rings
;
991 for (i
= 0; i
< ARRAY_SIZE(gpu
->rb
); i
++) {
992 msm_ringbuffer_destroy(gpu
->rb
[i
]);
996 msm_gem_kernel_put(gpu
->memptrs_bo
, gpu
->aspace
, false);
998 platform_set_drvdata(pdev
, NULL
);
1002 void msm_gpu_cleanup(struct msm_gpu
*gpu
)
1006 DBG("%s", gpu
->name
);
1008 WARN_ON(!list_empty(&gpu
->active_list
));
1010 for (i
= 0; i
< ARRAY_SIZE(gpu
->rb
); i
++) {
1011 msm_ringbuffer_destroy(gpu
->rb
[i
]);
1015 msm_gem_kernel_put(gpu
->memptrs_bo
, gpu
->aspace
, false);
1017 if (!IS_ERR_OR_NULL(gpu
->aspace
)) {
1018 gpu
->aspace
->mmu
->funcs
->detach(gpu
->aspace
->mmu
,
1020 msm_gem_address_space_put(gpu
->aspace
);