/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"
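/*
 * Power Management:
 */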
#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
#include <mach/board.h>
static void bs_init(struct msm_gpu *gpu)
{
	if (gpu->bus_scale_table) {
		gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
		DBG("bus scale client: %08x", gpu->bsc);
	}
}

static void bs_fini(struct msm_gpu *gpu)
{
	if (gpu->bsc) {
		msm_bus_scale_unregister_client(gpu->bsc);
		gpu->bsc = 0;
	}
}

static void bs_set(struct msm_gpu *gpu, int idx)
{
	if (gpu->bsc) {
		DBG("set bus scaling: %d", idx);
		msm_bus_scale_client_update_request(gpu->bsc, idx);
	}
}
#else
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif
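/*
 * Note: the msm_bus_scale_* API above only exists in downstream Qualcomm
 * kernels; with DOWNSTREAM_CONFIG_MSM_BUS_SCALING unset, the empty stubs
 * compile the feature away on upstream kernels.
 */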
static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}
static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}
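/*
 * Note: the group clocks below are prepared and enabled in two separate
 * passes; clk_prepare() may sleep while clk_enable() may not, so this
 * likely keeps the atomic-safe enables grouped together.
 */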
static int enable_clk(struct msm_gpu *gpu)
{
	int i;

	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2 MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_prepare(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}
static int disable_clk(struct msm_gpu *gpu)
{
	int i;

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_disable(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}
static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, gpu->bus_freq);
	return 0;
}
static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, 0);
	return 0;
}
int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	gpu->needs_hw_init = true;

	return 0;
}
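/*
 * msm_gpu_pm_resume()/msm_gpu_pm_suspend() are intended to be called from
 * the driver's runtime PM callbacks. A minimal sketch (the callback name
 * below is illustrative, not the exact in-tree one):
 *
 *	static int gpu_runtime_resume(struct device *dev)
 *	{
 *		struct msm_gpu *gpu =
 *			platform_get_drvdata(to_platform_device(dev));
 *
 *		return gpu ? msm_gpu_pm_resume(gpu) : 0;
 *	}
 */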
int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}
int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}
/*
 * Hangcheck detection for locked gpu:
 */

#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
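/*
 * A periodic timer compares the last completed fence against the value it
 * saw on the previous tick. If the fence has not advanced while submits
 * are still outstanding, the gpu is considered hung and recover_work is
 * queued to reset it and replay the not-yet-retired submits.
 */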
static void retire_submits(struct msm_gpu *gpu);
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	msm_update_fence(gpu->fctx, fence + 1);

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
	list_for_each_entry(submit, &gpu->submit_list, node) {
		if (submit->fence->seqno == (fence + 1)) {
			struct task_struct *task;

			rcu_read_lock();
			task = pid_task(submit->pid, PIDTYPE_PID);
			if (task) {
				dev_err(dev->dev, "%s: offending task: %s\n",
						gpu->name, task->comm);
			}
			rcu_read_unlock();
			break;
		}
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/* replay the remaining submits after the one that hung: */
		list_for_each_entry(submit, &gpu->submit_list, node) {
			gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}
static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}
static void hangcheck_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	if (fence != gpu->hangcheck_fence) {
		/* some progress has been made.. ya! */
		gpu->hangcheck_fence = fence;
	} else if (fence < gpu->fctx->last_fence) {
		/* no progress and not done.. hung! */
		gpu->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
				gpu->name);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, gpu->fctx->last_fence);
		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (gpu->fctx->last_fence > gpu->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}
/*
 * Performance Counters:
 */
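/*
 * Two counter flavors are maintained here: hardware counters sampled from
 * the perfcntr registers (update_hw_cntrs), and software active/total time
 * accounting in microseconds (update_sw_cntrs), both under perf_lock.
 */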
/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}
static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}
void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}
/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}
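/*
 * A minimal usage sketch (in-tree, the msm_perf debugfs code drives this
 * API; the counter array size here is illustrative):
 *
 *	uint32_t active, total, cntrs[4];
 *	int n;
 *
 *	msm_gpu_perfcntr_start(gpu);
 *	...
 *	n = msm_gpu_perfcntr_sample(gpu, &active, &total,
 *			ARRAY_SIZE(cntrs), cntrs);
 *	msm_gpu_perfcntr_stop(gpu);
 */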
/*
 * Cmdstream submission/retirement:
 */
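/*
 * A submit is appended to gpu->submit_list when queued and freed by the
 * retire path once its fence has signaled; it also holds a runtime PM
 * reference from submission until retirement.
 */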
static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	int i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->id);
		drm_gem_object_unreference(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}
static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	while (!list_empty(&gpu->submit_list)) {
		struct msm_gem_submit *submit;

		submit = list_first_entry(&gpu->submit_list,
				struct msm_gem_submit, node);

		if (dma_fence_is_signaled(submit->fence)) {
			retire_submit(gpu, submit);
		} else {
			break;
		}
	}
}
static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	uint32_t fence = gpu->funcs->last_fence(gpu);

	msm_update_fence(gpu->fctx, fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}
/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}
/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	list_add_tail(&submit->node, &gpu->submit_list);

	msm_rd_dump_submit(submit);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_reference(&msm_obj->base);
		msm_gem_get_iova_locked(&msm_obj->base,
				submit->gpu->id, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}
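/*
 * Init/Cleanup:
 */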
static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}
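/*
 * Clocks are treated as optional: map devm_clk_get() errors to NULL so
 * callers can silently skip clocks that were not provided in the device
 * tree.
 */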
static struct clk *get_clock(struct device *dev, const char *name)
{
	struct clk *clk = devm_clk_get(dev, name);

	return IS_ERR(clk) ? NULL : clk;
}
static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	struct device *dev = &pdev->dev;
	struct property *prop;
	const char *name;
	int i = 0;

	gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
	if (gpu->nr_clocks < 1) {
		gpu->nr_clocks = 0;
		return 0;
	}

	gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks,
		GFP_KERNEL);
	if (!gpu->grp_clks)
		return -ENOMEM;

	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
		gpu->grp_clks[i] = get_clock(dev, name);

		/* Remember the key clocks that we need to control later */
		if (!strcmp(name, "core"))
			gpu->core_clk = gpu->grp_clks[i];
		else if (!strcmp(name, "rbbmtimer"))
			gpu->rbbmtimer_clk = gpu->grp_clks[i];

		++i;
	}

	return 0;
}
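/*
 * A sketch of a typical caller, modeled on the adreno code (the resource
 * and irq names here are illustrative):
 *
 *	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
 *			adreno_gpu->info->name, "kgsl_3d0_reg_memory",
 *			"kgsl_3d0_irq", RB_SIZE);
 */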
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, const char *ioname, const char *irqname, int ringsz)
{
	struct iommu_domain *iommu;
	int ret;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;
	gpu->fctx = msm_fence_context_alloc(drm, name);
	if (IS_ERR(gpu->fctx)) {
		ret = PTR_ERR(gpu->fctx);
		gpu->fctx = NULL;
		goto fail;
	}

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	INIT_LIST_HEAD(&gpu->submit_list);

	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
			(unsigned long)gpu);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	/* Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (iommu) {
		/* TODO 32b vs 64b address space.. */
		iommu->geometry.aperture_start = SZ_16M;
		iommu->geometry.aperture_end = 0xffffffff;

		dev_info(drm->dev, "%s: using IOMMU\n", name);
		gpu->aspace = msm_gem_address_space_create(&pdev->dev,
				iommu, "gpu");
		if (IS_ERR(gpu->aspace)) {
			ret = PTR_ERR(gpu->aspace);
			dev_err(drm->dev, "failed to init iommu: %d\n", ret);
			gpu->aspace = NULL;
			iommu_domain_free(iommu);
			goto fail;
		}
	} else {
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	}
	gpu->id = msm_register_address_space(drm, gpu->aspace);

	/* Create ringbuffer: */
	mutex_lock(&drm->struct_mutex);
	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(gpu->rb)) {
		ret = PTR_ERR(gpu->rb);
		gpu->rb = NULL;
		dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
		goto fail;
	}

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	bs_init(gpu);

	return 0;

fail:
	return ret;
}
void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	bs_fini(gpu);

	if (gpu->rb) {
		if (gpu->rb_iova)
			msm_gem_put_iova(gpu->rb->bo, gpu->id);
		msm_ringbuffer_destroy(gpu->rb);
	}

	if (gpu->fctx)
		msm_fence_context_free(gpu->fctx);
}