]>
Commit | Line | Data |
---|---|---|
7198e6b0 RC |
1 | /* |
2 | * Copyright (C) 2013 Red Hat | |
3 | * Author: Rob Clark <robdclark@gmail.com> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU General Public License version 2 as published by | |
7 | * the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License along with | |
15 | * this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | */ | |
17 | ||
18 | #include "msm_gpu.h" | |
19 | #include "msm_gem.h" | |
871d812a | 20 | #include "msm_mmu.h" |
fde5de6c | 21 | #include "msm_fence.h" |
7198e6b0 | 22 | |
18bb8a6c | 23 | #include <linux/string_helpers.h> |
f91c14ab JC |
24 | #include <linux/pm_opp.h> |
25 | #include <linux/devfreq.h> | |
18bb8a6c | 26 | |
7198e6b0 RC |
27 | |
28 | /* | |
29 | * Power Management: | |
30 | */ | |
31 | ||
f91c14ab JC |
32 | static int msm_devfreq_target(struct device *dev, unsigned long *freq, |
33 | u32 flags) | |
34 | { | |
35 | struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); | |
36 | struct dev_pm_opp *opp; | |
37 | ||
38 | opp = devfreq_recommended_opp(dev, freq, flags); | |
39 | ||
40 | if (IS_ERR(opp)) | |
41 | return PTR_ERR(opp); | |
42 | ||
43 | clk_set_rate(gpu->core_clk, *freq); | |
44 | dev_pm_opp_put(opp); | |
45 | ||
46 | return 0; | |
47 | } | |
48 | ||
49 | static int msm_devfreq_get_dev_status(struct device *dev, | |
50 | struct devfreq_dev_status *status) | |
51 | { | |
52 | struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); | |
53 | u64 cycles; | |
54 | u32 freq = ((u32) status->current_frequency) / 1000000; | |
55 | ktime_t time; | |
56 | ||
57 | status->current_frequency = (unsigned long) clk_get_rate(gpu->core_clk); | |
58 | gpu->funcs->gpu_busy(gpu, &cycles); | |
59 | ||
60 | status->busy_time = ((u32) (cycles - gpu->devfreq.busy_cycles)) / freq; | |
61 | ||
62 | gpu->devfreq.busy_cycles = cycles; | |
63 | ||
64 | time = ktime_get(); | |
65 | status->total_time = ktime_us_delta(time, gpu->devfreq.time); | |
66 | gpu->devfreq.time = time; | |
67 | ||
68 | return 0; | |
69 | } | |
70 | ||
71 | static int msm_devfreq_get_cur_freq(struct device *dev, unsigned long *freq) | |
72 | { | |
73 | struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev)); | |
74 | ||
75 | *freq = (unsigned long) clk_get_rate(gpu->core_clk); | |
76 | ||
77 | return 0; | |
78 | } | |
79 | ||
/*
 * devfreq profile shared by all MSM GPUs.  initial_freq is filled in by
 * msm_devfreq_init(); the frequency table is built by devfreq from the
 * device's OPP entries (see comment in msm_devfreq_init()).
 */
static struct devfreq_dev_profile msm_devfreq_profile = {
	.polling_ms = 10,	/* sample GPU load every 10 ms */
	.target = msm_devfreq_target,
	.get_dev_status = msm_devfreq_get_dev_status,
	.get_cur_freq = msm_devfreq_get_cur_freq,
};
86 | ||
87 | static void msm_devfreq_init(struct msm_gpu *gpu) | |
88 | { | |
89 | /* We need target support to do devfreq */ | |
90 | if (!gpu->funcs->gpu_busy) | |
91 | return; | |
92 | ||
93 | msm_devfreq_profile.initial_freq = gpu->fast_rate; | |
94 | ||
95 | /* | |
96 | * Don't set the freq_table or max_state and let devfreq build the table | |
97 | * from OPP | |
98 | */ | |
99 | ||
100 | gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev, | |
101 | &msm_devfreq_profile, "simple_ondemand", NULL); | |
102 | ||
103 | if (IS_ERR(gpu->devfreq.devfreq)) { | |
104 | dev_err(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n"); | |
105 | gpu->devfreq.devfreq = NULL; | |
106 | } | |
107 | } | |
108 | ||
7198e6b0 RC |
109 | static int enable_pwrrail(struct msm_gpu *gpu) |
110 | { | |
111 | struct drm_device *dev = gpu->dev; | |
112 | int ret = 0; | |
113 | ||
114 | if (gpu->gpu_reg) { | |
115 | ret = regulator_enable(gpu->gpu_reg); | |
116 | if (ret) { | |
117 | dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret); | |
118 | return ret; | |
119 | } | |
120 | } | |
121 | ||
122 | if (gpu->gpu_cx) { | |
123 | ret = regulator_enable(gpu->gpu_cx); | |
124 | if (ret) { | |
125 | dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret); | |
126 | return ret; | |
127 | } | |
128 | } | |
129 | ||
130 | return 0; | |
131 | } | |
132 | ||
133 | static int disable_pwrrail(struct msm_gpu *gpu) | |
134 | { | |
135 | if (gpu->gpu_cx) | |
136 | regulator_disable(gpu->gpu_cx); | |
137 | if (gpu->gpu_reg) | |
138 | regulator_disable(gpu->gpu_reg); | |
139 | return 0; | |
140 | } | |
141 | ||
142 | static int enable_clk(struct msm_gpu *gpu) | |
143 | { | |
7198e6b0 RC |
144 | int i; |
145 | ||
98db803f JC |
146 | if (gpu->core_clk && gpu->fast_rate) |
147 | clk_set_rate(gpu->core_clk, gpu->fast_rate); | |
7198e6b0 | 148 | |
b5f103ab | 149 | /* Set the RBBM timer rate to 19.2Mhz */ |
98db803f JC |
150 | if (gpu->rbbmtimer_clk) |
151 | clk_set_rate(gpu->rbbmtimer_clk, 19200000); | |
b5f103ab | 152 | |
98db803f | 153 | for (i = gpu->nr_clocks - 1; i >= 0; i--) |
89d777a5 JC |
154 | if (gpu->grp_clks[i]) |
155 | clk_prepare(gpu->grp_clks[i]); | |
7198e6b0 | 156 | |
98db803f | 157 | for (i = gpu->nr_clocks - 1; i >= 0; i--) |
7198e6b0 RC |
158 | if (gpu->grp_clks[i]) |
159 | clk_enable(gpu->grp_clks[i]); | |
160 | ||
161 | return 0; | |
162 | } | |
163 | ||
164 | static int disable_clk(struct msm_gpu *gpu) | |
165 | { | |
7198e6b0 RC |
166 | int i; |
167 | ||
98db803f | 168 | for (i = gpu->nr_clocks - 1; i >= 0; i--) |
89d777a5 | 169 | if (gpu->grp_clks[i]) |
7198e6b0 | 170 | clk_disable(gpu->grp_clks[i]); |
7198e6b0 | 171 | |
98db803f | 172 | for (i = gpu->nr_clocks - 1; i >= 0; i--) |
7198e6b0 RC |
173 | if (gpu->grp_clks[i]) |
174 | clk_unprepare(gpu->grp_clks[i]); | |
175 | ||
bf5af4ae JC |
176 | /* |
177 | * Set the clock to a deliberately low rate. On older targets the clock | |
178 | * speed had to be non zero to avoid problems. On newer targets this | |
179 | * will be rounded down to zero anyway so it all works out. | |
180 | */ | |
98db803f JC |
181 | if (gpu->core_clk) |
182 | clk_set_rate(gpu->core_clk, 27000000); | |
89d777a5 | 183 | |
98db803f JC |
184 | if (gpu->rbbmtimer_clk) |
185 | clk_set_rate(gpu->rbbmtimer_clk, 0); | |
b5f103ab | 186 | |
7198e6b0 RC |
187 | return 0; |
188 | } | |
189 | ||
190 | static int enable_axi(struct msm_gpu *gpu) | |
191 | { | |
192 | if (gpu->ebi1_clk) | |
193 | clk_prepare_enable(gpu->ebi1_clk); | |
7198e6b0 RC |
194 | return 0; |
195 | } | |
196 | ||
197 | static int disable_axi(struct msm_gpu *gpu) | |
198 | { | |
199 | if (gpu->ebi1_clk) | |
200 | clk_disable_unprepare(gpu->ebi1_clk); | |
7198e6b0 RC |
201 | return 0; |
202 | } | |
203 | ||
204 | int msm_gpu_pm_resume(struct msm_gpu *gpu) | |
205 | { | |
206 | int ret; | |
207 | ||
eeb75474 | 208 | DBG("%s", gpu->name); |
7198e6b0 RC |
209 | |
210 | ret = enable_pwrrail(gpu); | |
211 | if (ret) | |
212 | return ret; | |
213 | ||
214 | ret = enable_clk(gpu); | |
215 | if (ret) | |
216 | return ret; | |
217 | ||
218 | ret = enable_axi(gpu); | |
219 | if (ret) | |
220 | return ret; | |
221 | ||
f91c14ab JC |
222 | if (gpu->devfreq.devfreq) { |
223 | gpu->devfreq.busy_cycles = 0; | |
224 | gpu->devfreq.time = ktime_get(); | |
225 | ||
226 | devfreq_resume_device(gpu->devfreq.devfreq); | |
227 | } | |
228 | ||
eeb75474 RC |
229 | gpu->needs_hw_init = true; |
230 | ||
7198e6b0 RC |
231 | return 0; |
232 | } | |
233 | ||
234 | int msm_gpu_pm_suspend(struct msm_gpu *gpu) | |
235 | { | |
236 | int ret; | |
237 | ||
eeb75474 | 238 | DBG("%s", gpu->name); |
7198e6b0 | 239 | |
f91c14ab JC |
240 | if (gpu->devfreq.devfreq) |
241 | devfreq_suspend_device(gpu->devfreq.devfreq); | |
242 | ||
7198e6b0 RC |
243 | ret = disable_axi(gpu); |
244 | if (ret) | |
245 | return ret; | |
246 | ||
247 | ret = disable_clk(gpu); | |
248 | if (ret) | |
249 | return ret; | |
250 | ||
251 | ret = disable_pwrrail(gpu); | |
252 | if (ret) | |
253 | return ret; | |
254 | ||
255 | return 0; | |
256 | } | |
257 | ||
eeb75474 | 258 | int msm_gpu_hw_init(struct msm_gpu *gpu) |
37d77c3a | 259 | { |
eeb75474 | 260 | int ret; |
37d77c3a | 261 | |
cb1e3818 RC |
262 | WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex)); |
263 | ||
eeb75474 RC |
264 | if (!gpu->needs_hw_init) |
265 | return 0; | |
37d77c3a | 266 | |
eeb75474 RC |
267 | disable_irq(gpu->irq); |
268 | ret = gpu->funcs->hw_init(gpu); | |
269 | if (!ret) | |
270 | gpu->needs_hw_init = false; | |
271 | enable_irq(gpu->irq); | |
37d77c3a | 272 | |
eeb75474 | 273 | return ret; |
37d77c3a RC |
274 | } |
275 | ||
bd6f82d8 RC |
276 | /* |
277 | * Hangcheck detection for locked gpu: | |
278 | */ | |
279 | ||
f97decac JC |
280 | static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, |
281 | uint32_t fence) | |
282 | { | |
283 | struct msm_gem_submit *submit; | |
284 | ||
285 | list_for_each_entry(submit, &ring->submits, node) { | |
286 | if (submit->seqno > fence) | |
287 | break; | |
288 | ||
289 | msm_update_fence(submit->ring->fctx, | |
290 | submit->fence->seqno); | |
291 | } | |
292 | } | |
293 | ||
18bb8a6c RC |
294 | static struct msm_gem_submit * |
295 | find_submit(struct msm_ringbuffer *ring, uint32_t fence) | |
296 | { | |
297 | struct msm_gem_submit *submit; | |
298 | ||
299 | WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); | |
300 | ||
301 | list_for_each_entry(submit, &ring->submits, node) | |
302 | if (submit->seqno == fence) | |
303 | return submit; | |
304 | ||
305 | return NULL; | |
306 | } | |
307 | ||
b6295f9a | 308 | static void retire_submits(struct msm_gpu *gpu); |
1a370be9 | 309 | |
bd6f82d8 RC |
/*
 * Worker scheduled by the hangcheck timer when the GPU appears hung.
 * Identifies and dumps the offending submit, advances all ring fences
 * past completed (and the faulting) work, resets the GPU, and replays
 * any submits that were still pending.
 *
 * Locking: runs with struct_mutex held, except for a deliberate
 * drop/reacquire window around get_cmdline() — see the comment below.
 */
static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	char *comm = NULL, *cmd = NULL;
	int i;

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	/* memptrs->fence is the last completed seqno, so +1 is the
	 * submit the GPU was working on when it hung */
	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			comm = kstrdup(task->comm, GFP_ATOMIC);

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline().  But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
			cmd = kstrdup_quotable_cmdline(task, GFP_ATOMIC);
			mutex_lock(&dev->struct_mutex);
		}
		rcu_read_unlock();

		if (comm && cmd) {
			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", comm, cmd);
		} else
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
	}

	/* kfree(NULL) is a no-op, so these are safe even if lookup failed */
	kfree(cmd);
	kfree(comm);

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		/* keep the device powered across the reset */
		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}
406 | ||
407 | static void hangcheck_timer_reset(struct msm_gpu *gpu) | |
408 | { | |
409 | DBG("%s", gpu->name); | |
410 | mod_timer(&gpu->hangcheck_timer, | |
411 | round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES)); | |
412 | } | |
413 | ||
e99e88a9 | 414 | static void hangcheck_handler(struct timer_list *t) |
bd6f82d8 | 415 | { |
e99e88a9 | 416 | struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer); |
6b8819c8 RC |
417 | struct drm_device *dev = gpu->dev; |
418 | struct msm_drm_private *priv = dev->dev_private; | |
f97decac JC |
419 | struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); |
420 | uint32_t fence = ring->memptrs->fence; | |
bd6f82d8 | 421 | |
f97decac | 422 | if (fence != ring->hangcheck_fence) { |
bd6f82d8 | 423 | /* some progress has been made.. ya! */ |
f97decac JC |
424 | ring->hangcheck_fence = fence; |
425 | } else if (fence < ring->seqno) { | |
bd6f82d8 | 426 | /* no progress and not done.. hung! */ |
f97decac JC |
427 | ring->hangcheck_fence = fence; |
428 | dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", | |
429 | gpu->name, ring->id); | |
26791c48 RC |
430 | dev_err(dev->dev, "%s: completed fence: %u\n", |
431 | gpu->name, fence); | |
432 | dev_err(dev->dev, "%s: submitted fence: %u\n", | |
f97decac JC |
433 | gpu->name, ring->seqno); |
434 | ||
bd6f82d8 RC |
435 | queue_work(priv->wq, &gpu->recover_work); |
436 | } | |
437 | ||
438 | /* if still more pending work, reset the hangcheck timer: */ | |
f97decac | 439 | if (ring->seqno > ring->hangcheck_fence) |
bd6f82d8 | 440 | hangcheck_timer_reset(gpu); |
6b8819c8 RC |
441 | |
442 | /* workaround for missing irq: */ | |
443 | queue_work(priv->wq, &gpu->retire_work); | |
bd6f82d8 RC |
444 | } |
445 | ||
70c70f09 RC |
446 | /* |
447 | * Performance Counters: | |
448 | */ | |
449 | ||
450 | /* called under perf_lock */ | |
451 | static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) | |
452 | { | |
453 | uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; | |
454 | int i, n = min(ncntrs, gpu->num_perfcntrs); | |
455 | ||
456 | /* read current values: */ | |
457 | for (i = 0; i < gpu->num_perfcntrs; i++) | |
458 | current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg); | |
459 | ||
460 | /* update cntrs: */ | |
461 | for (i = 0; i < n; i++) | |
462 | cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; | |
463 | ||
464 | /* save current values: */ | |
465 | for (i = 0; i < gpu->num_perfcntrs; i++) | |
466 | gpu->last_cntrs[i] = current_cntrs[i]; | |
467 | ||
468 | return n; | |
469 | } | |
470 | ||
471 | static void update_sw_cntrs(struct msm_gpu *gpu) | |
472 | { | |
473 | ktime_t time; | |
474 | uint32_t elapsed; | |
475 | unsigned long flags; | |
476 | ||
477 | spin_lock_irqsave(&gpu->perf_lock, flags); | |
478 | if (!gpu->perfcntr_active) | |
479 | goto out; | |
480 | ||
481 | time = ktime_get(); | |
482 | elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); | |
483 | ||
484 | gpu->totaltime += elapsed; | |
485 | if (gpu->last_sample.active) | |
486 | gpu->activetime += elapsed; | |
487 | ||
488 | gpu->last_sample.active = msm_gpu_active(gpu); | |
489 | gpu->last_sample.time = time; | |
490 | ||
491 | out: | |
492 | spin_unlock_irqrestore(&gpu->perf_lock, flags); | |
493 | } | |
494 | ||
495 | void msm_gpu_perfcntr_start(struct msm_gpu *gpu) | |
496 | { | |
497 | unsigned long flags; | |
498 | ||
eeb75474 RC |
499 | pm_runtime_get_sync(&gpu->pdev->dev); |
500 | ||
70c70f09 RC |
501 | spin_lock_irqsave(&gpu->perf_lock, flags); |
502 | /* we could dynamically enable/disable perfcntr registers too.. */ | |
503 | gpu->last_sample.active = msm_gpu_active(gpu); | |
504 | gpu->last_sample.time = ktime_get(); | |
505 | gpu->activetime = gpu->totaltime = 0; | |
506 | gpu->perfcntr_active = true; | |
507 | update_hw_cntrs(gpu, 0, NULL); | |
508 | spin_unlock_irqrestore(&gpu->perf_lock, flags); | |
509 | } | |
510 | ||
511 | void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) | |
512 | { | |
513 | gpu->perfcntr_active = false; | |
eeb75474 | 514 | pm_runtime_put_sync(&gpu->pdev->dev); |
70c70f09 RC |
515 | } |
516 | ||
517 | /* returns -errno or # of cntrs sampled */ | |
518 | int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, | |
519 | uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) | |
520 | { | |
521 | unsigned long flags; | |
522 | int ret; | |
523 | ||
524 | spin_lock_irqsave(&gpu->perf_lock, flags); | |
525 | ||
526 | if (!gpu->perfcntr_active) { | |
527 | ret = -EINVAL; | |
528 | goto out; | |
529 | } | |
530 | ||
531 | *activetime = gpu->activetime; | |
532 | *totaltime = gpu->totaltime; | |
533 | ||
534 | gpu->activetime = gpu->totaltime = 0; | |
535 | ||
536 | ret = update_hw_cntrs(gpu, ncntrs, cntrs); | |
537 | ||
538 | out: | |
539 | spin_unlock_irqrestore(&gpu->perf_lock, flags); | |
540 | ||
541 | return ret; | |
542 | } | |
543 | ||
7198e6b0 RC |
544 | /* |
545 | * Cmdstream submission/retirement: | |
546 | */ | |
547 | ||
7d12a279 RC |
548 | static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) |
549 | { | |
550 | int i; | |
551 | ||
552 | for (i = 0; i < submit->nr_bos; i++) { | |
553 | struct msm_gem_object *msm_obj = submit->bos[i].obj; | |
554 | /* move to inactive: */ | |
555 | msm_gem_move_to_inactive(&msm_obj->base); | |
8bdcd949 | 556 | msm_gem_put_iova(&msm_obj->base, gpu->aspace); |
dc9a9b32 | 557 | drm_gem_object_put(&msm_obj->base); |
7d12a279 RC |
558 | } |
559 | ||
eeb75474 RC |
560 | pm_runtime_mark_last_busy(&gpu->pdev->dev); |
561 | pm_runtime_put_autosuspend(&gpu->pdev->dev); | |
40e6815b | 562 | msm_gem_submit_free(submit); |
7d12a279 RC |
563 | } |
564 | ||
b6295f9a | 565 | static void retire_submits(struct msm_gpu *gpu) |
1a370be9 RC |
566 | { |
567 | struct drm_device *dev = gpu->dev; | |
f97decac JC |
568 | struct msm_gem_submit *submit, *tmp; |
569 | int i; | |
1a370be9 RC |
570 | |
571 | WARN_ON(!mutex_is_locked(&dev->struct_mutex)); | |
572 | ||
f97decac | 573 | /* Retire the commits starting with highest priority */ |
b1fc2839 | 574 | for (i = 0; i < gpu->nr_rings; i++) { |
f97decac | 575 | struct msm_ringbuffer *ring = gpu->rb[i]; |
1a370be9 | 576 | |
f97decac JC |
577 | list_for_each_entry_safe(submit, tmp, &ring->submits, node) { |
578 | if (dma_fence_is_signaled(submit->fence)) | |
579 | retire_submit(gpu, submit); | |
1a370be9 RC |
580 | } |
581 | } | |
582 | } | |
583 | ||
7198e6b0 RC |
584 | static void retire_worker(struct work_struct *work) |
585 | { | |
586 | struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); | |
587 | struct drm_device *dev = gpu->dev; | |
f97decac | 588 | int i; |
7198e6b0 | 589 | |
f97decac JC |
590 | for (i = 0; i < gpu->nr_rings; i++) |
591 | update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); | |
edd4fc63 | 592 | |
7198e6b0 | 593 | mutex_lock(&dev->struct_mutex); |
b6295f9a | 594 | retire_submits(gpu); |
7198e6b0 RC |
595 | mutex_unlock(&dev->struct_mutex); |
596 | } | |
597 | ||
598 | /* call from irq handler to schedule work to retire bo's */ | |
599 | void msm_gpu_retire(struct msm_gpu *gpu) | |
600 | { | |
601 | struct msm_drm_private *priv = gpu->dev->dev_private; | |
602 | queue_work(priv->wq, &gpu->retire_work); | |
70c70f09 | 603 | update_sw_cntrs(gpu); |
7198e6b0 RC |
604 | } |
605 | ||
/*
 * add bo's to gpu's ring, and kick gpu:
 *
 * Assigns the submit its sequence number, queues it on its ring's
 * submit list, pins/activates every referenced bo, and hands the
 * submit to the hardware.  Caller must hold struct_mutex.
 */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* held until retire_submit(); keeps the GPU powered while busy */
	pm_runtime_get_sync(&gpu->pdev->dev);

	/* no-op unless a resume/recovery set needs_hw_init */
	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_get(&msm_obj->base);
		msm_gem_get_iova(&msm_obj->base,
			submit->gpu->aspace, &iova);

		/* track read vs write access for fence bookkeeping */
		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}
654 | ||
655 | /* | |
656 | * Init/Cleanup: | |
657 | */ | |
658 | ||
659 | static irqreturn_t irq_handler(int irq, void *data) | |
660 | { | |
661 | struct msm_gpu *gpu = data; | |
662 | return gpu->funcs->irq(gpu); | |
663 | } | |
664 | ||
98db803f JC |
665 | static struct clk *get_clock(struct device *dev, const char *name) |
666 | { | |
667 | struct clk *clk = devm_clk_get(dev, name); | |
668 | ||
669 | return IS_ERR(clk) ? NULL : clk; | |
670 | } | |
671 | ||
672 | static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) | |
673 | { | |
674 | struct device *dev = &pdev->dev; | |
675 | struct property *prop; | |
676 | const char *name; | |
677 | int i = 0; | |
678 | ||
679 | gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names"); | |
680 | if (gpu->nr_clocks < 1) { | |
681 | gpu->nr_clocks = 0; | |
682 | return 0; | |
683 | } | |
684 | ||
685 | gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks, | |
686 | GFP_KERNEL); | |
9d20a0e6 JC |
687 | if (!gpu->grp_clks) { |
688 | gpu->nr_clocks = 0; | |
98db803f | 689 | return -ENOMEM; |
9d20a0e6 | 690 | } |
98db803f JC |
691 | |
692 | of_property_for_each_string(dev->of_node, "clock-names", prop, name) { | |
693 | gpu->grp_clks[i] = get_clock(dev, name); | |
694 | ||
695 | /* Remember the key clocks that we need to control later */ | |
134ccada | 696 | if (!strcmp(name, "core") || !strcmp(name, "core_clk")) |
98db803f | 697 | gpu->core_clk = gpu->grp_clks[i]; |
134ccada | 698 | else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk")) |
98db803f JC |
699 | gpu->rbbmtimer_clk = gpu->grp_clks[i]; |
700 | ||
701 | ++i; | |
702 | } | |
703 | ||
704 | return 0; | |
705 | } | |
7198e6b0 | 706 | |
1267a4df JC |
707 | static struct msm_gem_address_space * |
708 | msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev, | |
709 | uint64_t va_start, uint64_t va_end) | |
710 | { | |
711 | struct iommu_domain *iommu; | |
712 | struct msm_gem_address_space *aspace; | |
713 | int ret; | |
714 | ||
715 | /* | |
716 | * Setup IOMMU.. eventually we will (I think) do this once per context | |
717 | * and have separate page tables per context. For now, to keep things | |
718 | * simple and to get something working, just use a single address space: | |
719 | */ | |
720 | iommu = iommu_domain_alloc(&platform_bus_type); | |
721 | if (!iommu) | |
722 | return NULL; | |
723 | ||
724 | iommu->geometry.aperture_start = va_start; | |
725 | iommu->geometry.aperture_end = va_end; | |
726 | ||
727 | dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name); | |
728 | ||
729 | aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu"); | |
730 | if (IS_ERR(aspace)) { | |
731 | dev_err(gpu->dev->dev, "failed to init iommu: %ld\n", | |
732 | PTR_ERR(aspace)); | |
733 | iommu_domain_free(iommu); | |
734 | return ERR_CAST(aspace); | |
735 | } | |
736 | ||
737 | ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0); | |
738 | if (ret) { | |
739 | msm_gem_address_space_put(aspace); | |
740 | return ERR_PTR(ret); | |
741 | } | |
742 | ||
743 | return aspace; | |
744 | } | |
745 | ||
7198e6b0 RC |
/*
 * One-time GPU setup: registers, IRQ, clocks, regulators, devfreq,
 * address space, the shared memptrs buffer, and the ringbuffer(s).
 * On failure everything allocated so far is torn down (devm-managed
 * resources are released automatically with the device).
 */
int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	/* clamp so update_hw_cntrs() can't overrun last_cntrs[] */
	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);


	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);

	spin_lock_init(&gpu->perf_lock);


	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	/* bus clock is optional; NULL just means "not controlled here" */
	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: (also optional, see enable_pwrrail()) */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	msm_devfreq_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	/*
	 * NOTE(review): the allocation size is sizeof(*gpu->memptrs_bo) but
	 * the loop below consumes nr_rings * sizeof(struct msm_rbmemptrs) of
	 * it — presumably the BO's page-aligned backing makes this safe, but
	 * confirm the size expression is intentional.
	 */
	memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): each gets its own slice of the memptrs bo */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			dev_err(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	/* msm_ringbuffer_destroy() tolerates NULL/ERR_PTR entries */
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_put_unlocked(gpu->memptrs_bo);
	}

	platform_set_drvdata(pdev, NULL);
	return ret;
}
878 | ||
879 | void msm_gpu_cleanup(struct msm_gpu *gpu) | |
880 | { | |
f97decac JC |
881 | int i; |
882 | ||
7198e6b0 RC |
883 | DBG("%s", gpu->name); |
884 | ||
885 | WARN_ON(!list_empty(&gpu->active_list)); | |
886 | ||
f97decac JC |
887 | for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { |
888 | msm_ringbuffer_destroy(gpu->rb[i]); | |
889 | gpu->rb[i] = NULL; | |
7198e6b0 | 890 | } |
cd414f3d JC |
891 | |
892 | if (gpu->memptrs_bo) { | |
893 | msm_gem_put_vaddr(gpu->memptrs_bo); | |
894 | msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); | |
dc9a9b32 | 895 | drm_gem_object_put_unlocked(gpu->memptrs_bo); |
cd414f3d JC |
896 | } |
897 | ||
898 | if (!IS_ERR_OR_NULL(gpu->aspace)) { | |
1267a4df JC |
899 | gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, |
900 | NULL, 0); | |
901 | msm_gem_address_space_put(gpu->aspace); | |
902 | } | |
7198e6b0 | 903 | } |