/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "msm_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_fence.h"

#include <linux/string_helpers.h>

/*
 * Power Management:
 */

#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
#include <mach/board.h>
static void bs_init(struct msm_gpu *gpu)
{
	if (gpu->bus_scale_table) {
		gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table);
		DBG("bus scale client: %08x", gpu->bsc);
	}
}

static void bs_fini(struct msm_gpu *gpu)
{
	if (gpu->bsc) {
		msm_bus_scale_unregister_client(gpu->bsc);
		gpu->bsc = 0;
	}
}

static void bs_set(struct msm_gpu *gpu, int idx)
{
	if (gpu->bsc) {
		DBG("set bus scaling: %d", idx);
		msm_bus_scale_client_update_request(gpu->bsc, idx);
	}
}
#else
static void bs_init(struct msm_gpu *gpu) {}
static void bs_fini(struct msm_gpu *gpu) {}
static void bs_set(struct msm_gpu *gpu, int idx) {}
#endif

static int enable_pwrrail(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret = 0;

	if (gpu->gpu_reg) {
		ret = regulator_enable(gpu->gpu_reg);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
			return ret;
		}
	}

	if (gpu->gpu_cx) {
		ret = regulator_enable(gpu->gpu_cx);
		if (ret) {
			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
			return ret;
		}
	}

	return 0;
}

static int disable_pwrrail(struct msm_gpu *gpu)
{
	if (gpu->gpu_cx)
		regulator_disable(gpu->gpu_cx);
	if (gpu->gpu_reg)
		regulator_disable(gpu->gpu_reg);
	return 0;
}

static int enable_clk(struct msm_gpu *gpu)
{
	int i;

	if (gpu->core_clk && gpu->fast_rate)
		clk_set_rate(gpu->core_clk, gpu->fast_rate);

	/* Set the RBBM timer rate to 19.2 MHz */
	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 19200000);

	/* prepare (which may sleep) and enable are done as two separate passes: */
	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_prepare(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_enable(gpu->grp_clks[i]);

	return 0;
}

static int disable_clk(struct msm_gpu *gpu)
{
	int i;

	/* disable, then unprepare, mirroring enable_clk(): */
	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_disable(gpu->grp_clks[i]);

	for (i = gpu->nr_clocks - 1; i >= 0; i--)
		if (gpu->grp_clks[i])
			clk_unprepare(gpu->grp_clks[i]);

	/*
	 * Set the clock to a deliberately low rate. On older targets the clock
	 * speed had to be non-zero to avoid problems. On newer targets this
	 * will be rounded down to zero anyway so it all works out.
	 */
	if (gpu->core_clk)
		clk_set_rate(gpu->core_clk, 27000000);

	if (gpu->rbbmtimer_clk)
		clk_set_rate(gpu->rbbmtimer_clk, 0);

	return 0;
}

static int enable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_prepare_enable(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, gpu->bus_freq);
	return 0;
}

static int disable_axi(struct msm_gpu *gpu)
{
	if (gpu->ebi1_clk)
		clk_disable_unprepare(gpu->ebi1_clk);
	if (gpu->bus_freq)
		bs_set(gpu, 0);
	return 0;
}

int msm_gpu_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = enable_pwrrail(gpu);
	if (ret)
		return ret;

	ret = enable_clk(gpu);
	if (ret)
		return ret;

	ret = enable_axi(gpu);
	if (ret)
		return ret;

	gpu->needs_hw_init = true;

	return 0;
}

int msm_gpu_pm_suspend(struct msm_gpu *gpu)
{
	int ret;

	DBG("%s", gpu->name);

	ret = disable_axi(gpu);
	if (ret)
		return ret;

	ret = disable_clk(gpu);
	if (ret)
		return ret;

	ret = disable_pwrrail(gpu);
	if (ret)
		return ret;

	return 0;
}
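
/*
 * Illustrative only, not part of this driver code: a backend's runtime-PM
 * hooks might wire the helpers above up roughly like this (the function
 * names are hypothetical; platform_set_drvdata(pdev, gpu) is done in
 * msm_gpu_init() below):
 *
 *	static int example_gpu_runtime_resume(struct device *dev)
 *	{
 *		struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
 *
 *		return msm_gpu_pm_resume(gpu);
 *	}
 *
 *	static int example_gpu_runtime_suspend(struct device *dev)
 *	{
 *		struct msm_gpu *gpu = platform_get_drvdata(to_platform_device(dev));
 *
 *		return msm_gpu_pm_suspend(gpu);
 *	}
 *
 * Resume powers the rail before clocks and the bus; suspend tears the same
 * pieces down in the opposite order.
 */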

int msm_gpu_hw_init(struct msm_gpu *gpu)
{
	int ret;

	WARN_ON(!mutex_is_locked(&gpu->dev->struct_mutex));

	if (!gpu->needs_hw_init)
		return 0;

	disable_irq(gpu->irq);
	ret = gpu->funcs->hw_init(gpu);
	if (!ret)
		gpu->needs_hw_init = false;
	enable_irq(gpu->irq);

	return ret;
}

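/*
 * Illustrative call pattern (hypothetical caller, not taken from this
 * file): msm_gpu_hw_init() expects struct_mutex to be held and is cheap
 * when no re-init is pending, e.g.:
 *
 *	mutex_lock(&gpu->dev->struct_mutex);
 *	ret = msm_gpu_hw_init(gpu);
 *	mutex_unlock(&gpu->dev->struct_mutex);
 *
 * needs_hw_init is set by msm_gpu_pm_resume() and cleared here, so the
 * hardware is re-initialized at most once per resume cycle.
 */
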
/*
 * Hangcheck detection for locked gpu:
 */

static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
		uint32_t fence)
{
	struct msm_gem_submit *submit;

	list_for_each_entry(submit, &ring->submits, node) {
		if (submit->seqno > fence)
			break;

		msm_update_fence(submit->ring->fctx,
			submit->fence->seqno);
	}
}

static struct msm_gem_submit *
find_submit(struct msm_ringbuffer *ring, uint32_t fence)
{
	struct msm_gem_submit *submit;

	WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex));

	list_for_each_entry(submit, &ring->submits, node)
		if (submit->seqno == fence)
			return submit;

	return NULL;
}

static void retire_submits(struct msm_gpu *gpu);

static void recover_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_gem_submit *submit;
	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
	int i;

	mutex_lock(&dev->struct_mutex);

	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);

	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
	if (submit) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(submit->pid, PIDTYPE_PID);
		if (task) {
			char *cmd;

			/*
			 * So slightly annoying, in other paths like
			 * mmap'ing gem buffers, mmap_sem is acquired
			 * before struct_mutex, which means we can't
			 * hold struct_mutex across the call to
			 * get_cmdline().  But submits are retired
			 * from the same in-order workqueue, so we can
			 * safely drop the lock here without worrying
			 * about the submit going away.
			 */
			mutex_unlock(&dev->struct_mutex);
			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
			mutex_lock(&dev->struct_mutex);

			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
				gpu->name, task->comm, cmd);

			msm_rd_dump_submit(priv->hangrd, submit,
				"offending task: %s (%s)", task->comm, cmd);
		} else {
			msm_rd_dump_submit(priv->hangrd, submit, NULL);
		}
		rcu_read_unlock();
	}

	/*
	 * Update all the rings with the latest and greatest fence.. this
	 * needs to happen after msm_rd_dump_submit() to ensure that the
	 * bo's referenced by the offending submit are still around.
	 */
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];
		uint32_t fence = ring->memptrs->fence;

		/*
		 * For the current (faulting?) ring/submit advance the fence by
		 * one more to clear the faulting submit
		 */
		if (ring == cur_ring)
			fence++;

		update_fences(gpu, ring, fence);
	}

	if (msm_gpu_active(gpu)) {
		/* retire completed submits, plus the one that hung: */
		retire_submits(gpu);

		pm_runtime_get_sync(&gpu->pdev->dev);
		gpu->funcs->recover(gpu);
		pm_runtime_put_sync(&gpu->pdev->dev);

		/*
		 * Replay all remaining submits starting with highest priority
		 * ring
		 */
		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			list_for_each_entry(submit, &ring->submits, node)
				gpu->funcs->submit(gpu, submit, NULL);
		}
	}

	mutex_unlock(&dev->struct_mutex);

	msm_gpu_retire(gpu);
}

static void hangcheck_timer_reset(struct msm_gpu *gpu)
{
	DBG("%s", gpu->name);
	mod_timer(&gpu->hangcheck_timer,
			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
}

static void hangcheck_handler(unsigned long data)
{
	struct msm_gpu *gpu = (struct msm_gpu *)data;
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	uint32_t fence = ring->memptrs->fence;

	if (fence != ring->hangcheck_fence) {
		/* some progress has been made.. ya! */
		ring->hangcheck_fence = fence;
	} else if (fence < ring->seqno) {
		/* no progress and not done.. hung! */
		ring->hangcheck_fence = fence;
		dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
				gpu->name, ring->id);
		dev_err(dev->dev, "%s: completed fence: %u\n",
				gpu->name, fence);
		dev_err(dev->dev, "%s: submitted fence: %u\n",
				gpu->name, ring->seqno);

		queue_work(priv->wq, &gpu->recover_work);
	}

	/* if still more pending work, reset the hangcheck timer: */
	if (ring->seqno > ring->hangcheck_fence)
		hangcheck_timer_reset(gpu);

	/* workaround for missing irq: */
	queue_work(priv->wq, &gpu->retire_work);
}

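/*
 * Illustrative timeline (the numbers are hypothetical): with ring->seqno
 * == 42 (last submitted) and ring->memptrs->fence == 40 (last completed),
 * one timer tick records hangcheck_fence = 40 and re-arms.  If the next
 * tick still reads 40 while 40 < 42, no forward progress was made with
 * work outstanding, so recover_work is queued; once the fence catches up
 * to seqno the timer is simply not re-armed.
 */
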
/*
 * Performance Counters:
 */

/* called under perf_lock */
static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs)
{
	uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)];
	int i, n = min(ncntrs, gpu->num_perfcntrs);

	/* read current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg);

	/* update cntrs: */
	for (i = 0; i < n; i++)
		cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i];

	/* save current values: */
	for (i = 0; i < gpu->num_perfcntrs; i++)
		gpu->last_cntrs[i] = current_cntrs[i];

	return n;
}

static void update_sw_cntrs(struct msm_gpu *gpu)
{
	ktime_t time;
	uint32_t elapsed;
	unsigned long flags;

	spin_lock_irqsave(&gpu->perf_lock, flags);
	if (!gpu->perfcntr_active)
		goto out;

	time = ktime_get();
	elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time));

	gpu->totaltime += elapsed;
	if (gpu->last_sample.active)
		gpu->activetime += elapsed;

	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = time;

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
{
	unsigned long flags;

	pm_runtime_get_sync(&gpu->pdev->dev);

	spin_lock_irqsave(&gpu->perf_lock, flags);
	/* we could dynamically enable/disable perfcntr registers too.. */
	gpu->last_sample.active = msm_gpu_active(gpu);
	gpu->last_sample.time = ktime_get();
	gpu->activetime = gpu->totaltime = 0;
	gpu->perfcntr_active = true;
	update_hw_cntrs(gpu, 0, NULL);
	spin_unlock_irqrestore(&gpu->perf_lock, flags);
}

void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
{
	gpu->perfcntr_active = false;
	pm_runtime_put_sync(&gpu->pdev->dev);
}

/* returns -errno or # of cntrs sampled */
int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&gpu->perf_lock, flags);

	if (!gpu->perfcntr_active) {
		ret = -EINVAL;
		goto out;
	}

	*activetime = gpu->activetime;
	*totaltime = gpu->totaltime;

	gpu->activetime = gpu->totaltime = 0;

	ret = update_hw_cntrs(gpu, ncntrs, cntrs);

out:
	spin_unlock_irqrestore(&gpu->perf_lock, flags);

	return ret;
}

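/*
 * Illustrative usage (hypothetical sampling loop, e.g. from a debugfs or
 * profiling path; not code from this file):
 *
 *	uint32_t activetime, totaltime, cntrs[5];
 *	int n;
 *
 *	msm_gpu_perfcntr_start(gpu);
 *	... let the GPU run for a sampling interval ...
 *	n = msm_gpu_perfcntr_sample(gpu, &activetime, &totaltime,
 *			ARRAY_SIZE(cntrs), cntrs);
 *	msm_gpu_perfcntr_stop(gpu);
 *
 * On success n is the number of hardware counter deltas copied into
 * cntrs[], and activetime/totaltime are accumulated in microseconds (see
 * update_sw_cntrs() above).
 */
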
/*
 * Cmdstream submission/retirement:
 */

static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	int i;

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		/* move to inactive: */
		msm_gem_move_to_inactive(&msm_obj->base);
		msm_gem_put_iova(&msm_obj->base, gpu->aspace);
		drm_gem_object_unreference(&msm_obj->base);
	}

	pm_runtime_mark_last_busy(&gpu->pdev->dev);
	pm_runtime_put_autosuspend(&gpu->pdev->dev);
	msm_gem_submit_free(submit);
}

static void retire_submits(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_gem_submit *submit, *tmp;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	/* Retire the commits starting with highest priority */
	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		list_for_each_entry_safe(submit, tmp, &ring->submits, node) {
			if (dma_fence_is_signaled(submit->fence))
				retire_submit(gpu, submit);
		}
	}
}

static void retire_worker(struct work_struct *work)
{
	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
	struct drm_device *dev = gpu->dev;
	int i;

	for (i = 0; i < gpu->nr_rings; i++)
		update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);

	mutex_lock(&dev->struct_mutex);
	retire_submits(gpu);
	mutex_unlock(&dev->struct_mutex);
}

/* call from irq handler to schedule work to retire bo's */
void msm_gpu_retire(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	queue_work(priv->wq, &gpu->retire_work);
	update_sw_cntrs(gpu);
}
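
/*
 * Illustrative sketch (hypothetical backend code, not taken from this
 * file): a backend's ->irq() callback would typically acknowledge its
 * interrupt status and then call msm_gpu_retire(), so the actual buffer
 * retirement runs from the workqueue rather than in IRQ context:
 *
 *	static irqreturn_t example_gpu_irq(struct msm_gpu *gpu)
 *	{
 *		u32 status = gpu_read(gpu, EXAMPLE_IRQ_STATUS_REG);
 *
 *		gpu_write(gpu, EXAMPLE_IRQ_CLEAR_REG, status);
 *		msm_gpu_retire(gpu);
 *
 *		return IRQ_HANDLED;
 *	}
 *
 * The register names here are placeholders, not real hardware defines.
 */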

/* add bo's to gpu's ring, and kick gpu: */
void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	int i;

	WARN_ON(!mutex_is_locked(&dev->struct_mutex));

	pm_runtime_get_sync(&gpu->pdev->dev);

	msm_gpu_hw_init(gpu);

	submit->seqno = ++ring->seqno;

	list_add_tail(&submit->node, &ring->submits);

	msm_rd_dump_submit(priv->rd, submit, NULL);

	update_sw_cntrs(gpu);

	for (i = 0; i < submit->nr_bos; i++) {
		struct msm_gem_object *msm_obj = submit->bos[i].obj;
		uint64_t iova;

		/* can't happen yet.. but when we add 2d support we'll have
		 * to deal w/ cross-ring synchronization:
		 */
		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));

		/* submit takes a reference to the bo and iova until retired: */
		drm_gem_object_reference(&msm_obj->base);
		msm_gem_get_iova(&msm_obj->base,
				submit->gpu->aspace, &iova);

		if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
		else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)
			msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence);
	}

	gpu->funcs->submit(gpu, submit, ctx);
	priv->lastctx = ctx;

	hangcheck_timer_reset(gpu);
}
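
/*
 * Illustrative call sequence (summarizing the above; the ioctl-side naming
 * is approximate): the GEM submit path builds a msm_gem_submit, takes
 * struct_mutex, and calls msm_gpu_submit(), which bumps the ring's seqno,
 * takes a reference and an iova hold on each bo, and hands the submit to
 * the backend's ->submit() hook.  Completion is observed later when the
 * hardware updates ring->memptrs->fence and the IRQ path calls
 * msm_gpu_retire(), whose worker drops the references taken here.
 */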

/*
 * Init/Cleanup:
 */

static irqreturn_t irq_handler(int irq, void *data)
{
	struct msm_gpu *gpu = data;
	return gpu->funcs->irq(gpu);
}

static struct clk *get_clock(struct device *dev, const char *name)
{
	struct clk *clk = devm_clk_get(dev, name);

	return IS_ERR(clk) ? NULL : clk;
}

static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
{
	struct device *dev = &pdev->dev;
	struct property *prop;
	const char *name;
	int i = 0;

	gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
	if (gpu->nr_clocks < 1) {
		gpu->nr_clocks = 0;
		return 0;
	}

	gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks,
		GFP_KERNEL);
	if (!gpu->grp_clks)
		return -ENOMEM;

	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
		gpu->grp_clks[i] = get_clock(dev, name);

		/* Remember the key clocks that we need to control later */
		if (!strcmp(name, "core") || !strcmp(name, "core_clk"))
			gpu->core_clk = gpu->grp_clks[i];
		else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk"))
			gpu->rbbmtimer_clk = gpu->grp_clks[i];

		++i;
	}

	return 0;
}
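
/*
 * Illustrative device-tree fragment (an example only, not a binding
 * definition): get_clocks() sizes gpu->grp_clks from "clock-names",
 * fetches each entry, and latches the ones named "core"/"core_clk" and
 * "rbbmtimer"/"rbbmtimer_clk" for direct rate control, e.g.:
 *
 *	clock-names = "core", "iface", "rbbmtimer";
 *
 * Any other named clocks are still prepared/enabled as a group by
 * enable_clk()/disable_clk() above.
 */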

static struct msm_gem_address_space *
msm_gpu_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev,
		uint64_t va_start, uint64_t va_end)
{
	struct iommu_domain *iommu;
	struct msm_gem_address_space *aspace;
	int ret;

	/*
	 * Setup IOMMU.. eventually we will (I think) do this once per context
	 * and have separate page tables per context.  For now, to keep things
	 * simple and to get something working, just use a single address space:
	 */
	iommu = iommu_domain_alloc(&platform_bus_type);
	if (!iommu)
		return NULL;

	iommu->geometry.aperture_start = va_start;
	iommu->geometry.aperture_end = va_end;

	dev_info(gpu->dev->dev, "%s: using IOMMU\n", gpu->name);

	aspace = msm_gem_address_space_create(&pdev->dev, iommu, "gpu");
	if (IS_ERR(aspace)) {
		dev_err(gpu->dev->dev, "failed to init iommu: %ld\n",
			PTR_ERR(aspace));
		iommu_domain_free(iommu);
		return ERR_CAST(aspace);
	}

	ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0);
	if (ret) {
		msm_gem_address_space_put(aspace);
		return ERR_PTR(ret);
	}

	return aspace;
}

int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
		const char *name, struct msm_gpu_config *config)
{
	int i, ret, nr_rings = config->nr_rings;
	void *memptrs;
	uint64_t memptrs_iova;

	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);

	gpu->dev = drm;
	gpu->funcs = funcs;
	gpu->name = name;

	INIT_LIST_HEAD(&gpu->active_list);
	INIT_WORK(&gpu->retire_work, retire_worker);
	INIT_WORK(&gpu->recover_work, recover_worker);

	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
			(unsigned long)gpu);

	spin_lock_init(&gpu->perf_lock);

	/* Map registers: */
	gpu->mmio = msm_ioremap(pdev, config->ioname, name);
	if (IS_ERR(gpu->mmio)) {
		ret = PTR_ERR(gpu->mmio);
		goto fail;
	}

	/* Get Interrupt: */
	gpu->irq = platform_get_irq_byname(pdev, config->irqname);
	if (gpu->irq < 0) {
		ret = gpu->irq;
		dev_err(drm->dev, "failed to get irq: %d\n", ret);
		goto fail;
	}

	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
			IRQF_TRIGGER_HIGH, gpu->name, gpu);
	if (ret) {
		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
		goto fail;
	}

	ret = get_clocks(pdev, gpu);
	if (ret)
		goto fail;

	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
	DBG("ebi1_clk: %p", gpu->ebi1_clk);
	if (IS_ERR(gpu->ebi1_clk))
		gpu->ebi1_clk = NULL;

	/* Acquire regulators: */
	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
	DBG("gpu_reg: %p", gpu->gpu_reg);
	if (IS_ERR(gpu->gpu_reg))
		gpu->gpu_reg = NULL;

	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
	DBG("gpu_cx: %p", gpu->gpu_cx);
	if (IS_ERR(gpu->gpu_cx))
		gpu->gpu_cx = NULL;

	gpu->pdev = pdev;
	platform_set_drvdata(pdev, gpu);

	bs_init(gpu);

	gpu->aspace = msm_gpu_create_address_space(gpu, pdev,
		config->va_start, config->va_end);

	if (gpu->aspace == NULL)
		dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name);
	else if (IS_ERR(gpu->aspace)) {
		ret = PTR_ERR(gpu->aspace);
		goto fail;
	}

	memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo),
		MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo,
		&memptrs_iova);

	if (IS_ERR(memptrs)) {
		ret = PTR_ERR(memptrs);
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		goto fail;
	}

	if (nr_rings > ARRAY_SIZE(gpu->rb)) {
		DRM_DEV_INFO_ONCE(drm->dev, "Only creating %zu ringbuffers\n",
			ARRAY_SIZE(gpu->rb));
		nr_rings = ARRAY_SIZE(gpu->rb);
	}

	/* Create ringbuffer(s): */
	for (i = 0; i < nr_rings; i++) {
		gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova);

		if (IS_ERR(gpu->rb[i])) {
			ret = PTR_ERR(gpu->rb[i]);
			dev_err(drm->dev,
				"could not create ringbuffer %d: %d\n", i, ret);
			goto fail;
		}

		memptrs += sizeof(struct msm_rbmemptrs);
		memptrs_iova += sizeof(struct msm_rbmemptrs);
	}

	gpu->nr_rings = nr_rings;

	return 0;

fail:
	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
	}

	platform_set_drvdata(pdev, NULL);
	return ret;
}
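
/*
 * Illustrative caller sketch (values are hypothetical, in the style of an
 * adreno backend; only the msm_gpu_config fields used above are shown):
 *
 *	struct msm_gpu_config config = {
 *		.ioname = "kgsl_3d0_reg_memory",
 *		.irqname = "kgsl_3d0_irq",
 *		.va_start = SZ_16M,
 *		.va_end = 0xffffffff,
 *		.nr_rings = 1,
 *	};
 *
 *	ret = msm_gpu_init(drm, pdev, gpu, &example_funcs, "example-gpu",
 *			&config);
 *
 * nr_rings is clamped to ARRAY_SIZE(gpu->rb), and va_start/va_end bound
 * the IOMMU aperture set up in msm_gpu_create_address_space().
 */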

void msm_gpu_cleanup(struct msm_gpu *gpu)
{
	int i;

	DBG("%s", gpu->name);

	WARN_ON(!list_empty(&gpu->active_list));

	bs_fini(gpu);

	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
		msm_ringbuffer_destroy(gpu->rb[i]);
		gpu->rb[i] = NULL;
	}

	if (gpu->memptrs_bo) {
		msm_gem_put_vaddr(gpu->memptrs_bo);
		msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace);
		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
	}

	if (!IS_ERR_OR_NULL(gpu->aspace)) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
				NULL, 0);
		msm_gem_address_space_put(gpu->aspace);
	}
}