/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

#define RB_SIZE    SZ_32K
#define RB_BLKSIZE 32

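/* Report device parameters (GPU id, GMEM size, chip id, ...) to userspace: */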
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_GMEM_BASE:
		*value = 0x100000;
		return 0;
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	case MSM_PARAM_MAX_FREQ:
		*value = adreno_gpu->base.fast_rate;
		return 0;
	case MSM_PARAM_TIMESTAMP:
		if (adreno_gpu->funcs->get_timestamp) {
			int ret;

			pm_runtime_get_sync(&gpu->pdev->dev);
			ret = adreno_gpu->funcs->get_timestamp(gpu, value);
			pm_runtime_put_autosuspend(&gpu->pdev->dev);

			return ret;
		}
		return -EINVAL;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

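/* (Re)program the CP ringbuffer registers; called at init and after recovery: */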
int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	DBG("%s", gpu->name);

	ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova);
	if (ret) {
		gpu->rb_iova = 0;
		dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
		return ret;
	}

	/* reset ringbuffer: */
	gpu->rb->cur = gpu->rb->start;

	/* reset completed fence seqno: */
	adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
	adreno_gpu->memptrs->rptr = 0;

	/* Setup REG_CP_RB_CNTL: */
	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
			/* size is log2(quad-words): */
			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
			(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));

	/* Setup ringbuffer address: */
	adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
			REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova);

	if (!adreno_is_a430(adreno_gpu)) {
		adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
				REG_ADRENO_CP_RB_RPTR_ADDR_HI,
				rbmemptr(adreno_gpu, rptr));
	}

	return 0;
}

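/* Current write pointer, as a dword offset from the start of the ring: */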
static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	return ring->cur - ring->start;
}

/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
{
	if (adreno_is_a430(adreno_gpu))
		return adreno_gpu->memptrs->rptr = adreno_gpu_read(
			adreno_gpu, REG_ADRENO_CP_RB_RPTR);
	else
		return adreno_gpu->memptrs->rptr;
}

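/* Last fence seqno that the GPU has actually completed: */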
uint32_t adreno_last_fence(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	return adreno_gpu->memptrs->fence;
}

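/* Recover from a hang by power-cycling the GPU and re-running hw init: */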
void adreno_recover(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	// XXX pm-runtime?? we *need* the device to be off after this
	// so maybe continuing to call ->pm_suspend/resume() is better?

	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);

	ret = msm_gpu_hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

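/* Emit the submit's IBs into the ringbuffer, followed by a fence write and
 * an interrupt so we know when the GPU has passed them:
 */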
void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
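			/* fall-thru: on a ctx switch, emit the restore buf like a regular IB */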
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence->seqno);

	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence->seqno);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

	/* Workaround for missing irq issue on 8x16/a306. Unsure if the
	 * root cause is a platform issue or some a306 quirk, but this
	 * keeps things humming along:
	 */
	if (adreno_is_a306(adreno_gpu)) {
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
		OUT_PKT3(ring, CP_INTERRUPT, 1);
		OUT_RING(ring, 0x80000000);
	}

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);
}

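/* Kick the CP by publishing the new write pointer: */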
void adreno_flush(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr;

	/*
	 * Mask wptr value that we calculate to fit in the HW range. This is
	 * to account for the possibility that the last command fit exactly into
	 * the ringbuffer and rb->next hasn't wrapped to zero yet
	 */
	wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
}

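/* Returns true if the CP caught up to the write pointer before the timeout: */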
bool adreno_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(gpu->rb);

	/* wait for CP to drain ringbuffer: */
	if (!spin_until(get_rptr(adreno_gpu) == wptr))
		return true;

	/* TODO maybe we need to reset GPU here to recover from hang? */
	DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
	return false;
}

#ifdef CONFIG_DEBUG_FS
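/* Dump revision, fence state, ring pointers and registers to debugfs: */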
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->fctx->last_fence);
	seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu));
	seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}
}
#endif

/* Dump common gpu status and scratch registers on any hang, to make
 * the hangcheck logs more useful. The scratch registers seem always
 * safe to read when GPU has hung (unlike some other regs, depending
 * on how the GPU hung), and they are useful to match up to cmdstream
 * dumps when debugging hangs:
 */
void adreno_dump_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->fctx->last_fence);
	printk("rptr: %d\n", get_rptr(adreno_gpu));
	printk("rb wptr: %d\n", get_wptr(gpu->rb));
}

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}

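/* Free dwords between wptr and the CP's rptr, keeping one dword of slack so
 * a full ring is never mistaken for an empty one:
 */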
static uint32_t ring_freewords(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t size = gpu->rb->size / 4;
	uint32_t wptr = get_wptr(gpu->rb);
	uint32_t rptr = get_rptr(adreno_gpu);
	return (rptr + (size - 1) - wptr) % size;
}

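/* Spin until the ring has room for ndwords more dwords (or complain on timeout): */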
void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
{
	if (spin_until(ring_freewords(gpu) >= ndwords))
		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
}

static const char *iommu_ports[] = {
		"gfx3d_user", "gfx3d_priv",
		"gfx3d1_user", "gfx3d1_priv",
};

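/* Common init for all adreno generations: base gpu init, pm-runtime, firmware
 * loading, IOMMU attach and the shared memptrs buffer:
 */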
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
{
	struct adreno_platform_config *config = pdev->dev.platform_data;
	struct msm_gpu_config adreno_gpu_config = { 0 };
	struct msm_gpu *gpu = &adreno_gpu->base;
	int ret;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	gpu->fast_rate = config->fast_rate;
	gpu->bus_freq = config->bus_freq;
#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
	gpu->bus_scale_table = config->bus_scale_table;
#endif

	DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u",
			gpu->fast_rate, gpu->bus_freq);

	adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";
	adreno_gpu_config.irqname = "kgsl_3d0_irq";

	adreno_gpu_config.va_start = SZ_16M;
	adreno_gpu_config.va_end = 0xffffffff;

	adreno_gpu_config.ringsz = RB_SIZE;

	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			adreno_gpu->info->name, &adreno_gpu_config);
	if (ret)
		return ret;

	pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
				adreno_gpu->info->pm4fw, ret);
		return ret;
	}

	ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
				adreno_gpu->info->pfpfw, ret);
		return ret;
	}

	if (gpu->aspace && gpu->aspace->mmu) {
		struct msm_mmu *mmu = gpu->aspace->mmu;
		ret = mmu->funcs->attach(mmu, iommu_ports,
				ARRAY_SIZE(iommu_ports));
		if (ret)
			return ret;
	}

	adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs),
			MSM_BO_UNCACHED);
	if (IS_ERR(adreno_gpu->memptrs_bo)) {
		ret = PTR_ERR(adreno_gpu->memptrs_bo);
		adreno_gpu->memptrs_bo = NULL;
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		return ret;
	}

	adreno_gpu->memptrs = msm_gem_get_vaddr(adreno_gpu->memptrs_bo);
	if (IS_ERR(adreno_gpu->memptrs)) {
		dev_err(drm->dev, "could not vmap memptrs\n");
		return -ENOMEM;
	}

	ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->aspace,
			&adreno_gpu->memptrs_iova);
	if (ret) {
		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
		return ret;
	}

	return 0;
}

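/* Undo adreno_gpu_init(): release memptrs, firmware, and the address space: */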
void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (adreno_gpu->memptrs_bo) {
		if (adreno_gpu->memptrs)
			msm_gem_put_vaddr(adreno_gpu->memptrs_bo);

		if (adreno_gpu->memptrs_iova)
			msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->aspace);

		drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
	}
	release_firmware(adreno_gpu->pm4);
	release_firmware(adreno_gpu->pfp);

	msm_gpu_cleanup(gpu);

	if (gpu->aspace) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
				iommu_ports, ARRAY_SIZE(iommu_ports));
		msm_gem_address_space_put(gpu->aspace);
	}
}