/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include <linux/slab.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

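/*
 * The zap-shader lookup below expects devicetree nodes along these lines.
 * This is only an illustrative sketch: the "zap-shader" child node and its
 * "memory-region" phandle match the code, while the label, unit address and
 * size are made-up example values.
 *
 *	zap_shader_mem: zap-shader-region@8ea00000 {
 *		reg = <0x8ea00000 0x2000>;
 *		no-map;
 *	};
 *
 *	gpu@... {
 *		zap-shader {
 *			memory-region = <&zap_shader_mem>;
 *		};
 *	};
 */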
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	np = of_parse_phandle(np, "memory-region", 0);
	if (!np)
		return -EINVAL;

	ret = of_address_to_resource(np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not. But since we've already gotten thru adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
	} else {
		char *newname;

		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size, NULL);
		kfree(newname);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}

static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet. But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit, ctx);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}

static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}
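
/*
 * Note on the two gpu_write64() calls above: in this driver gpu_write64()
 * splits a 64-bit value across a LO/HI register pair, so (illustrative
 * numbers only) an instruction-base iova of 0x0000000101000000 is written
 * as LO = 0x01000000 and HI = 0x00000001.
 */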

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}

#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	A5XX_RBBM_INT_0_MASK_CP_SW | \
	A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;
	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};

static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

struct a5xx_crashdumper {
	void *ptr;
	struct drm_gem_object *bo;
	u64 iova;
};

struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;
};

static int a5xx_crashdumper_init(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
		&dumper->bo, &dumper->iova);

	return PTR_ERR_OR_ZERO(dumper->ptr);
}

static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}

/*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture by the crashdumper. These are not nominally accessible from
 * the CPU on a secure platform.
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};

static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
		struct a5xx_gpu_state *a5xx_state)
{
	struct a5xx_crashdumper dumper = { 0 };
	u32 offset, count = 0;
	u64 *ptr;
	int i;

	if (a5xx_crashdumper_init(gpu, &dumper))
		return;

	/* The script will be written at offset 0 */
	ptr = dumper.ptr;

	/* Start writing the data at offset 256k */
	offset = dumper.iova + (256 * SZ_1K);

	/* Count how many additional registers to get from the HLSQ aperture */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
		count += a5xx_hlsq_aperture_regs[i].count;

	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
	if (!a5xx_state->hlsqregs)
		return;

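	/*
	 * Sketch of the script layout the loop below produces (inferred from
	 * the code, not from hardware documentation): each register range
	 * becomes two 64-bit pairs, a value word followed by a
	 * register-and-count word, roughly:
	 *
	 *	data[0] = (u64)type << 8;
	 *	data[1] = ((u64)REG_A5XX_HLSQ_DBG_READ_SEL << 44) | (1 << 21) | 1;
	 *	data[2] = offset;	(destination iova for the readback)
	 *	data[3] = ((u64)REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44) | count;
	 *
	 * and two zero words terminate the script.
	 */
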
	/* Build the crashdump script */
	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 type = a5xx_hlsq_aperture_regs[i].type;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		/* Write the register to select the desired bank */
		*ptr++ = ((u64) type << 8);
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
			(1 << 21) | 1;

		*ptr++ = offset;
		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
			| c;

		offset += c * sizeof(u32);
	}

	/* Write two zeros to close off the script */
	*ptr++ = 0;
	*ptr++ = 0;

	if (a5xx_crashdumper_run(gpu, &dumper)) {
		kfree(a5xx_state->hlsqregs);
		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
		return;
	}

	/* Copy the data from the crashdumper to the state */
	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
		count * sizeof(u32));

	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
}

static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
		GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}

static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}

int a5xx_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, a5xx_gpu_state_destroy);
}


#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	int i, j;
	u32 pos = 0;
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	if (IS_ERR_OR_NULL(state))
		return;

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 c = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < c; j++, pos++, o++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
				continue;

			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
				o << 2, a5xx_state->hlsqregs[pos]);
		}
	}
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}
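
/*
 * Rough worked example for the conversion above (illustrative numbers, not
 * from the source): with a 500 MHz core clock, clk_get_rate()/1000000 is
 * 500 cycles per microsecond, so a delta of 5,000,000 busy cycles is
 * reported to devfreq as 10,000 us of busy time.
 */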

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
	},
	.get_timestamp = a5xx_get_timestamp,
};

static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 bin, val;

	cell = nvmem_cell_get(dev, "speed_bin");

	/* If a nvmem cell isn't defined, nothing to do */
	if (IS_ERR(cell))
		return;

	bin = *((u32 *) nvmem_cell_read(cell, NULL));
	nvmem_cell_put(cell);

	val = (1 << bin);

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}
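
/*
 * Example of the supported-hw mask above (values are illustrative, not from
 * this file): a fused speed bin of 2 gives val = 1 << 2 = 0x4, so only OPP
 * table entries whose opp-supported-hw property has bit 2 set stay enabled.
 */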

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}