/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <linux/types.h>
#include <linux/cpumask.h>
#include <linux/qcom_scm.h>
#include <linux/dma-mapping.h>
#include <linux/of_address.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/pm_opp.h>
#include <linux/nvmem-consumer.h>
#include "msm_gem.h"
#include "msm_mmu.h"
#include "a5xx_gpu.h"

extern bool hang_debug;
static void a5xx_dump(struct msm_gpu *gpu);

#define GPU_PAS_ID 13

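/*
 * Load the zap shader firmware (MDT format) into the reserved memory region
 * described by the "zap-shader" device tree node and hand it to the secure
 * world for authentication via SCM.
 */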
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	struct device_node *np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
		return -EINVAL;

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np)
		return -ENODEV;

	np = of_parse_phandle(np, "memory-region", 0);
	if (!np)
		return -EINVAL;

	ret = of_address_to_resource(np, 0, &r);
	if (ret)
		return ret;

	mem_phys = r.start;
	mem_size = resource_size(&r);

	/* Request the MDT file for the firmware */
	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not. But since we've already gotten thru adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size);
	} else {
		char newname[strlen("qcom/") + strlen(fwname) + 1];

		sprintf(newname, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
				mem_region, mem_phys, mem_size);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
	if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}

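/*
 * Publish the ring write pointer. The hardware WPTR register is only written
 * when this is the current ring and no preemption is in flight; otherwise the
 * update is deferred until the ring is made current again.
 */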
static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	spin_unlock_irqrestore(&ring->lock, flags);

	/* Make sure everything is posted before making a decision */
	mb();

	/* Update HW if this is the current ring and we are not in preempt */
	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}

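/*
 * "sudo" submit path (CONFIG_DRM_MSM_GPU_SUDO): copy the command stream
 * directly into the ringbuffer instead of issuing indirect buffers, then
 * wait for the GPU to go idle and retire the submit by hand.
 */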
static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	struct msm_gem_object *obj;
	uint32_t *ptr, dwords;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			/* copy commands into RB: */
			obj = submit->bos[submit->cmd[i].idx].obj;
			dwords = submit->cmd[i].size;

			ptr = msm_gem_get_vaddr(&obj->base);

			/* _get_vaddr() shouldn't fail at this point,
			 * since we've already mapped it once in
			 * submit_reloc()
			 */
			if (WARN_ON(!ptr))
				return;

			for (i = 0; i < dwords; i++) {
				/* normally the OUT_PKTn() would wait
				 * for space for the packet.  But since
				 * we just OUT_RING() the whole thing,
				 * need to call adreno_wait_ring()
				 * ourself:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[i]);
			}

			msm_gem_put_vaddr(&obj->base);

			break;
		}
	}

	a5xx_flush(gpu, ring);
	a5xx_preempt_trigger(gpu);

	/* we might not necessarily have a cmd from userspace to
	 * trigger an event to know that submit has completed, so
	 * do this manually:
	 */
	a5xx_idle(gpu, ring);
	ring->memptrs->fence = submit->seqno;
	msm_gpu_retire(gpu);
}

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
	struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i, ibs = 0;

	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
		priv->lastctx = NULL;
		a5xx_submit_in_rb(gpu, submit, ctx);
		return;
	}

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x02);

	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x02);

	/* Submit the commands */
	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			if (priv->lastctx == ctx)
				break;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/*
	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
	 * are done rendering - otherwise a lucky preemption would start
	 * replaying from the last checkpoint
	 */
	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);
	OUT_RING(ring, 0);

	/* Turn off IB level preemptions */
	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Write the fence to the scratch register */
	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
	OUT_RING(ring, submit->seqno);

	/*
	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to the memory and then triggers the interrupt
	 */
	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
	OUT_RING(ring, submit->seqno);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/*
	 * If dword[2:1] are non zero, they specify an address for the CP to
	 * write the value of dword[3] to on preemption complete. Write 0 to
	 * skip the write
	 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	/* Data value - not used if the address above is 0 */
	OUT_RING(ring, 0x01);
	/* Set bit 0 to trigger an interrupt on preempt complete */
	OUT_RING(ring, 0x01);

	a5xx_flush(gpu, ring);

	/* Check to see if we need to start preemption */
	a5xx_preempt_trigger(gpu);
}

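/* Register/value pairs written by a5xx_set_hwcg() to enable hardware clock gating */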
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};

void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
		gpu_write(gpu, a5xx_hwcg[i].offset,
			state ? a5xx_hwcg[i].value : 0);

	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
}

static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

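/*
 * Prime ring 0 for preemption: point the CP at its preemption save record,
 * enable local preemption and issue an initial CONTEXT_SWITCH_YIELD. Skipped
 * when only one ring is configured.
 */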
static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

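/*
 * Copy a microcode image into a GPU-readable buffer object, skipping the
 * first dword of the firmware file, and return the BO and its GPU address.
 */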
static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
		const struct firmware *fw, u64 *iova)
{
	struct drm_gem_object *bo;
	void *ptr;

	ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
		MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);

	if (IS_ERR(ptr))
		return ERR_CAST(ptr);

	memcpy(ptr, &fw->data[4], fw->size - 4);

	msm_gem_put_vaddr(bo);
	return bo;
}

static int a5xx_ucode_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	int ret;

	if (!a5xx_gpu->pm4_bo) {
		a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);

		if (IS_ERR(a5xx_gpu->pm4_bo)) {
			ret = PTR_ERR(a5xx_gpu->pm4_bo);
			a5xx_gpu->pm4_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
				ret);
			return ret;
		}
	}

	if (!a5xx_gpu->pfp_bo) {
		a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu,
			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);

		if (IS_ERR(a5xx_gpu->pfp_bo)) {
			ret = PTR_ERR(a5xx_gpu->pfp_bo);
			a5xx_gpu->pfp_bo = NULL;
			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
				ret);
			return ret;
		}
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);

	return 0;
}

#define SCM_GPU_ZAP_SHADER_RESUME 0

static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
{
	int ret;

	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
	if (ret)
		DRM_ERROR("%s: zap-shader resume failed: %d\n",
			gpu->name, ret);

	return ret;
}

static int a5xx_zap_shader_init(struct msm_gpu *gpu)
{
	static bool loaded;
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;
	int ret;

	/*
	 * If the zap shader is already loaded into memory we just need to kick
	 * the remote processor to reinitialize it
	 */
	if (loaded)
		return a5xx_zap_shader_resume(gpu);

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	/* Each GPU has a target specific zap shader firmware name to use */
	if (!adreno_gpu->info->zapfw) {
		DRM_DEV_ERROR(&pdev->dev,
			"Zap shader firmware file not specified for this target\n");
		return -ENODEV;
	}

	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);

	loaded = !ret;

	return ret;
}

#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)

static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select countable 6 for RBBM0 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Disable the interrupts through the initial bringup stage */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}

static void a5xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
	}

	if (hang_debug)
		a5xx_dump(gpu);

	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		if (a5xx_gpu->pm4_iova)
			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		if (a5xx_gpu->pfp_iova)
			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		if (a5xx_gpu->gpmu_iova)
			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}

static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
{
	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
		return false;

	/*
	 * Nearly every abnormality ends up pausing the GPU and triggering a
	 * fault so we can safely just watch for this one interrupt to fire
	 */
	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
}

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
{
	struct msm_gpu *gpu = arg;
	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
			iova, flags,
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));

	return -EFAULT;
}

static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}

static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}

static void a5xx_uche_err_irq(struct msm_gpu *gpu)
{
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);

	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);

	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
		addr);
}

static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}

static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	struct msm_drm_private *priv = dev->dev_private;
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);

	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
		ring ? ring->id : -1, ring ? ring->seqno : 0,
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
	del_timer(&gpu->hangcheck_timer);

	queue_work(priv->wq, &gpu->recover_work);
}

#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)

static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};

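/* Pairs of inclusive register ranges captured in debug dumps, terminated by ~0 */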
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
	0xB9A0, 0xB9BF, ~0
};

static void a5xx_dump(struct msm_gpu *gpu)
{
	dev_info(gpu->dev->dev, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}

static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issues with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

#ifdef CONFIG_DEBUG_FS
static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	seq_printf(m, "status: %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));

	/*
	 * Temporarily disable hardware clock gating before going into
	 * adreno_show to avoid issues while reading the registers
	 */
	a5xx_set_hwcg(gpu, false);
	adreno_show(gpu, m);
	a5xx_set_hwcg(gpu, true);
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	return a5xx_gpu->cur_ring;
}

static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	return 0;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a5xx_show,
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
	},
	.get_timestamp = a5xx_get_timestamp,
};

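/*
 * Read the "speed_bin" fuse through nvmem (if the cell is described in DT)
 * and tell the OPP layer which frequency table entries this part supports.
 */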
static void check_speed_bin(struct device *dev)
{
	struct nvmem_cell *cell;
	u32 bin, val;

	cell = nvmem_cell_get(dev, "speed_bin");

	/* If a nvmem cell isn't defined, nothing to do */
	if (IS_ERR(cell))
		return;

	bin = *((u32 *) nvmem_cell_read(cell, NULL));
	nvmem_cell_put(cell);

	val = (1 << bin);

	dev_pm_opp_set_supported_hw(dev, &val, 1);
}

struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	a5xx_gpu->lm_leakage = 0x4E001A;

	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}