1/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 */
13
14#include <linux/types.h>
15#include <linux/cpumask.h>
16#include <linux/qcom_scm.h>
17#include <linux/dma-mapping.h>
18#include <linux/of_address.h>
19#include <linux/soc/qcom/mdt_loader.h>
20#include <linux/pm_opp.h>
21#include <linux/nvmem-consumer.h>
22#include "msm_gem.h"
23#include "msm_mmu.h"
24#include "a5xx_gpu.h"
25
26extern bool hang_debug;
27static void a5xx_dump(struct msm_gpu *gpu);
28
29#define GPU_PAS_ID 13
30
31static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
32{
33 struct device *dev = &gpu->pdev->dev;
34 const struct firmware *fw;
35 struct device_node *np;
36 struct resource r;
37 phys_addr_t mem_phys;
38 ssize_t mem_size;
39 void *mem_region = NULL;
40 int ret;
41
42 if (!IS_ENABLED(CONFIG_ARCH_QCOM))
43 return -EINVAL;
44
45 np = of_get_child_by_name(dev->of_node, "zap-shader");
46 if (!np)
47 return -ENODEV;
48
49 np = of_parse_phandle(np, "memory-region", 0);
50 if (!np)
51 return -EINVAL;
52
53 ret = of_address_to_resource(np, 0, &r);
54 if (ret)
55 return ret;
56
57 mem_phys = r.start;
58 mem_size = resource_size(&r);
59
60 /* Request the MDT file for the firmware */
61 fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
62 if (IS_ERR(fw)) {
63 DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
64 return PTR_ERR(fw);
65 }
66
67 /* Figure out how much memory we need */
68 mem_size = qcom_mdt_get_size(fw);
69 if (mem_size < 0) {
70 ret = mem_size;
71 goto out;
72 }
73
74 /* Allocate memory for the firmware image */
75 mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
76 if (!mem_region) {
77 ret = -ENOMEM;
78 goto out;
79 }
80
81 /*
82 * Load the rest of the MDT
83 *
84 * Note that we could be dealing with two different paths, since
85 * with upstream linux-firmware it would be in a qcom/ subdir.
86 * adreno_request_fw() handles this, but qcom_mdt_load() does
87 * not. But since we've already gotten through adreno_request_fw()
88 * we know which of the two cases it is:
89 */
90 if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
91 ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
92 mem_region, mem_phys, mem_size);
93 } else {
94 char newname[strlen("qcom/") + strlen(fwname) + 1];
95
96 sprintf(newname, "qcom/%s", fwname);
97
98 ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
99 mem_region, mem_phys, mem_size);
100 }
101 if (ret)
102 goto out;
103
104 /* Send the image to the secure world */
105 ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
106 if (ret)
107 DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
108
109out:
110 if (mem_region)
111 memunmap(mem_region);
112
113 release_firmware(fw);
114
115 return ret;
116}
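/*
 * Editor's illustrative sketch (not part of the original file): the lookup
 * above expects a "zap-shader" child node under the GPU's device-tree node
 * whose "memory-region" phandle points at a reserved region large enough
 * for the unpacked MDT image, roughly:
 *
 *	reserved-memory {
 *		zap_shader_region: zap-shader@8f200000 {
 *			reg = <0x0 0x8f200000 0x0 0x200000>;
 *			no-map;
 *		};
 *	};
 *
 *	gpu {
 *		...
 *		zap-shader {
 *			memory-region = <&zap_shader_region>;
 *		};
 *	};
 *
 * Only the "zap-shader" node name and "memory-region" property come from
 * the code above; node names, unit addresses and sizes are assumptions for
 * illustration only.
 */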
117
118static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
119{
120 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
121 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
122 uint32_t wptr;
123 unsigned long flags;
124
125 spin_lock_irqsave(&ring->lock, flags);
126
127 /* Copy the shadow to the actual register */
128 ring->cur = ring->next;
129
130 /* Make sure to wrap wptr if we need to */
131 wptr = get_wptr(ring);
132
133 spin_unlock_irqrestore(&ring->lock, flags);
134
135 /* Make sure everything is posted before making a decision */
136 mb();
137
138 /* Update HW if this is the current ring and we are not in preempt */
139 if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
140 gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
141}
142
143static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
144 struct msm_file_private *ctx)
145{
146 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
147 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
148 struct msm_drm_private *priv = gpu->dev->dev_private;
149 struct msm_ringbuffer *ring = submit->ring;
150 unsigned int i, ibs = 0;
151
152 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
153 OUT_RING(ring, 0x02);
154
155 /* Turn off protected mode to write to special registers */
156 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
157 OUT_RING(ring, 0);
158
159 /* Set the save preemption record for the ring/command */
160 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
161 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
162 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
163
164 /* Turn back on protected mode */
165 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
166 OUT_RING(ring, 1);
167
168 /* Enable local preemption for fine-grained preemption */
169 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
170 OUT_RING(ring, 0x02);
171
172 /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
173 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
174 OUT_RING(ring, 0x02);
175
176 /* Submit the commands */
177 for (i = 0; i < submit->nr_cmds; i++) {
178 switch (submit->cmd[i].type) {
179 case MSM_SUBMIT_CMD_IB_TARGET_BUF:
180 break;
181 case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
182 if (priv->lastctx == ctx)
183 break;
184 case MSM_SUBMIT_CMD_BUF:
185 OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
186 OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
187 OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
188 OUT_RING(ring, submit->cmd[i].size);
189 ibs++;
190 break;
191 }
192 }
193
194 /*
195 * Write the render mode to NULL (0) to indicate to the CP that the IBs
196 * are done rendering - otherwise a lucky preemption would start
197 * replaying from the last checkpoint
198 */
199 OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
200 OUT_RING(ring, 0);
201 OUT_RING(ring, 0);
202 OUT_RING(ring, 0);
203 OUT_RING(ring, 0);
204 OUT_RING(ring, 0);
205
206 /* Turn off IB level preemptions */
207 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
208 OUT_RING(ring, 0x01);
209
210 /* Write the fence to the scratch register */
211 OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
212 OUT_RING(ring, submit->seqno);
213
214 /*
215 * Execute a CACHE_FLUSH_TS event. This will ensure that the
216 * timestamp is written to the memory and then triggers the interrupt
217 */
218 OUT_PKT7(ring, CP_EVENT_WRITE, 4);
219 OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
220 OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
221 OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
222 OUT_RING(ring, submit->seqno);
223
224 /* Yield the floor on command completion */
225 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
226 /*
227 * If dword[2:1] are non-zero, they specify an address for the CP to
228 * write the value of dword[3] to on preemption complete. Write 0 to
229 * skip the write
230 */
231 OUT_RING(ring, 0x00);
232 OUT_RING(ring, 0x00);
233 /* Data value - not used if the address above is 0 */
234 OUT_RING(ring, 0x01);
235 /* Set bit 0 to trigger an interrupt on preempt complete */
236 OUT_RING(ring, 0x01);
237
238 a5xx_flush(gpu, ring);
239
240 /* Check to see if we need to start preemption */
241 a5xx_preempt_trigger(gpu);
242}
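/*
 * Editor's summary sketch (not additional commands): after the preemption
 * preamble, the stream emitted by a5xx_submit() for a single IB looks
 * roughly like this, using the values hard-coded above:
 *
 *	PKT7(CP_INDIRECT_BUFFER_PFE)  { iova_lo, iova_hi, size }
 *	PKT7(CP_SET_RENDER_MODE)      { 0, 0, 0, 0, 0 }
 *	PKT7(CP_YIELD_ENABLE)         { 0x01 }
 *	PKT4(CP_SCRATCH_REG(2))       { seqno }
 *	PKT7(CP_EVENT_WRITE)          { CACHE_FLUSH_TS | (1 << 31),
 *	                                fence_addr_lo, fence_addr_hi, seqno }
 *	PKT7(CP_CONTEXT_SWITCH_YIELD) { 0, 0, 1, 1 }
 */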
243
244static const struct {
245 u32 offset;
246 u32 value;
247} a5xx_hwcg[] = {
248 {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
249 {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
250 {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
251 {REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
252 {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
253 {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
254 {REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
255 {REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
256 {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
257 {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
258 {REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
259 {REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
260 {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
261 {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
262 {REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
263 {REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
264 {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
265 {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
266 {REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
267 {REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
268 {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
269 {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
270 {REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
271 {REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
272 {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
273 {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
274 {REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
275 {REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
276 {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
277 {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
278 {REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
279 {REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
280 {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
281 {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
282 {REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
283 {REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
284 {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
285 {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
286 {REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
287 {REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
288 {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
289 {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
290 {REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
291 {REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
292 {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
293 {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
294 {REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
295 {REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
296 {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
297 {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
298 {REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
299 {REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
300 {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
301 {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
302 {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
303 {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
304 {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
305 {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
306 {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
307 {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
308 {REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
309 {REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
310 {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
311 {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
312 {REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
313 {REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
314 {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
315 {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
316 {REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
317 {REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
318 {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
319 {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
320 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
321 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
322 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
323 {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
324 {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
325 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
326 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
327 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
328 {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
329 {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
330 {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
331 {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
332 {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
333 {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
334 {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
335 {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
336 {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
337 {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
338 {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
339 {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
340};
341
342void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
343{
344 unsigned int i;
345
346 for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
347 gpu_write(gpu, a5xx_hwcg[i].offset,
348 state ? a5xx_hwcg[i].value : 0);
349
350 gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
351 gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
352}
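/*
 * Usage sketch, drawn from this file: a5xx_hw_init() enables clock gating
 * with a5xx_set_hwcg(gpu, true), and a5xx_show() temporarily disables it
 * around the register dump so the reads are not affected by gated clocks:
 *
 *	a5xx_set_hwcg(gpu, false);
 *	adreno_show(gpu, m);
 *	a5xx_set_hwcg(gpu, true);
 */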
353
354static int a5xx_me_init(struct msm_gpu *gpu)
355{
356 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
357 struct msm_ringbuffer *ring = gpu->rb[0];
358
359 OUT_PKT7(ring, CP_ME_INIT, 8);
360
361 OUT_RING(ring, 0x0000002F);
362
363 /* Enable multiple hardware contexts */
364 OUT_RING(ring, 0x00000003);
365
366 /* Enable error detection */
367 OUT_RING(ring, 0x20000000);
368
369 /* Don't enable header dump */
370 OUT_RING(ring, 0x00000000);
371 OUT_RING(ring, 0x00000000);
372
373 /* Specify workarounds for various microcode issues */
374 if (adreno_is_a530(adreno_gpu)) {
375 /* Workaround for token end syncs
376 * Force a WFI after every direct-render 3D mode draw and every
377 * 2D mode 3 draw
378 */
379 OUT_RING(ring, 0x0000000B);
380 } else {
381 /* No workarounds enabled */
382 OUT_RING(ring, 0x00000000);
383 }
384
385 OUT_RING(ring, 0x00000000);
386 OUT_RING(ring, 0x00000000);
387
388 gpu->funcs->flush(gpu, ring);
389 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
390}
391
392static int a5xx_preempt_start(struct msm_gpu *gpu)
393{
394 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
395 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
396 struct msm_ringbuffer *ring = gpu->rb[0];
397
398 if (gpu->nr_rings == 1)
399 return 0;
400
401 /* Turn off protected mode to write to special registers */
402 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
403 OUT_RING(ring, 0);
404
405 /* Set the save preemption record for the ring/command */
406 OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
407 OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
408 OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
409
410 /* Turn back on protected mode */
411 OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
412 OUT_RING(ring, 1);
413
414 OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
415 OUT_RING(ring, 0x00);
416
417 OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
418 OUT_RING(ring, 0x01);
419
420 OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
421 OUT_RING(ring, 0x01);
422
423 /* Yield the floor on command completion */
424 OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
425 OUT_RING(ring, 0x00);
426 OUT_RING(ring, 0x00);
427 OUT_RING(ring, 0x01);
428 OUT_RING(ring, 0x01);
429
430 gpu->funcs->flush(gpu, ring);
431
432 return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
433}
434
435
436static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
437 const struct firmware *fw, u64 *iova)
438{
439 struct drm_gem_object *bo;
440 void *ptr;
441
442 ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
443 MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);
444
445 if (IS_ERR(ptr))
446 return ERR_CAST(ptr);
447
448 memcpy(ptr, &fw->data[4], fw->size - 4);
449
450 msm_gem_put_vaddr(bo);
451 return bo;
452}
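/*
 * Editor's note: the copy above deliberately skips the first dword of the
 * firmware image (fw->data[4] onward), which is presumed to be a
 * header/version word rather than microcode. A minimal usage sketch,
 * mirroring a5xx_ucode_init() below:
 *
 *	a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
 *		&a5xx_gpu->pm4_iova);
 *	if (IS_ERR(a5xx_gpu->pm4_bo))
 *		return PTR_ERR(a5xx_gpu->pm4_bo);
 */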
453
454static int a5xx_ucode_init(struct msm_gpu *gpu)
455{
456 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
457 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
458 int ret;
459
460 if (!a5xx_gpu->pm4_bo) {
461 a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
462 &a5xx_gpu->pm4_iova);
463
464 if (IS_ERR(a5xx_gpu->pm4_bo)) {
465 ret = PTR_ERR(a5xx_gpu->pm4_bo);
466 a5xx_gpu->pm4_bo = NULL;
467 dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
468 ret);
469 return ret;
470 }
471 }
472
473 if (!a5xx_gpu->pfp_bo) {
474 a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp,
475 &a5xx_gpu->pfp_iova);
476
477 if (IS_ERR(a5xx_gpu->pfp_bo)) {
478 ret = PTR_ERR(a5xx_gpu->pfp_bo);
479 a5xx_gpu->pfp_bo = NULL;
480 dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
481 ret);
482 return ret;
483 }
484 }
485
486 gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
487 REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
488
489 gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
490 REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
491
492 return 0;
493}
494
495#define SCM_GPU_ZAP_SHADER_RESUME 0
496
497static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
498{
499 int ret;
500
501 ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
502 if (ret)
503 DRM_ERROR("%s: zap-shader resume failed: %d\n",
504 gpu->name, ret);
505
506 return ret;
507}
508
509static int a5xx_zap_shader_init(struct msm_gpu *gpu)
510{
511 static bool loaded;
512 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
513 struct platform_device *pdev = gpu->pdev;
514 int ret;
515
516 /*
517 * If the zap shader is already loaded into memory we just need to kick
518 * the remote processor to reinitialize it
519 */
520 if (loaded)
521 return a5xx_zap_shader_resume(gpu);
522
523 /* We need SCM to be able to load the firmware */
524 if (!qcom_scm_is_available()) {
525 DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
526 return -EPROBE_DEFER;
527 }
528
529 /* Each GPU has a target specific zap shader firmware name to use */
530 if (!adreno_gpu->info->zapfw) {
531 DRM_DEV_ERROR(&pdev->dev,
532 "Zap shader firmware file not specified for this target\n");
533 return -ENODEV;
534 }
535
536 ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
537
538 loaded = !ret;
539
540 return ret;
541}
542
543#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
544 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
545 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
546 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
547 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
548 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
549 A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
550 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
551 A5XX_RBBM_INT_0_MASK_CP_SW | \
552 A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
553 A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
554 A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
555
556static int a5xx_hw_init(struct msm_gpu *gpu)
557{
558 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
559 int ret;
560
561 gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
562
563 /* Make all blocks contribute to the GPU BUSY perf counter */
564 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
565
566 /* Enable RBBM error reporting bits */
567 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
568
569 if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
570 /*
571 * Mask out the activity signals from RB1-3 to avoid false
572 * positives
573 */
574
575 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
576 0xF0000000);
577 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
578 0xFFFFFFFF);
579 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
580 0xFFFFFFFF);
581 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
582 0xFFFFFFFF);
583 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
584 0xFFFFFFFF);
585 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
586 0xFFFFFFFF);
587 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
588 0xFFFFFFFF);
589 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
590 0xFFFFFFFF);
591 }
592
593 /* Enable fault detection */
594 gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
595 (1 << 30) | 0xFFFF);
596
597 /* Turn on performance counters */
598 gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
599
600 /* Select CP0 to always count cycles */
601 gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
602
603 /* Increase VFD cache access so LRZ and other data gets evicted less */
604 gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
605
606 /* Disable L2 bypass in the UCHE */
607 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
608 gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
609 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
610 gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
611
612 /* Set the GMEM VA range (0 to gpu->gmem) */
613 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
614 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
615 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
616 0x00100000 + adreno_gpu->gmem - 1);
617 gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
618
619 gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
620 gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
621 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
622 gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
623
624 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
625
626 if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
627 gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
628
629 gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
630
631 /* Enable USE_RETENTION_FLOPS */
632 gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
633
634 /* Enable ME/PFP split notification */
635 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
636
637 /* Enable HWCG */
638 a5xx_set_hwcg(gpu, true);
639
640 gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
641
642 /* Set the highest bank bit */
643 gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
644 gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
645
646 /* Protect registers from the CP */
647 gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
648
649 /* RBBM */
650 gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
651 gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
652 gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
653 gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
654 gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
655 gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
656
657 /* Content protect */
658 gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
659 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
660 16));
661 gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
662 ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
663
664 /* CP */
665 gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
666 gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
667 gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
668 gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
669
670 /* RB */
671 gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
672 gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
673
674 /* VPC */
675 gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
676 gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
677
678 /* UCHE */
679 gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
680
681 if (adreno_is_a530(adreno_gpu))
682 gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
683 ADRENO_PROTECT_RW(0x10000, 0x8000));
684
685 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
686 /*
687 * Disable the trusted memory range - we don't actually support secure
688 * memory rendering at this point in time and we don't want to block off
689 * part of the virtual memory space.
690 */
691 gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
692 REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
693 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
694
695 ret = adreno_hw_init(gpu);
696 if (ret)
697 return ret;
698
699 a5xx_preempt_hw_init(gpu);
700
701 a5xx_gpmu_ucode_init(gpu);
702
703 ret = a5xx_ucode_init(gpu);
704 if (ret)
705 return ret;
706
707 /* Disable the interrupts through the initial bringup stage */
708 gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
709
710 /* Clear ME_HALT to start the micro engine */
711 gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
712 ret = a5xx_me_init(gpu);
713 if (ret)
714 return ret;
715
716 ret = a5xx_power_init(gpu);
717 if (ret)
718 return ret;
719
720 /*
721 * Send a pipeline event stat to get misbehaving counters to start
722 * ticking correctly
723 */
724 if (adreno_is_a530(adreno_gpu)) {
725 OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
726 OUT_RING(gpu->rb[0], 0x0F);
727
728 gpu->funcs->flush(gpu, gpu->rb[0]);
729 if (!a5xx_idle(gpu, gpu->rb[0]))
730 return -EINVAL;
731 }
732
733 /*
734 * Try to load a zap shader into the secure world. If successful
735 * we can use the CP to switch out of secure mode. If not then we
736 * have no recourse but to try to switch ourselves out manually. If we
737 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
738 * be blocked and a permissions violation will soon follow.
739 */
740 ret = a5xx_zap_shader_init(gpu);
741 if (!ret) {
742 OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
743 OUT_RING(gpu->rb[0], 0x00000000);
744
745 gpu->funcs->flush(gpu, gpu->rb[0]);
746 if (!a5xx_idle(gpu, gpu->rb[0]))
747 return -EINVAL;
748 } else {
749 /* Print a warning so that if we die, we know why */
750 dev_warn_once(gpu->dev->dev,
751 "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
752 gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
753 }
754
755 /* Last step - yield the ringbuffer */
756 a5xx_preempt_start(gpu);
757
758 return 0;
759}
760
761static void a5xx_recover(struct msm_gpu *gpu)
762{
763 int i;
764
765 adreno_dump_info(gpu);
766
767 for (i = 0; i < 8; i++) {
768 printk("CP_SCRATCH_REG%d: %u\n", i,
769 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
770 }
771
772 if (hang_debug)
773 a5xx_dump(gpu);
774
775 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
776 gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
777 gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
778 adreno_recover(gpu);
779}
780
781static void a5xx_destroy(struct msm_gpu *gpu)
782{
783 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
784 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
785
786 DBG("%s", gpu->name);
787
788 a5xx_preempt_fini(gpu);
789
790 if (a5xx_gpu->pm4_bo) {
791 if (a5xx_gpu->pm4_iova)
792 msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
793 drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo);
794 }
795
796 if (a5xx_gpu->pfp_bo) {
797 if (a5xx_gpu->pfp_iova)
798 msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
799 drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo);
800 }
801
802 if (a5xx_gpu->gpmu_bo) {
803 if (a5xx_gpu->gpmu_iova)
804 msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
805 drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
806 }
807
808 adreno_gpu_cleanup(adreno_gpu);
809 kfree(a5xx_gpu);
810}
811
812static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
813{
814 if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
815 return false;
816
817 /*
818 * Nearly every abnormality ends up pausing the GPU and triggering a
819 * fault so we can safely just watch for this one interrupt to fire
820 */
821 return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
822 A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
823}
824
825bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
826{
827 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
828 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
829
830 if (ring != a5xx_gpu->cur_ring) {
831 WARN(1, "Tried to idle a non-current ringbuffer\n");
832 return false;
833 }
834
835 /* wait for CP to drain ringbuffer: */
836 if (!adreno_idle(gpu, ring))
837 return false;
838
839 if (spin_until(_a5xx_check_idle(gpu))) {
840 DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
841 gpu->name, __builtin_return_address(0),
842 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
843 gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
844 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
845 gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
846 return false;
847 }
848
849 return true;
850}
851
852static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
853{
854 struct msm_gpu *gpu = arg;
855 pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
856 iova, flags,
857 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
858 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
859 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
860 gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
861
862 return -EFAULT;
863}
864
865static void a5xx_cp_err_irq(struct msm_gpu *gpu)
866{
867 u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
868
869 if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
870 u32 val;
871
872 gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
873
874 /*
875 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
876 * read it twice
877 */
878
879 gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
880 val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
881
882 dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
883 val);
884 }
885
886 if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
887 dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
888 gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
889
890 if (status & A5XX_CP_INT_CP_DMA_ERROR)
891 dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
892
893 if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
894 u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
895
896 dev_err_ratelimited(gpu->dev->dev,
897 "CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
898 val & (1 << 24) ? "WRITE" : "READ",
899 (val & 0xFFFFF) >> 2, val);
900 }
901
902 if (status & A5XX_CP_INT_CP_AHB_ERROR) {
903 u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
904 const char *access[16] = { "reserved", "reserved",
905 "timestamp lo", "timestamp hi", "pfp read", "pfp write",
906 "", "", "me read", "me write", "", "", "crashdump read",
907 "crashdump write" };
908
909 dev_err_ratelimited(gpu->dev->dev,
910 "CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
911 status & 0xFFFFF, access[(status >> 24) & 0xF],
912 (status & (1 << 31)), status);
913 }
914}
915
916static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
917{
918 if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
919 u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
920
921 dev_err_ratelimited(gpu->dev->dev,
922 "RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
923 val & (1 << 28) ? "WRITE" : "READ",
924 (val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
925 (val >> 24) & 0xF);
926
927 /* Clear the error */
928 gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
929
930 /* Clear the interrupt */
931 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
932 A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
933 }
934
935 if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
936 dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
937
938 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
939 dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
940 gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
941
942 if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
943 dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
944 gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
945
946 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
947 dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
948 gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
949
950 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
951 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
952
953 if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
954 dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
955}
956
957static void a5xx_uche_err_irq(struct msm_gpu *gpu)
958{
959 uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
960
961 addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
962
963 dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
964 addr);
965}
966
967static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
968{
969 dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
970}
971
972static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
973{
974 struct drm_device *dev = gpu->dev;
975 struct msm_drm_private *priv = dev->dev_private;
976 struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
977
978 dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
979 ring ? ring->id : -1, ring ? ring->seqno : 0,
980 gpu_read(gpu, REG_A5XX_RBBM_STATUS),
981 gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
982 gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
983 gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
984 gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
985 gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
986 gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
987
988 /* Turn off the hangcheck timer to keep it from bothering us */
989 del_timer(&gpu->hangcheck_timer);
990
991 queue_work(priv->wq, &gpu->recover_work);
992}
993
994#define RBBM_ERROR_MASK \
995 (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
996 A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
997 A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
998 A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
999 A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1000 A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1001
1002static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1003{
1004 u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1005
1006 /*
1007 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1008 * before the source is cleared the interrupt will storm.
1009 */
1010 gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1011 status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1012
1013 /* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1014 if (status & RBBM_ERROR_MASK)
1015 a5xx_rbbm_err_irq(gpu, status);
1016
1017 if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1018 a5xx_cp_err_irq(gpu);
1019
1020 if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1021 a5xx_fault_detect_irq(gpu);
1022
1023 if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1024 a5xx_uche_err_irq(gpu);
1025
1026 if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1027 a5xx_gpmu_err_irq(gpu);
1028
1029 if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1030 a5xx_preempt_trigger(gpu);
1031 msm_gpu_retire(gpu);
1032 }
1033
1034 if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1035 a5xx_preempt_irq(gpu);
1036
1037 return IRQ_HANDLED;
1038}
1039
1040static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1041 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1042 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1043 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1044 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1045 REG_A5XX_CP_RB_RPTR_ADDR_HI),
1046 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1047 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1048 REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1049};
1050
1051static const u32 a5xx_registers[] = {
1052 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1053 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1054 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1055 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1056 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1057 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1058 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1059 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1060 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1061 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1062 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1063 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1064 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1065 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1066 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1067 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1068 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1069 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1070 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1071 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1072 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1073 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1074 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1075 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1076 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1077 0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
1078 0xB9A0, 0xB9BF, ~0
1079};
1080
1081static void a5xx_dump(struct msm_gpu *gpu)
1082{
1083 dev_info(gpu->dev->dev, "status: %08x\n",
1084 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1085 adreno_dump(gpu);
1086}
1087
1088static int a5xx_pm_resume(struct msm_gpu *gpu)
1089{
1090 int ret;
1091
1092 /* Turn on the core power */
1093 ret = msm_gpu_pm_resume(gpu);
1094 if (ret)
1095 return ret;
1096
1097 /* Turn on the RBCCU domain first to limit the chances of voltage droop */
1098 gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1099
1100 /* Wait 3 usecs before polling */
1101 udelay(3);
1102
1103 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1104 (1 << 20), (1 << 20));
1105 if (ret) {
1106 DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1107 gpu->name,
1108 gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1109 return ret;
1110 }
1111
1112 /* Turn on the SP domain */
1113 gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1114 ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1115 (1 << 20), (1 << 20));
1116 if (ret)
1117 DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1118 gpu->name);
1119
1120 return ret;
1121}
1122
1123static int a5xx_pm_suspend(struct msm_gpu *gpu)
1124{
1125 /* Clear the VBIF pipe before shutting down */
1126 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1127 spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1128
1129 gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1130
1131 /*
1132 * Reset the VBIF before power collapse to avoid issues with FIFO
1133 * entries
1134 */
1135 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1136 gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1137
1138 return msm_gpu_pm_suspend(gpu);
1139}
1140
1141static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1142{
1143 *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1144 REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1145
1146 return 0;
1147}
1148
1149#ifdef CONFIG_DEBUG_FS
1150static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
1151{
1152 seq_printf(m, "status: %08x\n",
1153 gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1154
1155 /*
1156 * Temporarily disable hardware clock gating before going into
1157 * adreno_show to avoid issues while reading the registers
1158 */
1159 a5xx_set_hwcg(gpu, false);
1160 adreno_show(gpu, m);
1161 a5xx_set_hwcg(gpu, true);
1162}
1163#endif
1164
1165static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1166{
1167 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1168 struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1169
1170 return a5xx_gpu->cur_ring;
1171}
1172
1173static const struct adreno_gpu_funcs funcs = {
1174 .base = {
1175 .get_param = adreno_get_param,
1176 .hw_init = a5xx_hw_init,
1177 .pm_suspend = a5xx_pm_suspend,
1178 .pm_resume = a5xx_pm_resume,
1179 .recover = a5xx_recover,
1180 .submit = a5xx_submit,
1181 .flush = a5xx_flush,
1182 .active_ring = a5xx_active_ring,
1183 .irq = a5xx_irq,
1184 .destroy = a5xx_destroy,
1185#ifdef CONFIG_DEBUG_FS
1186 .show = a5xx_show,
1187#endif
1188 },
1189 .get_timestamp = a5xx_get_timestamp,
1190};
1191
1192static void check_speed_bin(struct device *dev)
1193{
1194 struct nvmem_cell *cell;
1195 u32 bin, val;
1196
1197 cell = nvmem_cell_get(dev, "speed_bin");
1198
1199 /* If an nvmem cell isn't defined, nothing to do */
1200 if (IS_ERR(cell))
1201 return;
1202
1203 bin = *((u32 *) nvmem_cell_read(cell, NULL));
1204 nvmem_cell_put(cell);
1205
1206 val = (1 << bin);
1207
1208 dev_pm_opp_set_supported_hw(dev, &val, 1);
1209}
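/*
 * Editor's illustrative sketch (assumptions, not taken from this file):
 * check_speed_bin() expects the GPU node to reference an nvmem cell named
 * "speed_bin", and each OPP to carry an opp-supported-hw mask that is
 * matched against (1 << bin), e.g.:
 *
 *	gpu {
 *		...
 *		nvmem-cells = <&gpu_speed_bin>;
 *		nvmem-cell-names = "speed_bin";
 *	};
 *
 *	opp-600000000 {
 *		opp-hz = /bits/ 64 <600000000>;
 *		opp-supported-hw = <0x3>;
 *	};
 *
 * Only the "speed_bin" cell name and the (1 << bin) mask come from the
 * code above; node names and values are examples.
 */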
1210
1211struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1212{
1213 struct msm_drm_private *priv = dev->dev_private;
1214 struct platform_device *pdev = priv->gpu_pdev;
1215 struct a5xx_gpu *a5xx_gpu = NULL;
1216 struct adreno_gpu *adreno_gpu;
1217 struct msm_gpu *gpu;
1218 int ret;
1219
1220 if (!pdev) {
1221 dev_err(dev->dev, "No A5XX device is defined\n");
1222 return ERR_PTR(-ENXIO);
1223 }
1224
1225 a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1226 if (!a5xx_gpu)
1227 return ERR_PTR(-ENOMEM);
1228
1229 adreno_gpu = &a5xx_gpu->base;
1230 gpu = &adreno_gpu->base;
1231
1232 adreno_gpu->registers = a5xx_registers;
1233 adreno_gpu->reg_offsets = a5xx_register_offsets;
1234
1235 a5xx_gpu->lm_leakage = 0x4E001A;
1236
1237 check_speed_bin(&pdev->dev);
1238
1239 ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1240 if (ret) {
1241 a5xx_destroy(&(a5xx_gpu->base.base));
1242 return ERR_PTR(ret);
1243 }
1244
1245 if (gpu->aspace)
1246 msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1247
1248 /* Set up the preemption specific bits and pieces for each ringbuffer */
1249 a5xx_preempt_init(gpu);
1250
1251 return gpu;
1252}