]>
Commit | Line | Data |
---|---|---|
7786fd10 BB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright 2019 Collabora Ltd */ | |
3 | ||
4 | #include <drm/drm_file.h> | |
5 | #include <drm/drm_gem_shmem_helper.h> | |
6 | #include <drm/panfrost_drm.h> | |
7 | #include <linux/completion.h> | |
8 | #include <linux/iopoll.h> | |
9 | #include <linux/pm_runtime.h> | |
10 | #include <linux/slab.h> | |
11 | #include <linux/uaccess.h> | |
12 | ||
13 | #include "panfrost_device.h" | |
14 | #include "panfrost_features.h" | |
15 | #include "panfrost_gem.h" | |
16 | #include "panfrost_issues.h" | |
17 | #include "panfrost_job.h" | |
18 | #include "panfrost_mmu.h" | |
6f39188c | 19 | #include "panfrost_perfcnt.h" |
7786fd10 BB |
20 | #include "panfrost_regs.h" |
21 | ||
/*
 * Layout constants used to size the counter dump buffer (see
 * panfrost_perfcnt_init()): each block holds COUNTERS_PER_BLOCK counters of
 * BYTES_PER_COUNTER bytes each.
 */
#define COUNTERS_PER_BLOCK		64
#define BYTES_PER_COUNTER		4
/* On HW_FEATURE_V4 GPUs the buffer is sized per core group. */
#define BLOCKS_PER_COREGROUP		8
#define V4_SHADERS_PER_COREGROUP	4
26 | ||
27 | struct panfrost_perfcnt { | |
bdefca2d | 28 | struct panfrost_gem_mapping *mapping; |
7786fd10 BB |
29 | size_t bosize; |
30 | void *buf; | |
31 | struct panfrost_file_priv *user; | |
32 | struct mutex lock; | |
33 | struct completion dump_comp; | |
34 | }; | |
35 | ||
36 | void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev) | |
37 | { | |
38 | complete(&pfdev->perfcnt->dump_comp); | |
39 | } | |
40 | ||
41 | void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev) | |
42 | { | |
43 | gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES); | |
44 | } | |
45 | ||
46 | static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev) | |
47 | { | |
48 | u64 gpuva; | |
49 | int ret; | |
50 | ||
51 | reinit_completion(&pfdev->perfcnt->dump_comp); | |
bdefca2d | 52 | gpuva = pfdev->perfcnt->mapping->mmnode.start << PAGE_SHIFT; |
7786fd10 BB |
53 | gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva); |
54 | gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32); | |
55 | gpu_write(pfdev, GPU_INT_CLEAR, | |
56 | GPU_IRQ_CLEAN_CACHES_COMPLETED | | |
57 | GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); | |
58 | gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE); | |
59 | ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp, | |
60 | msecs_to_jiffies(1000)); | |
61 | if (!ret) | |
62 | ret = -ETIMEDOUT; | |
63 | else if (ret > 0) | |
64 | ret = 0; | |
65 | ||
66 | return ret; | |
67 | } | |
68 | ||
69 | static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, | |
0a523998 | 70 | struct drm_file *file_priv, |
7786fd10 BB |
71 | unsigned int counterset) |
72 | { | |
0a523998 | 73 | struct panfrost_file_priv *user = file_priv->driver_priv; |
7786fd10 BB |
74 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
75 | struct drm_gem_shmem_object *bo; | |
dde2bb2d | 76 | u32 cfg, as; |
7786fd10 BB |
77 | int ret; |
78 | ||
79 | if (user == perfcnt->user) | |
80 | return 0; | |
81 | else if (perfcnt->user) | |
82 | return -EBUSY; | |
83 | ||
84 | ret = pm_runtime_get_sync(pfdev->dev); | |
85 | if (ret < 0) | |
86 | return ret; | |
87 | ||
88 | bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize); | |
89 | if (IS_ERR(bo)) | |
90 | return PTR_ERR(bo); | |
91 | ||
7786fd10 | 92 | /* Map the perfcnt buf in the address space attached to file_priv. */ |
bdefca2d | 93 | ret = panfrost_gem_open(&bo->base, file_priv); |
7786fd10 BB |
94 | if (ret) |
95 | goto err_put_bo; | |
96 | ||
bdefca2d BB |
97 | perfcnt->mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base), |
98 | user); | |
99 | if (!perfcnt->mapping) { | |
100 | ret = -EINVAL; | |
101 | goto err_close_bo; | |
102 | } | |
103 | ||
7786fd10 BB |
104 | perfcnt->buf = drm_gem_shmem_vmap(&bo->base); |
105 | if (IS_ERR(perfcnt->buf)) { | |
106 | ret = PTR_ERR(perfcnt->buf); | |
bdefca2d | 107 | goto err_put_mapping; |
7786fd10 BB |
108 | } |
109 | ||
110 | /* | |
111 | * Invalidate the cache and clear the counters to start from a fresh | |
112 | * state. | |
113 | */ | |
114 | reinit_completion(&pfdev->perfcnt->dump_comp); | |
115 | gpu_write(pfdev, GPU_INT_CLEAR, | |
116 | GPU_IRQ_CLEAN_CACHES_COMPLETED | | |
117 | GPU_IRQ_PERFCNT_SAMPLE_COMPLETED); | |
118 | gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR); | |
119 | gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES); | |
120 | ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp, | |
121 | msecs_to_jiffies(1000)); | |
122 | if (!ret) { | |
123 | ret = -ETIMEDOUT; | |
124 | goto err_vunmap; | |
125 | } | |
126 | ||
127 | perfcnt->user = user; | |
128 | ||
dde2bb2d BB |
129 | as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu); |
130 | cfg = GPU_PERFCNT_CFG_AS(as) | | |
7786fd10 BB |
131 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); |
132 | ||
133 | /* | |
134 | * Bifrost GPUs have 2 set of counters, but we're only interested by | |
135 | * the first one for now. | |
136 | */ | |
137 | if (panfrost_model_is_bifrost(pfdev)) | |
138 | cfg |= GPU_PERFCNT_CFG_SETSEL(counterset); | |
139 | ||
140 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff); | |
141 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff); | |
142 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff); | |
143 | ||
144 | /* | |
145 | * Due to PRLAM-8186 we need to disable the Tiler before we enable HW | |
146 | * counters. | |
147 | */ | |
148 | if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) | |
149 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); | |
150 | else | |
151 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); | |
152 | ||
153 | gpu_write(pfdev, GPU_PERFCNT_CFG, cfg); | |
154 | ||
155 | if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186)) | |
156 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff); | |
157 | ||
bdefca2d BB |
158 | /* The BO ref is retained by the mapping. */ |
159 | drm_gem_object_put_unlocked(&bo->base); | |
160 | ||
7786fd10 BB |
161 | return 0; |
162 | ||
163 | err_vunmap: | |
bdefca2d BB |
164 | drm_gem_shmem_vunmap(&bo->base, perfcnt->buf); |
165 | err_put_mapping: | |
166 | panfrost_gem_mapping_put(perfcnt->mapping); | |
0a523998 | 167 | err_close_bo: |
bdefca2d | 168 | panfrost_gem_close(&bo->base, file_priv); |
7786fd10 BB |
169 | err_put_bo: |
170 | drm_gem_object_put_unlocked(&bo->base); | |
171 | return ret; | |
172 | } | |
173 | ||
174 | static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, | |
0a523998 | 175 | struct drm_file *file_priv) |
7786fd10 | 176 | { |
0a523998 | 177 | struct panfrost_file_priv *user = file_priv->driver_priv; |
7786fd10 BB |
178 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; |
179 | ||
180 | if (user != perfcnt->user) | |
181 | return -EINVAL; | |
182 | ||
183 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0); | |
184 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0); | |
185 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0); | |
186 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); | |
187 | gpu_write(pfdev, GPU_PERFCNT_CFG, | |
188 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); | |
189 | ||
190 | perfcnt->user = NULL; | |
bdefca2d | 191 | drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base.base, perfcnt->buf); |
7786fd10 | 192 | perfcnt->buf = NULL; |
bdefca2d | 193 | panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv); |
dde2bb2d | 194 | panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu); |
bdefca2d BB |
195 | panfrost_gem_mapping_put(perfcnt->mapping); |
196 | perfcnt->mapping = NULL; | |
7786fd10 BB |
197 | pm_runtime_mark_last_busy(pfdev->dev); |
198 | pm_runtime_put_autosuspend(pfdev->dev); | |
199 | ||
200 | return 0; | |
201 | } | |
202 | ||
203 | int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data, | |
204 | struct drm_file *file_priv) | |
205 | { | |
7786fd10 BB |
206 | struct panfrost_device *pfdev = dev->dev_private; |
207 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; | |
208 | struct drm_panfrost_perfcnt_enable *req = data; | |
209 | int ret; | |
210 | ||
211 | ret = panfrost_unstable_ioctl_check(); | |
212 | if (ret) | |
213 | return ret; | |
214 | ||
215 | /* Only Bifrost GPUs have 2 set of counters. */ | |
216 | if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0)) | |
217 | return -EINVAL; | |
218 | ||
219 | mutex_lock(&perfcnt->lock); | |
220 | if (req->enable) | |
0a523998 | 221 | ret = panfrost_perfcnt_enable_locked(pfdev, file_priv, |
7786fd10 BB |
222 | req->counterset); |
223 | else | |
0a523998 | 224 | ret = panfrost_perfcnt_disable_locked(pfdev, file_priv); |
7786fd10 BB |
225 | mutex_unlock(&perfcnt->lock); |
226 | ||
227 | return ret; | |
228 | } | |
229 | ||
230 | int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data, | |
231 | struct drm_file *file_priv) | |
232 | { | |
233 | struct panfrost_device *pfdev = dev->dev_private; | |
234 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; | |
235 | struct drm_panfrost_perfcnt_dump *req = data; | |
236 | void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr; | |
237 | int ret; | |
238 | ||
239 | ret = panfrost_unstable_ioctl_check(); | |
240 | if (ret) | |
241 | return ret; | |
242 | ||
243 | mutex_lock(&perfcnt->lock); | |
244 | if (perfcnt->user != file_priv->driver_priv) { | |
245 | ret = -EINVAL; | |
246 | goto out; | |
247 | } | |
248 | ||
249 | ret = panfrost_perfcnt_dump_locked(pfdev); | |
250 | if (ret) | |
251 | goto out; | |
252 | ||
253 | if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize)) | |
254 | ret = -EFAULT; | |
255 | ||
256 | out: | |
257 | mutex_unlock(&perfcnt->lock); | |
258 | ||
259 | return ret; | |
260 | } | |
261 | ||
0a523998 | 262 | void panfrost_perfcnt_close(struct drm_file *file_priv) |
7786fd10 | 263 | { |
0a523998 | 264 | struct panfrost_file_priv *pfile = file_priv->driver_priv; |
7786fd10 BB |
265 | struct panfrost_device *pfdev = pfile->pfdev; |
266 | struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; | |
267 | ||
268 | pm_runtime_get_sync(pfdev->dev); | |
269 | mutex_lock(&perfcnt->lock); | |
270 | if (perfcnt->user == pfile) | |
0a523998 | 271 | panfrost_perfcnt_disable_locked(pfdev, file_priv); |
7786fd10 BB |
272 | mutex_unlock(&perfcnt->lock); |
273 | pm_runtime_mark_last_busy(pfdev->dev); | |
274 | pm_runtime_put_autosuspend(pfdev->dev); | |
275 | } | |
276 | ||
277 | int panfrost_perfcnt_init(struct panfrost_device *pfdev) | |
278 | { | |
279 | struct panfrost_perfcnt *perfcnt; | |
280 | size_t size; | |
281 | ||
282 | if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) { | |
283 | unsigned int ncoregroups; | |
284 | ||
285 | ncoregroups = hweight64(pfdev->features.l2_present); | |
286 | size = ncoregroups * BLOCKS_PER_COREGROUP * | |
287 | COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; | |
288 | } else { | |
289 | unsigned int nl2c, ncores; | |
290 | ||
291 | /* | |
292 | * TODO: define a macro to extract the number of l2 caches from | |
293 | * mem_features. | |
294 | */ | |
295 | nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1; | |
296 | ||
297 | /* | |
298 | * shader_present might be sparse, but the counters layout | |
299 | * forces to dump unused regions too, hence the fls64() call | |
300 | * instead of hweight64(). | |
301 | */ | |
302 | ncores = fls64(pfdev->features.shader_present); | |
303 | ||
304 | /* | |
305 | * There's always one JM and one Tiler block, hence the '+ 2' | |
306 | * here. | |
307 | */ | |
308 | size = (nl2c + ncores + 2) * | |
309 | COUNTERS_PER_BLOCK * BYTES_PER_COUNTER; | |
310 | } | |
311 | ||
312 | perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL); | |
313 | if (!perfcnt) | |
314 | return -ENOMEM; | |
315 | ||
316 | perfcnt->bosize = size; | |
317 | ||
318 | /* Start with everything disabled. */ | |
319 | gpu_write(pfdev, GPU_PERFCNT_CFG, | |
320 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); | |
321 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); | |
322 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); | |
323 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); | |
324 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); | |
325 | ||
326 | init_completion(&perfcnt->dump_comp); | |
327 | mutex_init(&perfcnt->lock); | |
328 | pfdev->perfcnt = perfcnt; | |
329 | ||
330 | return 0; | |
331 | } | |
332 | ||
333 | void panfrost_perfcnt_fini(struct panfrost_device *pfdev) | |
334 | { | |
335 | /* Disable everything before leaving. */ | |
336 | gpu_write(pfdev, GPU_PERFCNT_CFG, | |
337 | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF)); | |
338 | gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0); | |
339 | gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0); | |
340 | gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0); | |
341 | gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0); | |
342 | } |