]>
Commit | Line | Data |
---|---|---|
130e0371 OG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | #include "amdgpu_amdkfd.h" | |
2f7d10b3 | 24 | #include "amd_shared.h" |
fdf2f6c5 | 25 | |
130e0371 | 26 | #include "amdgpu.h" |
2db0cdbe | 27 | #include "amdgpu_gfx.h" |
2fbd6f94 | 28 | #include "amdgpu_dma_buf.h" |
130e0371 | 29 | #include <linux/module.h> |
1dde0ea9 | 30 | #include <linux/dma-buf.h> |
da361dd1 | 31 | #include "amdgpu_xgmi.h" |
1d251d90 | 32 | #include <uapi/linux/kfd_ioctl.h> |
130e0371 | 33 | |
/* VMID bitmap reported to amdkfd through the shared resources and tested by
 * amdgpu_amdkfd_is_kfd_vmid(); bits 8..15 are set.
 */
static const unsigned int compute_vmid_bitmap = 0xFF00;

/* Total memory size in system memory and all GPU VRAM. Used to
 * estimate worst case amount of memory to reserve for page tables
 */
uint64_t amdgpu_amdkfd_total_mem_size;
40 | ||
efb1c658 | 41 | int amdgpu_amdkfd_init(void) |
130e0371 | 42 | { |
611736d8 | 43 | struct sysinfo si; |
efb1c658 OG |
44 | int ret; |
45 | ||
611736d8 FK |
46 | si_meminfo(&si); |
47 | amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; | |
48 | amdgpu_amdkfd_total_mem_size *= si.mem_unit; | |
49 | ||
82b7b619 | 50 | #ifdef CONFIG_HSA_AMD |
308176d6 | 51 | ret = kgd2kfd_init(); |
82b7b619 | 52 | amdgpu_amdkfd_gpuvm_init_mem_limits(); |
efb1c658 OG |
53 | #else |
54 | ret = -ENOENT; | |
130e0371 | 55 | #endif |
fcdfa432 | 56 | |
efb1c658 | 57 | return ret; |
130e0371 OG |
58 | } |
59 | ||
/**
 * amdgpu_amdkfd_fini - module-level amdkfd teardown
 *
 * Simply forwards to kgd2kfd_exit().
 */
void amdgpu_amdkfd_fini(void)
{
	kgd2kfd_exit();
}
64 | ||
65 | void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) | |
130e0371 | 66 | { |
050091ab | 67 | bool vf = amdgpu_sriov_vf(adev); |
5c33f214 | 68 | |
8e07e267 | 69 | adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev, |
e392c887 | 70 | adev->pdev, adev->asic_type, vf); |
611736d8 FK |
71 | |
72 | if (adev->kfd.dev) | |
73 | amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; | |
130e0371 OG |
74 | } |
75 | ||
22cb0164 AD |
76 | /** |
77 | * amdgpu_doorbell_get_kfd_info - Report doorbell configuration required to | |
78 | * setup amdkfd | |
79 | * | |
80 | * @adev: amdgpu_device pointer | |
81 | * @aperture_base: output returning doorbell aperture base physical address | |
82 | * @aperture_size: output returning doorbell aperture size in bytes | |
83 | * @start_offset: output returning # of doorbell bytes reserved for amdgpu. | |
84 | * | |
85 | * amdgpu and amdkfd share the doorbell aperture. amdgpu sets it up, | |
86 | * takes doorbells required for its own rings and reports the setup to amdkfd. | |
87 | * amdgpu reserved doorbells are at the start of the doorbell aperture. | |
88 | */ | |
89 | static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, | |
90 | phys_addr_t *aperture_base, | |
91 | size_t *aperture_size, | |
92 | size_t *start_offset) | |
93 | { | |
94 | /* | |
95 | * The first num_doorbells are used by amdgpu. | |
96 | * amdkfd takes whatever's left in the aperture. | |
97 | */ | |
98 | if (adev->doorbell.size > adev->doorbell.num_doorbells * sizeof(u32)) { | |
99 | *aperture_base = adev->doorbell.base; | |
100 | *aperture_size = adev->doorbell.size; | |
101 | *start_offset = adev->doorbell.num_doorbells * sizeof(u32); | |
102 | } else { | |
103 | *aperture_base = 0; | |
104 | *aperture_size = 0; | |
105 | *start_offset = 0; | |
106 | } | |
107 | } | |
108 | ||
dc102c43 | 109 | void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) |
130e0371 | 110 | { |
234441dd | 111 | int i; |
d0b63bb3 | 112 | int last_valid_bit; |
611736d8 FK |
113 | |
114 | if (adev->kfd.dev) { | |
130e0371 | 115 | struct kgd2kfd_shared_resources gpu_resources = { |
155494db | 116 | .compute_vmid_bitmap = compute_vmid_bitmap, |
d0b63bb3 | 117 | .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, |
155494db FK |
118 | .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, |
119 | .gpuvm_size = min(adev->vm_manager.max_pfn | |
120 | << AMDGPU_GPU_PAGE_SHIFT, | |
ad9a5b78 | 121 | AMDGPU_GMC_HOLE_START), |
234441dd YZ |
122 | .drm_render_minor = adev->ddev->render->index, |
123 | .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, | |
124 | ||
130e0371 OG |
125 | }; |
126 | ||
d0b63bb3 | 127 | /* this is going to have a few of the MSBs set that we need to |
0d87c9cf KR |
128 | * clear |
129 | */ | |
e6945304 | 130 | bitmap_complement(gpu_resources.cp_queue_bitmap, |
d0b63bb3 AR |
131 | adev->gfx.mec.queue_bitmap, |
132 | KGD_MAX_QUEUES); | |
133 | ||
134 | /* According to linux/bitmap.h we shouldn't use bitmap_clear if | |
0d87c9cf KR |
135 | * nbits is not compile time constant |
136 | */ | |
3447d220 | 137 | last_valid_bit = 1 /* only first MEC can have compute queues */ |
d0b63bb3 AR |
138 | * adev->gfx.mec.num_pipe_per_mec |
139 | * adev->gfx.mec.num_queue_per_pipe; | |
140 | for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) | |
e6945304 | 141 | clear_bit(i, gpu_resources.cp_queue_bitmap); |
d0b63bb3 | 142 | |
dc102c43 | 143 | amdgpu_doorbell_get_kfd_info(adev, |
130e0371 OG |
144 | &gpu_resources.doorbell_physical_address, |
145 | &gpu_resources.doorbell_aperture_size, | |
146 | &gpu_resources.doorbell_start_offset); | |
c5892230 | 147 | |
1f86805a YZ |
148 | /* Since SOC15, BIF starts to statically use the |
149 | * lower 12 bits of doorbell addresses for routing | |
150 | * based on settings in registers like | |
151 | * SDMA0_DOORBELL_RANGE etc.. | |
152 | * In order to route a doorbell to CP engine, the lower | |
153 | * 12 bits of its address has to be outside the range | |
154 | * set for SDMA, VCN, and IH blocks. | |
c5892230 | 155 | */ |
234441dd YZ |
156 | if (adev->asic_type >= CHIP_VEGA10) { |
157 | gpu_resources.non_cp_doorbells_start = | |
158 | adev->doorbell_index.first_non_cp; | |
159 | gpu_resources.non_cp_doorbells_end = | |
160 | adev->doorbell_index.last_non_cp; | |
161 | } | |
130e0371 | 162 | |
3a0c3423 | 163 | kgd2kfd_device_init(adev->kfd.dev, adev->ddev, &gpu_resources); |
130e0371 OG |
164 | } |
165 | } | |
166 | ||
dc102c43 | 167 | void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) |
130e0371 | 168 | { |
611736d8 | 169 | if (adev->kfd.dev) { |
8e07e267 | 170 | kgd2kfd_device_exit(adev->kfd.dev); |
611736d8 | 171 | adev->kfd.dev = NULL; |
130e0371 OG |
172 | } |
173 | } | |
174 | ||
dc102c43 | 175 | void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, |
130e0371 OG |
176 | const void *ih_ring_entry) |
177 | { | |
611736d8 | 178 | if (adev->kfd.dev) |
8e07e267 | 179 | kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry); |
130e0371 OG |
180 | } |
181 | ||
9593f4d6 | 182 | void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) |
130e0371 | 183 | { |
611736d8 | 184 | if (adev->kfd.dev) |
9593f4d6 | 185 | kgd2kfd_suspend(adev->kfd.dev, run_pm); |
130e0371 OG |
186 | } |
187 | ||
9593f4d6 | 188 | int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) |
130e0371 OG |
189 | { |
190 | int r = 0; | |
191 | ||
611736d8 | 192 | if (adev->kfd.dev) |
9593f4d6 | 193 | r = kgd2kfd_resume(adev->kfd.dev, run_pm); |
130e0371 OG |
194 | |
195 | return r; | |
196 | } | |
197 | ||
5c6dd71e SL |
198 | int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) |
199 | { | |
200 | int r = 0; | |
201 | ||
611736d8 | 202 | if (adev->kfd.dev) |
8e07e267 | 203 | r = kgd2kfd_pre_reset(adev->kfd.dev); |
5c6dd71e SL |
204 | |
205 | return r; | |
206 | } | |
207 | ||
208 | int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) | |
209 | { | |
210 | int r = 0; | |
211 | ||
611736d8 | 212 | if (adev->kfd.dev) |
8e07e267 | 213 | r = kgd2kfd_post_reset(adev->kfd.dev); |
5c6dd71e SL |
214 | |
215 | return r; | |
216 | } | |
217 | ||
/**
 * amdgpu_amdkfd_gpu_reset - trigger GPU recovery on behalf of amdkfd
 *
 * @kgd: device handle; cast to struct amdgpu_device
 *
 * Recovery is only started when amdgpu_device_should_recover_gpu() agrees.
 */
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
	struct amdgpu_device *gpu = (struct amdgpu_device *)kgd;

	if (!amdgpu_device_should_recover_gpu(gpu))
		return;

	amdgpu_device_gpu_recover(gpu, NULL);
}
225 | ||
7cd52c91 AL |
226 | int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, |
227 | void **mem_obj, uint64_t *gpu_addr, | |
fa5bde80 | 228 | void **cpu_ptr, bool cp_mqd_gfx9) |
130e0371 | 229 | { |
dc102c43 | 230 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
473fee47 | 231 | struct amdgpu_bo *bo = NULL; |
3216c6b7 | 232 | struct amdgpu_bo_param bp; |
130e0371 | 233 | int r; |
473fee47 | 234 | void *cpu_ptr_tmp = NULL; |
130e0371 | 235 | |
3216c6b7 CZ |
236 | memset(&bp, 0, sizeof(bp)); |
237 | bp.size = size; | |
238 | bp.byte_align = PAGE_SIZE; | |
239 | bp.domain = AMDGPU_GEM_DOMAIN_GTT; | |
240 | bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; | |
241 | bp.type = ttm_bo_type_kernel; | |
242 | bp.resv = NULL; | |
15426dbb | 243 | |
fa5bde80 YZ |
244 | if (cp_mqd_gfx9) |
245 | bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9; | |
15426dbb | 246 | |
3216c6b7 | 247 | r = amdgpu_bo_create(adev, &bp, &bo); |
130e0371 | 248 | if (r) { |
dc102c43 | 249 | dev_err(adev->dev, |
130e0371 OG |
250 | "failed to allocate BO for amdkfd (%d)\n", r); |
251 | return r; | |
252 | } | |
253 | ||
254 | /* map the buffer */ | |
473fee47 | 255 | r = amdgpu_bo_reserve(bo, true); |
130e0371 | 256 | if (r) { |
dc102c43 | 257 | dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); |
130e0371 OG |
258 | goto allocate_mem_reserve_bo_failed; |
259 | } | |
260 | ||
7b7c6c81 | 261 | r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); |
130e0371 | 262 | if (r) { |
dc102c43 | 263 | dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); |
130e0371 OG |
264 | goto allocate_mem_pin_bo_failed; |
265 | } | |
130e0371 | 266 | |
bb812f1e JZ |
267 | r = amdgpu_ttm_alloc_gart(&bo->tbo); |
268 | if (r) { | |
269 | dev_err(adev->dev, "%p bind failed\n", bo); | |
270 | goto allocate_mem_kmap_bo_failed; | |
271 | } | |
272 | ||
473fee47 | 273 | r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); |
130e0371 | 274 | if (r) { |
dc102c43 | 275 | dev_err(adev->dev, |
130e0371 OG |
276 | "(%d) failed to map bo to kernel for amdkfd\n", r); |
277 | goto allocate_mem_kmap_bo_failed; | |
278 | } | |
130e0371 | 279 | |
473fee47 | 280 | *mem_obj = bo; |
7b7c6c81 | 281 | *gpu_addr = amdgpu_bo_gpu_offset(bo); |
473fee47 YZ |
282 | *cpu_ptr = cpu_ptr_tmp; |
283 | ||
284 | amdgpu_bo_unreserve(bo); | |
130e0371 OG |
285 | |
286 | return 0; | |
287 | ||
288 | allocate_mem_kmap_bo_failed: | |
473fee47 | 289 | amdgpu_bo_unpin(bo); |
130e0371 | 290 | allocate_mem_pin_bo_failed: |
473fee47 | 291 | amdgpu_bo_unreserve(bo); |
130e0371 | 292 | allocate_mem_reserve_bo_failed: |
473fee47 | 293 | amdgpu_bo_unref(&bo); |
130e0371 OG |
294 | |
295 | return r; | |
296 | } | |
297 | ||
/**
 * amdgpu_amdkfd_free_gtt_mem - release a buffer from
 *                              amdgpu_amdkfd_alloc_gtt_mem()
 *
 * @kgd: device handle (unused here)
 * @mem_obj: buffer object to release; cast to struct amdgpu_bo
 *
 * Under reservation the buffer is kunmapped and unpinned, then the
 * reference is dropped. The return value of amdgpu_bo_reserve() is not
 * checked.
 */
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *gtt_bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_reserve(gtt_bo, true);
	amdgpu_bo_kunmap(gtt_bo);
	amdgpu_bo_unpin(gtt_bo);
	amdgpu_bo_unreserve(gtt_bo);

	amdgpu_bo_unref(&gtt_bo);
}
308 | ||
ca66fb8f OZ |
309 | int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, |
310 | void **mem_obj) | |
311 | { | |
312 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
313 | struct amdgpu_bo *bo = NULL; | |
314 | struct amdgpu_bo_param bp; | |
315 | int r; | |
316 | ||
317 | memset(&bp, 0, sizeof(bp)); | |
318 | bp.size = size; | |
319 | bp.byte_align = 1; | |
320 | bp.domain = AMDGPU_GEM_DOMAIN_GWS; | |
321 | bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS; | |
322 | bp.type = ttm_bo_type_device; | |
323 | bp.resv = NULL; | |
324 | ||
325 | r = amdgpu_bo_create(adev, &bp, &bo); | |
326 | if (r) { | |
327 | dev_err(adev->dev, | |
328 | "failed to allocate gws BO for amdkfd (%d)\n", r); | |
329 | return r; | |
330 | } | |
331 | ||
332 | *mem_obj = bo; | |
333 | return 0; | |
334 | } | |
335 | ||
/**
 * amdgpu_amdkfd_free_gws - drop the reference on a GWS buffer object
 *
 * @kgd: device handle (unused here)
 * @mem_obj: buffer object to release; cast to struct amdgpu_bo
 */
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
	struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)mem_obj;

	amdgpu_bo_unref(&gws_bo);
}
342 | ||
0da8b10e AL |
343 | uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd, |
344 | enum kgd_engine_type type) | |
345 | { | |
346 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
347 | ||
348 | switch (type) { | |
349 | case KGD_ENGINE_PFP: | |
350 | return adev->gfx.pfp_fw_version; | |
351 | ||
352 | case KGD_ENGINE_ME: | |
353 | return adev->gfx.me_fw_version; | |
354 | ||
355 | case KGD_ENGINE_CE: | |
356 | return adev->gfx.ce_fw_version; | |
357 | ||
358 | case KGD_ENGINE_MEC1: | |
359 | return adev->gfx.mec_fw_version; | |
360 | ||
361 | case KGD_ENGINE_MEC2: | |
362 | return adev->gfx.mec2_fw_version; | |
363 | ||
364 | case KGD_ENGINE_RLC: | |
365 | return adev->gfx.rlc_fw_version; | |
366 | ||
367 | case KGD_ENGINE_SDMA1: | |
368 | return adev->sdma.instance[0].fw_version; | |
369 | ||
370 | case KGD_ENGINE_SDMA2: | |
371 | return adev->sdma.instance[1].fw_version; | |
372 | ||
373 | default: | |
374 | return 0; | |
375 | } | |
376 | ||
377 | return 0; | |
378 | } | |
379 | ||
7cd52c91 AL |
380 | void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, |
381 | struct kfd_local_mem_info *mem_info) | |
30f1c042 HK |
382 | { |
383 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
384 | uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : | |
385 | ~((1ULL << 32) - 1); | |
770d13b1 | 386 | resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; |
30f1c042 HK |
387 | |
388 | memset(mem_info, 0, sizeof(*mem_info)); | |
770d13b1 CK |
389 | if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { |
390 | mem_info->local_mem_size_public = adev->gmc.visible_vram_size; | |
391 | mem_info->local_mem_size_private = adev->gmc.real_vram_size - | |
392 | adev->gmc.visible_vram_size; | |
30f1c042 HK |
393 | } else { |
394 | mem_info->local_mem_size_public = 0; | |
770d13b1 | 395 | mem_info->local_mem_size_private = adev->gmc.real_vram_size; |
30f1c042 | 396 | } |
770d13b1 | 397 | mem_info->vram_width = adev->gmc.vram_width; |
30f1c042 | 398 | |
fb8baefc | 399 | pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", |
770d13b1 | 400 | &adev->gmc.aper_base, &aper_limit, |
30f1c042 HK |
401 | mem_info->local_mem_size_public, |
402 | mem_info->local_mem_size_private); | |
403 | ||
404 | if (amdgpu_sriov_vf(adev)) | |
405 | mem_info->mem_clk_max = adev->clock.default_mclk / 100; | |
944effd3 | 406 | else if (adev->pm.dpm_enabled) { |
6bdadb20 HZ |
407 | if (amdgpu_emu_mode == 1) |
408 | mem_info->mem_clk_max = 0; | |
409 | else | |
410 | mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100; | |
411 | } else | |
7ba01f9e | 412 | mem_info->mem_clk_max = 100; |
30f1c042 HK |
413 | } |
414 | ||
7cd52c91 | 415 | uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) |
130e0371 | 416 | { |
dc102c43 | 417 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
130e0371 | 418 | |
dc102c43 AR |
419 | if (adev->gfx.funcs->get_gpu_clock_counter) |
420 | return adev->gfx.funcs->get_gpu_clock_counter(adev); | |
130e0371 OG |
421 | return 0; |
422 | } | |
423 | ||
7cd52c91 | 424 | uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) |
130e0371 | 425 | { |
dc102c43 | 426 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; |
130e0371 | 427 | |
a9efcc19 FK |
428 | /* the sclk is in quantas of 10kHz */ |
429 | if (amdgpu_sriov_vf(adev)) | |
430 | return adev->clock.default_sclk / 100; | |
944effd3 | 431 | else if (adev->pm.dpm_enabled) |
7ba01f9e SL |
432 | return amdgpu_dpm_get_sclk(adev, false) / 100; |
433 | else | |
434 | return 100; | |
130e0371 | 435 | } |
ebdebf42 | 436 | |
7cd52c91 | 437 | void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) |
ebdebf42 FC |
438 | { |
439 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
440 | struct amdgpu_cu_info acu_info = adev->gfx.cu_info; | |
441 | ||
442 | memset(cu_info, 0, sizeof(*cu_info)); | |
443 | if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) | |
444 | return; | |
445 | ||
446 | cu_info->cu_active_number = acu_info.number; | |
447 | cu_info->cu_ao_mask = acu_info.ao_cu_mask; | |
448 | memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], | |
449 | sizeof(acu_info.bitmap)); | |
450 | cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; | |
451 | cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; | |
452 | cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; | |
453 | cu_info->simd_per_cu = acu_info.simd_per_cu; | |
454 | cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; | |
455 | cu_info->wave_front_size = acu_info.wave_front_size; | |
456 | cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; | |
457 | cu_info->lds_size = acu_info.lds_size; | |
458 | } | |
9f0a0b41 | 459 | |
1dde0ea9 FK |
460 | int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, |
461 | struct kgd_dev **dma_buf_kgd, | |
462 | uint64_t *bo_size, void *metadata_buffer, | |
463 | size_t buffer_size, uint32_t *metadata_size, | |
464 | uint32_t *flags) | |
465 | { | |
466 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
467 | struct dma_buf *dma_buf; | |
468 | struct drm_gem_object *obj; | |
469 | struct amdgpu_bo *bo; | |
470 | uint64_t metadata_flags; | |
471 | int r = -EINVAL; | |
472 | ||
473 | dma_buf = dma_buf_get(dma_buf_fd); | |
474 | if (IS_ERR(dma_buf)) | |
475 | return PTR_ERR(dma_buf); | |
476 | ||
477 | if (dma_buf->ops != &amdgpu_dmabuf_ops) | |
478 | /* Can't handle non-graphics buffers */ | |
479 | goto out_put; | |
480 | ||
481 | obj = dma_buf->priv; | |
482 | if (obj->dev->driver != adev->ddev->driver) | |
483 | /* Can't handle buffers from different drivers */ | |
484 | goto out_put; | |
485 | ||
486 | adev = obj->dev->dev_private; | |
487 | bo = gem_to_amdgpu_bo(obj); | |
488 | if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | | |
489 | AMDGPU_GEM_DOMAIN_GTT))) | |
490 | /* Only VRAM and GTT BOs are supported */ | |
491 | goto out_put; | |
492 | ||
493 | r = 0; | |
494 | if (dma_buf_kgd) | |
495 | *dma_buf_kgd = (struct kgd_dev *)adev; | |
496 | if (bo_size) | |
497 | *bo_size = amdgpu_bo_size(bo); | |
498 | if (metadata_size) | |
499 | *metadata_size = bo->metadata_size; | |
500 | if (metadata_buffer) | |
501 | r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, | |
502 | metadata_size, &metadata_flags); | |
503 | if (flags) { | |
504 | *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? | |
1d251d90 YZ |
505 | KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
506 | : KFD_IOC_ALLOC_MEM_FLAGS_GTT; | |
1dde0ea9 FK |
507 | |
508 | if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) | |
1d251d90 | 509 | *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC; |
1dde0ea9 FK |
510 | } |
511 | ||
512 | out_put: | |
513 | dma_buf_put(dma_buf); | |
514 | return r; | |
515 | } | |
516 | ||
9f0a0b41 KR |
517 | uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) |
518 | { | |
519 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
520 | ||
521 | return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); | |
522 | } | |
155494db | 523 | |
db8b62c0 SL |
524 | uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) |
525 | { | |
526 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
527 | ||
528 | return adev->gmc.xgmi.hive_id; | |
529 | } | |
0c663695 DS |
530 | |
531 | uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd) | |
532 | { | |
533 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
534 | ||
535 | return adev->unique_id; | |
536 | } | |
537 | ||
da361dd1 | 538 | uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src) |
539 | { | |
540 | struct amdgpu_device *peer_adev = (struct amdgpu_device *)src; | |
541 | struct amdgpu_device *adev = (struct amdgpu_device *)dst; | |
542 | int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); | |
543 | ||
544 | if (ret < 0) { | |
545 | DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", | |
546 | adev->gmc.xgmi.physical_node_id, | |
547 | peer_adev->gmc.xgmi.physical_node_id, ret); | |
548 | ret = 0; | |
549 | } | |
550 | return (uint8_t)ret; | |
551 | } | |
db8b62c0 | 552 | |
d8e408a8 OZ |
553 | uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd) |
554 | { | |
555 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
556 | ||
557 | return adev->rmmio_remap.bus_addr; | |
558 | } | |
559 | ||
29e76462 OZ |
560 | uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) |
561 | { | |
562 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
563 | ||
564 | return adev->gds.gws_size; | |
565 | } | |
566 | ||
c6d1ec41 JG |
567 | uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd) |
568 | { | |
569 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
570 | ||
571 | return adev->rev_id; | |
572 | } | |
573 | ||
4c660c8f FK |
574 | int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, |
575 | uint32_t vmid, uint64_t gpu_addr, | |
576 | uint32_t *ib_cmd, uint32_t ib_len) | |
577 | { | |
578 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
579 | struct amdgpu_job *job; | |
580 | struct amdgpu_ib *ib; | |
581 | struct amdgpu_ring *ring; | |
582 | struct dma_fence *f = NULL; | |
583 | int ret; | |
584 | ||
585 | switch (engine) { | |
586 | case KGD_ENGINE_MEC1: | |
587 | ring = &adev->gfx.compute_ring[0]; | |
588 | break; | |
589 | case KGD_ENGINE_SDMA1: | |
590 | ring = &adev->sdma.instance[0].ring; | |
591 | break; | |
592 | case KGD_ENGINE_SDMA2: | |
593 | ring = &adev->sdma.instance[1].ring; | |
594 | break; | |
595 | default: | |
596 | pr_err("Invalid engine in IB submission: %d\n", engine); | |
597 | ret = -EINVAL; | |
598 | goto err; | |
599 | } | |
600 | ||
601 | ret = amdgpu_job_alloc(adev, 1, &job, NULL); | |
602 | if (ret) | |
603 | goto err; | |
604 | ||
605 | ib = &job->ibs[0]; | |
606 | memset(ib, 0, sizeof(struct amdgpu_ib)); | |
607 | ||
608 | ib->gpu_addr = gpu_addr; | |
609 | ib->ptr = ib_cmd; | |
610 | ib->length_dw = ib_len; | |
611 | /* This works for NO_HWS. TODO: need to handle without knowing VMID */ | |
612 | job->vmid = vmid; | |
613 | ||
614 | ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); | |
615 | if (ret) { | |
616 | DRM_ERROR("amdgpu: failed to schedule IB.\n"); | |
617 | goto err_ib_sched; | |
618 | } | |
619 | ||
620 | ret = dma_fence_wait(f, false); | |
621 | ||
622 | err_ib_sched: | |
623 | dma_fence_put(f); | |
624 | amdgpu_job_free(job); | |
625 | err: | |
626 | return ret; | |
627 | } | |
628 | ||
01c097db FK |
629 | void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) |
630 | { | |
631 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
632 | ||
9530273e EQ |
633 | amdgpu_dpm_switch_power_profile(adev, |
634 | PP_SMC_POWER_PROFILE_COMPUTE, | |
635 | !idle); | |
01c097db FK |
636 | } |
637 | ||
155494db FK |
638 | bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) |
639 | { | |
611736d8 | 640 | if (adev->kfd.dev) { |
155494db FK |
641 | if ((1 << vmid) & compute_vmid_bitmap) |
642 | return true; | |
643 | } | |
644 | ||
645 | return false; | |
646 | } | |
fcdfa432 | 647 | |
ffa02269 AS |
648 | int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) |
649 | { | |
650 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
651 | ||
652 | if (adev->family == AMDGPU_FAMILY_AI) { | |
653 | int i; | |
654 | ||
655 | for (i = 0; i < adev->num_vmhubs; i++) | |
656 | amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); | |
657 | } else { | |
658 | amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); | |
659 | } | |
660 | ||
661 | return 0; | |
662 | } | |
663 | ||
664 | int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) | |
665 | { | |
666 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
b80cd524 | 667 | const uint32_t flush_type = 0; |
ffa02269 AS |
668 | bool all_hub = false; |
669 | ||
ffa02269 AS |
670 | if (adev->family == AMDGPU_FAMILY_AI) |
671 | all_hub = true; | |
672 | ||
673 | return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); | |
674 | } | |
675 | ||
aabf3a95 JX |
676 | bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) |
677 | { | |
678 | struct amdgpu_device *adev = (struct amdgpu_device *)kgd; | |
679 | ||
680 | return adev->have_atomics_support; | |
681 | } | |
682 | ||
#ifndef CONFIG_HSA_AMD
/*
 * Stubs used when CONFIG_HSA_AMD is not set. Each one does nothing and
 * returns false, 0 or NULL, so callers in this file need no #ifdefs.
 */

/* Fence and memory-accounting stubs. */
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
	return false;
}

void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
{
}

int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
{
	return 0;
}

void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
					struct amdgpu_vm *vm)
{
}

struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
{
	return NULL;
}

int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
{
	return 0;
}

/* kgd2kfd_* stubs: the probe returns NULL, so no KFD device is ever set up. */
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
			      unsigned int asic_type, bool vf)
{
	return NULL;
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	return false;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
}

void kgd2kfd_exit(void)
{
}

void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	return 0;
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	return 0;
}

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	return 0;
}

void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
}

void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
}
#endif