]>
Commit | Line | Data |
---|---|---|
4a488a7a OG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
#include <linux/amd-iommu.h>
#include <linux/bitops.h>
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"
4a488a7a | 30 | |
19f6d2a6 OG |
31 | #define MQD_SIZE_ALIGNED 768 |
32 | ||
4a488a7a | 33 | static const struct kfd_device_info kaveri_device_info = { |
0da7558c BG |
34 | .asic_family = CHIP_KAVERI, |
35 | .max_pasid_bits = 16, | |
992839ad YS |
36 | /* max num of queues for KV.TODO should be a dynamic value */ |
37 | .max_no_of_hqd = 24, | |
0da7558c | 38 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
f3a39818 | 39 | .event_interrupt_class = &event_interrupt_class_cik, |
fbeb661b | 40 | .num_of_watch_points = 4, |
0da7558c BG |
41 | .mqd_size_aligned = MQD_SIZE_ALIGNED |
42 | }; | |
43 | ||
44 | static const struct kfd_device_info carrizo_device_info = { | |
45 | .asic_family = CHIP_CARRIZO, | |
4a488a7a | 46 | .max_pasid_bits = 16, |
eaccd6e7 OG |
47 | /* max num of queues for CZ.TODO should be a dynamic value */ |
48 | .max_no_of_hqd = 24, | |
b3f5e6b4 | 49 | .ih_ring_entry_size = 4 * sizeof(uint32_t), |
eaccd6e7 | 50 | .event_interrupt_class = &event_interrupt_class_cik, |
f7c826ad | 51 | .num_of_watch_points = 4, |
19f6d2a6 | 52 | .mqd_size_aligned = MQD_SIZE_ALIGNED |
4a488a7a OG |
53 | }; |
54 | ||
55 | struct kfd_deviceid { | |
56 | unsigned short did; | |
57 | const struct kfd_device_info *device_info; | |
58 | }; | |
59 | ||
60 | /* Please keep this sorted by increasing device id. */ | |
61 | static const struct kfd_deviceid supported_devices[] = { | |
62 | { 0x1304, &kaveri_device_info }, /* Kaveri */ | |
63 | { 0x1305, &kaveri_device_info }, /* Kaveri */ | |
64 | { 0x1306, &kaveri_device_info }, /* Kaveri */ | |
65 | { 0x1307, &kaveri_device_info }, /* Kaveri */ | |
66 | { 0x1309, &kaveri_device_info }, /* Kaveri */ | |
67 | { 0x130A, &kaveri_device_info }, /* Kaveri */ | |
68 | { 0x130B, &kaveri_device_info }, /* Kaveri */ | |
69 | { 0x130C, &kaveri_device_info }, /* Kaveri */ | |
70 | { 0x130D, &kaveri_device_info }, /* Kaveri */ | |
71 | { 0x130E, &kaveri_device_info }, /* Kaveri */ | |
72 | { 0x130F, &kaveri_device_info }, /* Kaveri */ | |
73 | { 0x1310, &kaveri_device_info }, /* Kaveri */ | |
74 | { 0x1311, &kaveri_device_info }, /* Kaveri */ | |
75 | { 0x1312, &kaveri_device_info }, /* Kaveri */ | |
76 | { 0x1313, &kaveri_device_info }, /* Kaveri */ | |
77 | { 0x1315, &kaveri_device_info }, /* Kaveri */ | |
78 | { 0x1316, &kaveri_device_info }, /* Kaveri */ | |
79 | { 0x1317, &kaveri_device_info }, /* Kaveri */ | |
80 | { 0x1318, &kaveri_device_info }, /* Kaveri */ | |
81 | { 0x131B, &kaveri_device_info }, /* Kaveri */ | |
82 | { 0x131C, &kaveri_device_info }, /* Kaveri */ | |
123576d1 BG |
83 | { 0x131D, &kaveri_device_info }, /* Kaveri */ |
84 | { 0x9870, &carrizo_device_info }, /* Carrizo */ | |
85 | { 0x9874, &carrizo_device_info }, /* Carrizo */ | |
86 | { 0x9875, &carrizo_device_info }, /* Carrizo */ | |
87 | { 0x9876, &carrizo_device_info }, /* Carrizo */ | |
88 | { 0x9877, &carrizo_device_info } /* Carrizo */ | |
4a488a7a OG |
89 | }; |
90 | ||
6e81090b OG |
91 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, |
92 | unsigned int chunk_size); | |
93 | static void kfd_gtt_sa_fini(struct kfd_dev *kfd); | |
94 | ||
4a488a7a OG |
95 | static const struct kfd_device_info *lookup_device_info(unsigned short did) |
96 | { | |
97 | size_t i; | |
98 | ||
99 | for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { | |
100 | if (supported_devices[i].did == did) { | |
101 | BUG_ON(supported_devices[i].device_info == NULL); | |
102 | return supported_devices[i].device_info; | |
103 | } | |
104 | } | |
105 | ||
106 | return NULL; | |
107 | } | |
108 | ||
cea405b1 XZ |
109 | struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, |
110 | struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) | |
4a488a7a OG |
111 | { |
112 | struct kfd_dev *kfd; | |
113 | ||
114 | const struct kfd_device_info *device_info = | |
115 | lookup_device_info(pdev->device); | |
116 | ||
117 | if (!device_info) | |
118 | return NULL; | |
119 | ||
120 | kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); | |
121 | if (!kfd) | |
122 | return NULL; | |
123 | ||
124 | kfd->kgd = kgd; | |
125 | kfd->device_info = device_info; | |
126 | kfd->pdev = pdev; | |
19f6d2a6 | 127 | kfd->init_complete = false; |
cea405b1 XZ |
128 | kfd->kfd2kgd = f2g; |
129 | ||
130 | mutex_init(&kfd->doorbell_mutex); | |
131 | memset(&kfd->doorbell_available_index, 0, | |
132 | sizeof(kfd->doorbell_available_index)); | |
4a488a7a OG |
133 | |
134 | return kfd; | |
135 | } | |
136 | ||
b17f068a OG |
137 | static bool device_iommu_pasid_init(struct kfd_dev *kfd) |
138 | { | |
139 | const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | | |
140 | AMD_IOMMU_DEVICE_FLAG_PRI_SUP | | |
141 | AMD_IOMMU_DEVICE_FLAG_PASID_SUP; | |
142 | ||
143 | struct amd_iommu_device_info iommu_info; | |
144 | unsigned int pasid_limit; | |
145 | int err; | |
146 | ||
147 | err = amd_iommu_device_info(kfd->pdev, &iommu_info); | |
148 | if (err < 0) { | |
149 | dev_err(kfd_device, | |
150 | "error getting iommu info. is the iommu enabled?\n"); | |
151 | return false; | |
152 | } | |
153 | ||
154 | if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { | |
155 | dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", | |
156 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, | |
157 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, | |
158 | (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); | |
159 | return false; | |
160 | } | |
161 | ||
162 | pasid_limit = min_t(unsigned int, | |
163 | (unsigned int)1 << kfd->device_info->max_pasid_bits, | |
164 | iommu_info.max_pasids); | |
165 | /* | |
166 | * last pasid is used for kernel queues doorbells | |
167 | * in the future the last pasid might be used for a kernel thread. | |
168 | */ | |
169 | pasid_limit = min_t(unsigned int, | |
170 | pasid_limit, | |
171 | kfd->doorbell_process_limit - 1); | |
172 | ||
173 | err = amd_iommu_init_device(kfd->pdev, pasid_limit); | |
174 | if (err < 0) { | |
175 | dev_err(kfd_device, "error initializing iommu device\n"); | |
176 | return false; | |
177 | } | |
178 | ||
179 | if (!kfd_set_pasid_limit(pasid_limit)) { | |
180 | dev_err(kfd_device, "error setting pasid limit\n"); | |
181 | amd_iommu_free_device(kfd->pdev); | |
182 | return false; | |
183 | } | |
184 | ||
185 | return true; | |
186 | } | |
187 | ||
188 | static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) | |
189 | { | |
190 | struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); | |
191 | ||
192 | if (dev) | |
193 | kfd_unbind_process_from_device(dev, pasid); | |
194 | } | |
195 | ||
59d3e8be AS |
196 | /* |
197 | * This function called by IOMMU driver on PPR failure | |
198 | */ | |
199 | static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, | |
200 | unsigned long address, u16 flags) | |
201 | { | |
202 | struct kfd_dev *dev; | |
203 | ||
204 | dev_warn(kfd_device, | |
205 | "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", | |
206 | PCI_BUS_NUM(pdev->devfn), | |
207 | PCI_SLOT(pdev->devfn), | |
208 | PCI_FUNC(pdev->devfn), | |
209 | pasid, | |
210 | address, | |
211 | flags); | |
212 | ||
213 | dev = kfd_device_by_pci_dev(pdev); | |
214 | BUG_ON(dev == NULL); | |
215 | ||
216 | kfd_signal_iommu_event(dev, pasid, address, | |
217 | flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); | |
218 | ||
219 | return AMD_IOMMU_INV_PRI_RSP_INVALID; | |
220 | } | |
221 | ||
4a488a7a OG |
222 | bool kgd2kfd_device_init(struct kfd_dev *kfd, |
223 | const struct kgd2kfd_shared_resources *gpu_resources) | |
224 | { | |
19f6d2a6 OG |
225 | unsigned int size; |
226 | ||
4a488a7a OG |
227 | kfd->shared_resources = *gpu_resources; |
228 | ||
19f6d2a6 | 229 | /* calculate max size of mqds needed for queues */ |
b8cbab04 OG |
230 | size = max_num_of_queues_per_device * |
231 | kfd->device_info->mqd_size_aligned; | |
19f6d2a6 | 232 | |
e18e794e OG |
233 | /* |
234 | * calculate max size of runlist packet. | |
235 | * There can be only 2 packets at once | |
236 | */ | |
b3869b17 DA |
237 | size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) + |
238 | max_num_of_queues_per_device * | |
e18e794e OG |
239 | sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2; |
240 | ||
241 | /* Add size of HIQ & DIQ */ | |
242 | size += KFD_KERNEL_QUEUE_SIZE * 2; | |
243 | ||
244 | /* add another 512KB for all other allocations on gart (HPD, fences) */ | |
19f6d2a6 OG |
245 | size += 512 * 1024; |
246 | ||
cea405b1 XZ |
247 | if (kfd->kfd2kgd->init_gtt_mem_allocation( |
248 | kfd->kgd, size, &kfd->gtt_mem, | |
249 | &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){ | |
19f6d2a6 | 250 | dev_err(kfd_device, |
e18e794e OG |
251 | "Could not allocate %d bytes for device (%x:%x)\n", |
252 | size, kfd->pdev->vendor, kfd->pdev->device); | |
19f6d2a6 OG |
253 | goto out; |
254 | } | |
255 | ||
e18e794e OG |
256 | dev_info(kfd_device, |
257 | "Allocated %d bytes on gart for device(%x:%x)\n", | |
258 | size, kfd->pdev->vendor, kfd->pdev->device); | |
259 | ||
73a1da0b OG |
260 | /* Initialize GTT sa with 512 byte chunk size */ |
261 | if (kfd_gtt_sa_init(kfd, size, 512) != 0) { | |
262 | dev_err(kfd_device, | |
263 | "Error initializing gtt sub-allocator\n"); | |
264 | goto kfd_gtt_sa_init_error; | |
265 | } | |
266 | ||
19f6d2a6 OG |
267 | kfd_doorbell_init(kfd); |
268 | ||
269 | if (kfd_topology_add_device(kfd) != 0) { | |
270 | dev_err(kfd_device, | |
271 | "Error adding device (%x:%x) to topology\n", | |
272 | kfd->pdev->vendor, kfd->pdev->device); | |
273 | goto kfd_topology_add_device_error; | |
274 | } | |
275 | ||
2249d558 AL |
276 | if (kfd_interrupt_init(kfd)) { |
277 | dev_err(kfd_device, | |
278 | "Error initializing interrupts for device (%x:%x)\n", | |
279 | kfd->pdev->vendor, kfd->pdev->device); | |
280 | goto kfd_interrupt_error; | |
281 | } | |
282 | ||
b17f068a OG |
283 | if (!device_iommu_pasid_init(kfd)) { |
284 | dev_err(kfd_device, | |
285 | "Error initializing iommuv2 for device (%x:%x)\n", | |
286 | kfd->pdev->vendor, kfd->pdev->device); | |
287 | goto device_iommu_pasid_error; | |
288 | } | |
289 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, | |
290 | iommu_pasid_shutdown_callback); | |
59d3e8be | 291 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); |
5b5c4e40 | 292 | |
64c7f8cf BG |
293 | kfd->dqm = device_queue_manager_init(kfd); |
294 | if (!kfd->dqm) { | |
295 | dev_err(kfd_device, | |
296 | "Error initializing queue manager for device (%x:%x)\n", | |
297 | kfd->pdev->vendor, kfd->pdev->device); | |
298 | goto device_queue_manager_error; | |
299 | } | |
300 | ||
45c9a5e4 | 301 | if (kfd->dqm->ops.start(kfd->dqm) != 0) { |
64c7f8cf BG |
302 | dev_err(kfd_device, |
303 | "Error starting queuen manager for device (%x:%x)\n", | |
304 | kfd->pdev->vendor, kfd->pdev->device); | |
305 | goto dqm_start_error; | |
306 | } | |
307 | ||
fbeb661b YS |
308 | kfd->dbgmgr = NULL; |
309 | ||
4a488a7a OG |
310 | kfd->init_complete = true; |
311 | dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, | |
312 | kfd->pdev->device); | |
313 | ||
64c7f8cf BG |
314 | pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", |
315 | sched_policy); | |
316 | ||
19f6d2a6 OG |
317 | goto out; |
318 | ||
64c7f8cf BG |
319 | dqm_start_error: |
320 | device_queue_manager_uninit(kfd->dqm); | |
321 | device_queue_manager_error: | |
322 | amd_iommu_free_device(kfd->pdev); | |
b17f068a | 323 | device_iommu_pasid_error: |
2249d558 AL |
324 | kfd_interrupt_exit(kfd); |
325 | kfd_interrupt_error: | |
b17f068a | 326 | kfd_topology_remove_device(kfd); |
19f6d2a6 | 327 | kfd_topology_add_device_error: |
73a1da0b OG |
328 | kfd_gtt_sa_fini(kfd); |
329 | kfd_gtt_sa_init_error: | |
cea405b1 | 330 | kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); |
19f6d2a6 OG |
331 | dev_err(kfd_device, |
332 | "device (%x:%x) NOT added due to errors\n", | |
333 | kfd->pdev->vendor, kfd->pdev->device); | |
334 | out: | |
335 | return kfd->init_complete; | |
4a488a7a OG |
336 | } |
337 | ||
338 | void kgd2kfd_device_exit(struct kfd_dev *kfd) | |
339 | { | |
b17f068a | 340 | if (kfd->init_complete) { |
64c7f8cf | 341 | device_queue_manager_uninit(kfd->dqm); |
b17f068a | 342 | amd_iommu_free_device(kfd->pdev); |
2249d558 | 343 | kfd_interrupt_exit(kfd); |
b17f068a | 344 | kfd_topology_remove_device(kfd); |
73a1da0b | 345 | kfd_gtt_sa_fini(kfd); |
cea405b1 | 346 | kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); |
b17f068a | 347 | } |
5b5c4e40 | 348 | |
4a488a7a OG |
349 | kfree(kfd); |
350 | } | |
351 | ||
352 | void kgd2kfd_suspend(struct kfd_dev *kfd) | |
353 | { | |
354 | BUG_ON(kfd == NULL); | |
b17f068a | 355 | |
64c7f8cf | 356 | if (kfd->init_complete) { |
45c9a5e4 | 357 | kfd->dqm->ops.stop(kfd->dqm); |
abc9d3e3 | 358 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); |
59d3e8be | 359 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); |
b17f068a | 360 | amd_iommu_free_device(kfd->pdev); |
64c7f8cf | 361 | } |
4a488a7a OG |
362 | } |
363 | ||
364 | int kgd2kfd_resume(struct kfd_dev *kfd) | |
365 | { | |
b17f068a OG |
366 | unsigned int pasid_limit; |
367 | int err; | |
368 | ||
4a488a7a OG |
369 | BUG_ON(kfd == NULL); |
370 | ||
b17f068a OG |
371 | pasid_limit = kfd_get_pasid_limit(); |
372 | ||
373 | if (kfd->init_complete) { | |
374 | err = amd_iommu_init_device(kfd->pdev, pasid_limit); | |
375 | if (err < 0) | |
376 | return -ENXIO; | |
377 | amd_iommu_set_invalidate_ctx_cb(kfd->pdev, | |
378 | iommu_pasid_shutdown_callback); | |
59d3e8be | 379 | amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb); |
45c9a5e4 | 380 | kfd->dqm->ops.start(kfd->dqm); |
b17f068a OG |
381 | } |
382 | ||
4a488a7a OG |
383 | return 0; |
384 | } | |
385 | ||
b3f5e6b4 AL |
386 | /* This is called directly from KGD at ISR. */ |
387 | void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) | |
4a488a7a | 388 | { |
2249d558 AL |
389 | if (!kfd->init_complete) |
390 | return; | |
391 | ||
392 | spin_lock(&kfd->interrupt_lock); | |
393 | ||
394 | if (kfd->interrupts_active | |
395 | && interrupt_is_wanted(kfd, ih_ring_entry) | |
396 | && enqueue_ih_ring_entry(kfd, ih_ring_entry)) | |
397 | schedule_work(&kfd->interrupt_work); | |
398 | ||
399 | spin_unlock(&kfd->interrupt_lock); | |
4a488a7a | 400 | } |
6e81090b OG |
401 | |
402 | static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, | |
403 | unsigned int chunk_size) | |
404 | { | |
405 | unsigned int num_of_bits; | |
406 | ||
407 | BUG_ON(!kfd); | |
408 | BUG_ON(!kfd->gtt_mem); | |
409 | BUG_ON(buf_size < chunk_size); | |
410 | BUG_ON(buf_size == 0); | |
411 | BUG_ON(chunk_size == 0); | |
412 | ||
413 | kfd->gtt_sa_chunk_size = chunk_size; | |
414 | kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; | |
415 | ||
416 | num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE; | |
417 | BUG_ON(num_of_bits == 0); | |
418 | ||
419 | kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL); | |
420 | ||
421 | if (!kfd->gtt_sa_bitmap) | |
422 | return -ENOMEM; | |
423 | ||
424 | pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n", | |
425 | kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); | |
426 | ||
427 | mutex_init(&kfd->gtt_sa_lock); | |
428 | ||
429 | return 0; | |
430 | ||
431 | } | |
432 | ||
433 | static void kfd_gtt_sa_fini(struct kfd_dev *kfd) | |
434 | { | |
435 | mutex_destroy(&kfd->gtt_sa_lock); | |
436 | kfree(kfd->gtt_sa_bitmap); | |
437 | } | |
438 | ||
439 | static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr, | |
440 | unsigned int bit_num, | |
441 | unsigned int chunk_size) | |
442 | { | |
443 | return start_addr + bit_num * chunk_size; | |
444 | } | |
445 | ||
446 | static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr, | |
447 | unsigned int bit_num, | |
448 | unsigned int chunk_size) | |
449 | { | |
450 | return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size); | |
451 | } | |
452 | ||
453 | int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, | |
454 | struct kfd_mem_obj **mem_obj) | |
455 | { | |
456 | unsigned int found, start_search, cur_size; | |
457 | ||
458 | BUG_ON(!kfd); | |
459 | ||
460 | if (size == 0) | |
461 | return -EINVAL; | |
462 | ||
463 | if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) | |
464 | return -ENOMEM; | |
465 | ||
466 | *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); | |
467 | if ((*mem_obj) == NULL) | |
468 | return -ENOMEM; | |
469 | ||
470 | pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size); | |
471 | ||
472 | start_search = 0; | |
473 | ||
474 | mutex_lock(&kfd->gtt_sa_lock); | |
475 | ||
476 | kfd_gtt_restart_search: | |
477 | /* Find the first chunk that is free */ | |
478 | found = find_next_zero_bit(kfd->gtt_sa_bitmap, | |
479 | kfd->gtt_sa_num_of_chunks, | |
480 | start_search); | |
481 | ||
482 | pr_debug("kfd: found = %d\n", found); | |
483 | ||
484 | /* If there wasn't any free chunk, bail out */ | |
485 | if (found == kfd->gtt_sa_num_of_chunks) | |
486 | goto kfd_gtt_no_free_chunk; | |
487 | ||
488 | /* Update fields of mem_obj */ | |
489 | (*mem_obj)->range_start = found; | |
490 | (*mem_obj)->range_end = found; | |
491 | (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( | |
492 | kfd->gtt_start_gpu_addr, | |
493 | found, | |
494 | kfd->gtt_sa_chunk_size); | |
495 | (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( | |
496 | kfd->gtt_start_cpu_ptr, | |
497 | found, | |
498 | kfd->gtt_sa_chunk_size); | |
499 | ||
500 | pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n", | |
501 | (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); | |
502 | ||
503 | /* If we need only one chunk, mark it as allocated and get out */ | |
504 | if (size <= kfd->gtt_sa_chunk_size) { | |
505 | pr_debug("kfd: single bit\n"); | |
506 | set_bit(found, kfd->gtt_sa_bitmap); | |
507 | goto kfd_gtt_out; | |
508 | } | |
509 | ||
510 | /* Otherwise, try to see if we have enough contiguous chunks */ | |
511 | cur_size = size - kfd->gtt_sa_chunk_size; | |
512 | do { | |
513 | (*mem_obj)->range_end = | |
514 | find_next_zero_bit(kfd->gtt_sa_bitmap, | |
515 | kfd->gtt_sa_num_of_chunks, ++found); | |
516 | /* | |
517 | * If next free chunk is not contiguous than we need to | |
518 | * restart our search from the last free chunk we found (which | |
519 | * wasn't contiguous to the previous ones | |
520 | */ | |
521 | if ((*mem_obj)->range_end != found) { | |
522 | start_search = found; | |
523 | goto kfd_gtt_restart_search; | |
524 | } | |
525 | ||
526 | /* | |
527 | * If we reached end of buffer, bail out with error | |
528 | */ | |
529 | if (found == kfd->gtt_sa_num_of_chunks) | |
530 | goto kfd_gtt_no_free_chunk; | |
531 | ||
532 | /* Check if we don't need another chunk */ | |
533 | if (cur_size <= kfd->gtt_sa_chunk_size) | |
534 | cur_size = 0; | |
535 | else | |
536 | cur_size -= kfd->gtt_sa_chunk_size; | |
537 | ||
538 | } while (cur_size > 0); | |
539 | ||
540 | pr_debug("kfd: range_start = %d, range_end = %d\n", | |
541 | (*mem_obj)->range_start, (*mem_obj)->range_end); | |
542 | ||
543 | /* Mark the chunks as allocated */ | |
544 | for (found = (*mem_obj)->range_start; | |
545 | found <= (*mem_obj)->range_end; | |
546 | found++) | |
547 | set_bit(found, kfd->gtt_sa_bitmap); | |
548 | ||
549 | kfd_gtt_out: | |
550 | mutex_unlock(&kfd->gtt_sa_lock); | |
551 | return 0; | |
552 | ||
553 | kfd_gtt_no_free_chunk: | |
554 | pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj); | |
555 | mutex_unlock(&kfd->gtt_sa_lock); | |
556 | kfree(mem_obj); | |
557 | return -ENOMEM; | |
558 | } | |
559 | ||
560 | int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) | |
561 | { | |
562 | unsigned int bit; | |
563 | ||
564 | BUG_ON(!kfd); | |
9216ed29 OG |
565 | |
566 | /* Act like kfree when trying to free a NULL object */ | |
567 | if (!mem_obj) | |
568 | return 0; | |
6e81090b OG |
569 | |
570 | pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n", | |
571 | mem_obj, mem_obj->range_start, mem_obj->range_end); | |
572 | ||
573 | mutex_lock(&kfd->gtt_sa_lock); | |
574 | ||
575 | /* Mark the chunks as free */ | |
576 | for (bit = mem_obj->range_start; | |
577 | bit <= mem_obj->range_end; | |
578 | bit++) | |
579 | clear_bit(bit, kfd->gtt_sa_bitmap); | |
580 | ||
581 | mutex_unlock(&kfd->gtt_sa_lock); | |
582 | ||
583 | kfree(mem_obj); | |
584 | return 0; | |
585 | } |