]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blame - drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drm/amdkfd: Only load sdma mqd when queue is active
[mirror_ubuntu-eoan-kernel.git] / drivers / gpu / drm / amd / amdkfd / kfd_device_queue_manager.c
CommitLineData
64c7f8cf
BG
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
26103436
FK
24#include <linux/ratelimit.h>
25#include <linux/printk.h>
64c7f8cf
BG
26#include <linux/slab.h>
27#include <linux/list.h>
28#include <linux/types.h>
64c7f8cf 29#include <linux/bitops.h>
99331a51 30#include <linux/sched.h>
64c7f8cf
BG
31#include "kfd_priv.h"
32#include "kfd_device_queue_manager.h"
33#include "kfd_mqd_manager.h"
34#include "cik_regs.h"
35#include "kfd_kernel_queue.h"
5b87245f 36#include "amdgpu_amdkfd.h"
64c7f8cf
BG
37
38/* Size of the per-pipe EOP queue */
39#define CIK_HPD_EOP_BYTES_LOG2 11
40#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
41
64c7f8cf
BG
42static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
43 unsigned int pasid, unsigned int vmid);
44
45static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
46 struct queue *q,
47 struct qcm_process_device *qpd);
bcea3081 48
c4744e24
YZ
49static int execute_queues_cpsch(struct device_queue_manager *dqm,
50 enum kfd_unmap_queues_filter filter,
51 uint32_t filter_param);
7da2bcf8 52static int unmap_queues_cpsch(struct device_queue_manager *dqm,
4465f466
YZ
53 enum kfd_unmap_queues_filter filter,
54 uint32_t filter_param);
64c7f8cf 55
60a00956
FK
56static int map_queues_cpsch(struct device_queue_manager *dqm);
57
bcea3081
BG
58static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
59 struct queue *q,
60 struct qcm_process_device *qpd);
61
62static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1b4670f6 63 struct queue *q);
64c7f8cf 64
73ea648d
SL
65static void kfd_process_hw_exception(struct work_struct *work);
66
bcea3081
BG
67static inline
68enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
64c7f8cf 69{
1b4670f6 70 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
85d258f9
BG
71 return KFD_MQD_TYPE_SDMA;
72 return KFD_MQD_TYPE_CP;
64c7f8cf
BG
73}
74
d0b63bb3
AR
75static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
76{
77 int i;
78 int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
79 + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
80
81 /* queue is available for KFD usage if bit is 1 */
82 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
83 if (test_bit(pipe_offset + i,
84 dqm->dev->shared_resources.queue_bitmap))
85 return true;
86 return false;
87}
88
d0b63bb3 89unsigned int get_queues_num(struct device_queue_manager *dqm)
64ea8f4a 90{
d0b63bb3
AR
91 return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
92 KGD_MAX_QUEUES);
64ea8f4a
OG
93}
94
d0b63bb3 95unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
64c7f8cf 96{
d0b63bb3
AR
97 return dqm->dev->shared_resources.num_queue_per_pipe;
98}
99
100unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
101{
d0b63bb3 102 return dqm->dev->shared_resources.num_pipe_per_mec;
64c7f8cf
BG
103}
104
98bb9222
YZ
105static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
106{
107 return dqm->dev->device_info->num_sdma_engines;
108}
109
1b4670f6
OZ
110static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
111{
112 return dqm->dev->device_info->num_xgmi_sdma_engines;
113}
114
98bb9222
YZ
115unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
116{
117 return dqm->dev->device_info->num_sdma_engines
d5094189 118 * dqm->dev->device_info->num_sdma_queues_per_engine;
98bb9222
YZ
119}
120
1b4670f6
OZ
121unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
122{
123 return dqm->dev->device_info->num_xgmi_sdma_engines
124 * dqm->dev->device_info->num_sdma_queues_per_engine;
125}
126
a22fc854 127void program_sh_mem_settings(struct device_queue_manager *dqm,
64c7f8cf
BG
128 struct qcm_process_device *qpd)
129{
cea405b1
XZ
130 return dqm->dev->kfd2kgd->program_sh_mem_settings(
131 dqm->dev->kgd, qpd->vmid,
64c7f8cf
BG
132 qpd->sh_mem_config,
133 qpd->sh_mem_ape1_base,
134 qpd->sh_mem_ape1_limit,
135 qpd->sh_mem_bases);
136}
137
ef568db7
FK
138static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
139{
140 struct kfd_dev *dev = qpd->dqm->dev;
141
142 if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
143 /* On pre-SOC15 chips we need to use the queue ID to
144 * preserve the user mode ABI.
145 */
146 q->doorbell_id = q->properties.queue_id;
1b4670f6
OZ
147 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
148 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
234441dd
YZ
149 /* For SDMA queues on SOC15 with 8-byte doorbell, use static
150 * doorbell assignments based on the engine and queue id.
151 * The doobell index distance between RLC (2*i) and (2*i+1)
152 * for a SDMA engine is 512.
ef568db7 153 */
234441dd
YZ
154 uint32_t *idx_offset =
155 dev->shared_resources.sdma_doorbell_idx;
156
157 q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
158 + (q->properties.sdma_queue_id & 1)
159 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
160 + (q->properties.sdma_queue_id >> 1);
ef568db7
FK
161 } else {
162 /* For CP queues on SOC15 reserve a free doorbell ID */
163 unsigned int found;
164
165 found = find_first_zero_bit(qpd->doorbell_bitmap,
166 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
167 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
168 pr_debug("No doorbells available");
169 return -EBUSY;
170 }
171 set_bit(found, qpd->doorbell_bitmap);
172 q->doorbell_id = found;
173 }
174
175 q->properties.doorbell_off =
176 kfd_doorbell_id_to_offset(dev, q->process,
177 q->doorbell_id);
178
179 return 0;
180}
181
182static void deallocate_doorbell(struct qcm_process_device *qpd,
183 struct queue *q)
184{
185 unsigned int old;
186 struct kfd_dev *dev = qpd->dqm->dev;
187
188 if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
1b4670f6
OZ
189 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
190 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
ef568db7
FK
191 return;
192
193 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
194 WARN_ON(!old);
195}
196
64c7f8cf
BG
197static int allocate_vmid(struct device_queue_manager *dqm,
198 struct qcm_process_device *qpd,
199 struct queue *q)
200{
201 int bit, allocated_vmid;
202
203 if (dqm->vmid_bitmap == 0)
204 return -ENOMEM;
205
4252bf68
HK
206 bit = ffs(dqm->vmid_bitmap) - 1;
207 dqm->vmid_bitmap &= ~(1 << bit);
64c7f8cf 208
44008d7a 209 allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
79775b62 210 pr_debug("vmid allocation %d\n", allocated_vmid);
64c7f8cf
BG
211 qpd->vmid = allocated_vmid;
212 q->properties.vmid = allocated_vmid;
213
214 set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
215 program_sh_mem_settings(dqm, qpd);
216
403575c4
FK
217 /* qpd->page_table_base is set earlier when register_process()
218 * is called, i.e. when the first queue is created.
219 */
220 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
221 qpd->vmid,
222 qpd->page_table_base);
223 /* invalidate the VM context after pasid and vmid mapping is set up */
224 kfd_flush_tlb(qpd_to_pdd(qpd));
225
64c7f8cf
BG
226 return 0;
227}
228
552764b6
FK
229static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
230 struct qcm_process_device *qpd)
231{
f6e27ff1
FK
232 const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
233 int ret;
552764b6
FK
234
235 if (!qpd->ib_kaddr)
236 return -ENOMEM;
237
f6e27ff1
FK
238 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
239 if (ret)
240 return ret;
552764b6 241
5b87245f 242 return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
f6e27ff1
FK
243 qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
244 pmf->release_mem_size / sizeof(uint32_t));
552764b6
FK
245}
246
64c7f8cf
BG
247static void deallocate_vmid(struct device_queue_manager *dqm,
248 struct qcm_process_device *qpd,
249 struct queue *q)
250{
44008d7a 251 int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
64c7f8cf 252
552764b6
FK
253 /* On GFX v7, CP doesn't flush TC at dequeue */
254 if (q->device->device_info->asic_family == CHIP_HAWAII)
255 if (flush_texture_cache_nocpsch(q->device, qpd))
256 pr_err("Failed to flush TC\n");
257
403575c4
FK
258 kfd_flush_tlb(qpd_to_pdd(qpd));
259
2030664b
BG
260 /* Release the vmid mapping */
261 set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
262
4252bf68 263 dqm->vmid_bitmap |= (1 << bit);
64c7f8cf
BG
264 qpd->vmid = 0;
265 q->properties.vmid = 0;
266}
267
268static int create_queue_nocpsch(struct device_queue_manager *dqm,
269 struct queue *q,
b46cb7d7 270 struct qcm_process_device *qpd)
64c7f8cf
BG
271{
272 int retval;
273
64c7f8cf
BG
274 print_queue(q);
275
efeaed4d 276 dqm_lock(dqm);
64c7f8cf 277
b8cbab04 278 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
79775b62 279 pr_warn("Can't create new usermode queue because %d queues were already created\n",
b8cbab04 280 dqm->total_queue_count);
ab7c1648
KR
281 retval = -EPERM;
282 goto out_unlock;
b8cbab04
OG
283 }
284
64c7f8cf
BG
285 if (list_empty(&qpd->queues_list)) {
286 retval = allocate_vmid(dqm, qpd, q);
ab7c1648
KR
287 if (retval)
288 goto out_unlock;
64c7f8cf 289 }
64c7f8cf 290 q->properties.vmid = qpd->vmid;
26103436 291 /*
bb2d2128
FK
292 * Eviction state logic: mark all queues as evicted, even ones
293 * not currently active. Restoring inactive queues later only
294 * updates the is_evicted flag but is a no-op otherwise.
26103436 295 */
bb2d2128 296 q->properties.is_evicted = !!qpd->evicted;
64c7f8cf 297
373d7080
FK
298 q->properties.tba_addr = qpd->tba_addr;
299 q->properties.tma_addr = qpd->tma_addr;
300
bcea3081
BG
301 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
302 retval = create_compute_queue_nocpsch(dqm, q, qpd);
1b4670f6
OZ
303 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
304 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
bcea3081 305 retval = create_sdma_queue_nocpsch(dqm, q, qpd);
ab7c1648
KR
306 else
307 retval = -EINVAL;
64c7f8cf 308
4eacc26b 309 if (retval) {
b46cb7d7 310 if (list_empty(&qpd->queues_list))
64c7f8cf 311 deallocate_vmid(dqm, qpd, q);
ab7c1648 312 goto out_unlock;
64c7f8cf
BG
313 }
314
315 list_add(&q->list, &qpd->queues_list);
bc920fd4 316 qpd->queue_count++;
b6819cec
JC
317 if (q->properties.is_active)
318 dqm->queue_count++;
64c7f8cf 319
bcea3081
BG
320 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
321 dqm->sdma_queue_count++;
1b4670f6
OZ
322 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
323 dqm->xgmi_sdma_queue_count++;
64c7f8cf 324
b8cbab04
OG
325 /*
326 * Unconditionally increment this counter, regardless of the queue's
327 * type or whether the queue is active.
328 */
329 dqm->total_queue_count++;
330 pr_debug("Total of %d queues are accountable so far\n",
331 dqm->total_queue_count);
332
ab7c1648 333out_unlock:
efeaed4d 334 dqm_unlock(dqm);
ab7c1648 335 return retval;
64c7f8cf
BG
336}
337
338static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
339{
340 bool set;
f0ec5b99 341 int pipe, bit, i;
64c7f8cf
BG
342
343 set = false;
344
8eabaf54
KR
345 for (pipe = dqm->next_pipe_to_allocate, i = 0;
346 i < get_pipes_per_mec(dqm);
d0b63bb3
AR
347 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
348
349 if (!is_pipe_enabled(dqm, 0, pipe))
350 continue;
351
64c7f8cf 352 if (dqm->allocated_queues[pipe] != 0) {
4252bf68
HK
353 bit = ffs(dqm->allocated_queues[pipe]) - 1;
354 dqm->allocated_queues[pipe] &= ~(1 << bit);
64c7f8cf
BG
355 q->pipe = pipe;
356 q->queue = bit;
357 set = true;
358 break;
359 }
360 }
361
991ca8ee 362 if (!set)
64c7f8cf
BG
363 return -EBUSY;
364
79775b62 365 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
64c7f8cf 366 /* horizontal hqd allocation */
d0b63bb3 367 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
64c7f8cf
BG
368
369 return 0;
370}
371
372static inline void deallocate_hqd(struct device_queue_manager *dqm,
373 struct queue *q)
374{
4252bf68 375 dqm->allocated_queues[q->pipe] |= (1 << q->queue);
64c7f8cf
BG
376}
377
378static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
379 struct queue *q,
380 struct qcm_process_device *qpd)
381{
8d5f3552 382 struct mqd_manager *mqd_mgr;
1b19aa5a 383 int retval;
64c7f8cf 384
fdfa090b 385 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
64c7f8cf
BG
386
387 retval = allocate_hqd(dqm, q);
4eacc26b 388 if (retval)
64c7f8cf
BG
389 return retval;
390
ef568db7
FK
391 retval = allocate_doorbell(qpd, q);
392 if (retval)
393 goto out_deallocate_hqd;
394
8d5f3552 395 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
64c7f8cf 396 &q->gart_mqd_addr, &q->properties);
ab7c1648 397 if (retval)
ef568db7 398 goto out_deallocate_doorbell;
64c7f8cf 399
79775b62
KR
400 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
401 q->pipe, q->queue);
030e416b 402
6a1c9510
MR
403 dqm->dev->kfd2kgd->set_scratch_backing_va(
404 dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
405
60a00956
FK
406 if (!q->properties.is_active)
407 return 0;
408
1b19aa5a
FK
409 if (WARN(q->process->mm != current->mm,
410 "should only run in user thread"))
411 retval = -EFAULT;
412 else
413 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
414 &q->properties, current->mm);
ab7c1648
KR
415 if (retval)
416 goto out_uninit_mqd;
030e416b 417
64c7f8cf 418 return 0;
ab7c1648
KR
419
420out_uninit_mqd:
8d5f3552 421 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
ef568db7
FK
422out_deallocate_doorbell:
423 deallocate_doorbell(qpd, q);
ab7c1648
KR
424out_deallocate_hqd:
425 deallocate_hqd(dqm, q);
426
427 return retval;
64c7f8cf
BG
428}
429
9fd3f1bf
FK
430/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
431 * to avoid asynchronized access
432 */
433static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
64c7f8cf
BG
434 struct qcm_process_device *qpd,
435 struct queue *q)
436{
437 int retval;
8d5f3552 438 struct mqd_manager *mqd_mgr;
64c7f8cf 439
fdfa090b
OZ
440 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
441 q->properties.type)];
64c7f8cf 442
c2e1b3a4 443 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
c2e1b3a4
BG
444 deallocate_hqd(dqm, q);
445 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
c2e1b3a4 446 dqm->sdma_queue_count--;
1b4670f6
OZ
447 deallocate_sdma_queue(dqm, q);
448 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
449 dqm->xgmi_sdma_queue_count--;
450 deallocate_sdma_queue(dqm, q);
7113cd65 451 } else {
79775b62 452 pr_debug("q->properties.type %d is invalid\n",
7113cd65 453 q->properties.type);
9fd3f1bf 454 return -EINVAL;
64c7f8cf 455 }
9fd3f1bf 456 dqm->total_queue_count--;
64c7f8cf 457
ef568db7
FK
458 deallocate_doorbell(qpd, q);
459
8d5f3552 460 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
c2e1b3a4 461 KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
b90e3fbe 462 KFD_UNMAP_LATENCY_MS,
64c7f8cf 463 q->pipe, q->queue);
9fd3f1bf
FK
464 if (retval == -ETIME)
465 qpd->reset_wavefronts = true;
64c7f8cf 466
8d5f3552 467 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
64c7f8cf
BG
468
469 list_del(&q->list);
9fd3f1bf
FK
470 if (list_empty(&qpd->queues_list)) {
471 if (qpd->reset_wavefronts) {
472 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
473 dqm->dev);
474 /* dbgdev_wave_reset_wavefronts has to be called before
475 * deallocate_vmid(), i.e. when vmid is still in use.
476 */
477 dbgdev_wave_reset_wavefronts(dqm->dev,
478 qpd->pqm->process);
479 qpd->reset_wavefronts = false;
480 }
481
64c7f8cf 482 deallocate_vmid(dqm, qpd, q);
9fd3f1bf 483 }
bc920fd4 484 qpd->queue_count--;
b6819cec
JC
485 if (q->properties.is_active)
486 dqm->queue_count--;
b8cbab04 487
9fd3f1bf
FK
488 return retval;
489}
b8cbab04 490
9fd3f1bf
FK
491static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
492 struct qcm_process_device *qpd,
493 struct queue *q)
494{
495 int retval;
496
efeaed4d 497 dqm_lock(dqm);
9fd3f1bf 498 retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
efeaed4d 499 dqm_unlock(dqm);
9fd3f1bf 500
64c7f8cf
BG
501 return retval;
502}
503
504static int update_queue(struct device_queue_manager *dqm, struct queue *q)
505{
506 int retval;
8d5f3552 507 struct mqd_manager *mqd_mgr;
26103436 508 struct kfd_process_device *pdd;
b6ffbab8 509 bool prev_active = false;
64c7f8cf 510
efeaed4d 511 dqm_lock(dqm);
26103436
FK
512 pdd = kfd_get_process_device_data(q->device, q->process);
513 if (!pdd) {
514 retval = -ENODEV;
515 goto out_unlock;
516 }
fdfa090b
OZ
517 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
518 q->properties.type)];
64c7f8cf 519
60a00956
FK
520 /* Save previous activity state for counters */
521 prev_active = q->properties.is_active;
522
523 /* Make sure the queue is unmapped before updating the MQD */
d146c5a7 524 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
60a00956
FK
525 retval = unmap_queues_cpsch(dqm,
526 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
894a8293 527 if (retval) {
60a00956
FK
528 pr_err("unmap queue failed\n");
529 goto out_unlock;
530 }
894a8293 531 } else if (prev_active &&
60a00956 532 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
1b4670f6
OZ
533 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
534 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
8d5f3552 535 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
60a00956
FK
536 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
537 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
538 if (retval) {
539 pr_err("destroy mqd failed\n");
540 goto out_unlock;
541 }
542 }
543
8d5f3552 544 retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
60a00956 545
096d1a3e
FK
546 /*
547 * check active state vs. the previous state and modify
548 * counter accordingly. map_queues_cpsch uses the
549 * dqm->queue_count to determine whether a new runlist must be
550 * uploaded.
551 */
552 if (q->properties.is_active && !prev_active)
553 dqm->queue_count++;
554 else if (!q->properties.is_active && prev_active)
555 dqm->queue_count--;
556
d146c5a7 557 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
60a00956 558 retval = map_queues_cpsch(dqm);
894a8293 559 else if (q->properties.is_active &&
60a00956 560 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
1b4670f6
OZ
561 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
562 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1b19aa5a
FK
563 if (WARN(q->process->mm != current->mm,
564 "should only run in user thread"))
565 retval = -EFAULT;
566 else
567 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
568 q->pipe, q->queue,
569 &q->properties, current->mm);
570 }
b6ffbab8 571
ab7c1648 572out_unlock:
efeaed4d 573 dqm_unlock(dqm);
64c7f8cf
BG
574 return retval;
575}
576
26103436
FK
577static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
578 struct qcm_process_device *qpd)
579{
580 struct queue *q;
8d5f3552 581 struct mqd_manager *mqd_mgr;
26103436 582 struct kfd_process_device *pdd;
bb2d2128 583 int retval, ret = 0;
26103436 584
efeaed4d 585 dqm_lock(dqm);
26103436
FK
586 if (qpd->evicted++ > 0) /* already evicted, do nothing */
587 goto out;
588
589 pdd = qpd_to_pdd(qpd);
590 pr_info_ratelimited("Evicting PASID %u queues\n",
591 pdd->process->pasid);
592
bb2d2128
FK
593 /* Mark all queues as evicted. Deactivate all active queues on
594 * the qpd.
595 */
26103436 596 list_for_each_entry(q, &qpd->queues_list, list) {
bb2d2128 597 q->properties.is_evicted = true;
26103436
FK
598 if (!q->properties.is_active)
599 continue;
bb2d2128 600
fdfa090b
OZ
601 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
602 q->properties.type)];
26103436 603 q->properties.is_active = false;
8d5f3552 604 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
26103436
FK
605 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
606 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
bb2d2128
FK
607 if (retval && !ret)
608 /* Return the first error, but keep going to
609 * maintain a consistent eviction state
610 */
611 ret = retval;
26103436
FK
612 dqm->queue_count--;
613 }
614
615out:
efeaed4d 616 dqm_unlock(dqm);
bb2d2128 617 return ret;
26103436
FK
618}
619
620static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
621 struct qcm_process_device *qpd)
622{
623 struct queue *q;
624 struct kfd_process_device *pdd;
625 int retval = 0;
626
efeaed4d 627 dqm_lock(dqm);
26103436
FK
628 if (qpd->evicted++ > 0) /* already evicted, do nothing */
629 goto out;
630
631 pdd = qpd_to_pdd(qpd);
632 pr_info_ratelimited("Evicting PASID %u queues\n",
633 pdd->process->pasid);
634
bb2d2128
FK
635 /* Mark all queues as evicted. Deactivate all active queues on
636 * the qpd.
637 */
26103436 638 list_for_each_entry(q, &qpd->queues_list, list) {
bb2d2128 639 q->properties.is_evicted = true;
26103436
FK
640 if (!q->properties.is_active)
641 continue;
bb2d2128 642
26103436
FK
643 q->properties.is_active = false;
644 dqm->queue_count--;
645 }
646 retval = execute_queues_cpsch(dqm,
647 qpd->is_debug ?
648 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
649 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
650
651out:
efeaed4d 652 dqm_unlock(dqm);
26103436
FK
653 return retval;
654}
655
656static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
657 struct qcm_process_device *qpd)
658{
1b19aa5a 659 struct mm_struct *mm = NULL;
26103436 660 struct queue *q;
8d5f3552 661 struct mqd_manager *mqd_mgr;
26103436 662 struct kfd_process_device *pdd;
e715c6d0 663 uint64_t pd_base;
bb2d2128 664 int retval, ret = 0;
26103436
FK
665
666 pdd = qpd_to_pdd(qpd);
667 /* Retrieve PD base */
5b87245f 668 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
26103436 669
efeaed4d 670 dqm_lock(dqm);
26103436
FK
671 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
672 goto out;
673 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
674 qpd->evicted--;
675 goto out;
676 }
677
678 pr_info_ratelimited("Restoring PASID %u queues\n",
679 pdd->process->pasid);
680
681 /* Update PD Base in QPD */
682 qpd->page_table_base = pd_base;
e715c6d0 683 pr_debug("Updated PD address to 0x%llx\n", pd_base);
26103436
FK
684
685 if (!list_empty(&qpd->queues_list)) {
686 dqm->dev->kfd2kgd->set_vm_context_page_table_base(
687 dqm->dev->kgd,
688 qpd->vmid,
689 qpd->page_table_base);
690 kfd_flush_tlb(pdd);
691 }
692
1b19aa5a
FK
693 /* Take a safe reference to the mm_struct, which may otherwise
694 * disappear even while the kfd_process is still referenced.
695 */
696 mm = get_task_mm(pdd->process->lead_thread);
697 if (!mm) {
bb2d2128 698 ret = -EFAULT;
1b19aa5a
FK
699 goto out;
700 }
701
bb2d2128
FK
702 /* Remove the eviction flags. Activate queues that are not
703 * inactive for other reasons.
704 */
26103436 705 list_for_each_entry(q, &qpd->queues_list, list) {
bb2d2128
FK
706 q->properties.is_evicted = false;
707 if (!QUEUE_IS_ACTIVE(q->properties))
26103436 708 continue;
bb2d2128 709
fdfa090b
OZ
710 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
711 q->properties.type)];
26103436 712 q->properties.is_active = true;
8d5f3552 713 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
1b19aa5a 714 q->queue, &q->properties, mm);
bb2d2128
FK
715 if (retval && !ret)
716 /* Return the first error, but keep going to
717 * maintain a consistent eviction state
718 */
719 ret = retval;
26103436
FK
720 dqm->queue_count++;
721 }
722 qpd->evicted = 0;
723out:
1b19aa5a
FK
724 if (mm)
725 mmput(mm);
efeaed4d 726 dqm_unlock(dqm);
bb2d2128 727 return ret;
26103436
FK
728}
729
730static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
731 struct qcm_process_device *qpd)
732{
733 struct queue *q;
734 struct kfd_process_device *pdd;
e715c6d0 735 uint64_t pd_base;
26103436
FK
736 int retval = 0;
737
738 pdd = qpd_to_pdd(qpd);
739 /* Retrieve PD base */
5b87245f 740 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
26103436 741
efeaed4d 742 dqm_lock(dqm);
26103436
FK
743 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
744 goto out;
745 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
746 qpd->evicted--;
747 goto out;
748 }
749
750 pr_info_ratelimited("Restoring PASID %u queues\n",
751 pdd->process->pasid);
752
753 /* Update PD Base in QPD */
754 qpd->page_table_base = pd_base;
e715c6d0 755 pr_debug("Updated PD address to 0x%llx\n", pd_base);
26103436
FK
756
757 /* activate all active queues on the qpd */
758 list_for_each_entry(q, &qpd->queues_list, list) {
26103436 759 q->properties.is_evicted = false;
bb2d2128
FK
760 if (!QUEUE_IS_ACTIVE(q->properties))
761 continue;
762
26103436
FK
763 q->properties.is_active = true;
764 dqm->queue_count++;
765 }
766 retval = execute_queues_cpsch(dqm,
767 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
bb2d2128 768 qpd->evicted = 0;
26103436 769out:
efeaed4d 770 dqm_unlock(dqm);
26103436
FK
771 return retval;
772}
773
58dcd5bf 774static int register_process(struct device_queue_manager *dqm,
64c7f8cf
BG
775 struct qcm_process_device *qpd)
776{
777 struct device_process_node *n;
403575c4 778 struct kfd_process_device *pdd;
e715c6d0 779 uint64_t pd_base;
a22fc854 780 int retval;
64c7f8cf 781
dbf56ab1 782 n = kzalloc(sizeof(*n), GFP_KERNEL);
64c7f8cf
BG
783 if (!n)
784 return -ENOMEM;
785
786 n->qpd = qpd;
787
403575c4
FK
788 pdd = qpd_to_pdd(qpd);
789 /* Retrieve PD base */
5b87245f 790 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);
403575c4 791
efeaed4d 792 dqm_lock(dqm);
64c7f8cf
BG
793 list_add(&n->list, &dqm->queues);
794
403575c4
FK
795 /* Update PD Base in QPD */
796 qpd->page_table_base = pd_base;
e715c6d0 797 pr_debug("Updated PD address to 0x%llx\n", pd_base);
403575c4 798
bfd5e378 799 retval = dqm->asic_ops.update_qpd(dqm, qpd);
a22fc854 800
f756e631 801 dqm->processes_count++;
64c7f8cf 802
efeaed4d 803 dqm_unlock(dqm);
64c7f8cf 804
32cce8bc
FK
805 /* Outside the DQM lock because under the DQM lock we can't do
806 * reclaim or take other locks that others hold while reclaiming.
807 */
808 kfd_inc_compute_active(dqm->dev);
809
a22fc854 810 return retval;
64c7f8cf
BG
811}
812
58dcd5bf 813static int unregister_process(struct device_queue_manager *dqm,
64c7f8cf
BG
814 struct qcm_process_device *qpd)
815{
816 int retval;
817 struct device_process_node *cur, *next;
818
1e5ec956
OG
819 pr_debug("qpd->queues_list is %s\n",
820 list_empty(&qpd->queues_list) ? "empty" : "not empty");
64c7f8cf
BG
821
822 retval = 0;
efeaed4d 823 dqm_lock(dqm);
64c7f8cf
BG
824
825 list_for_each_entry_safe(cur, next, &dqm->queues, list) {
826 if (qpd == cur->qpd) {
827 list_del(&cur->list);
f5d896bb 828 kfree(cur);
f756e631 829 dqm->processes_count--;
64c7f8cf
BG
830 goto out;
831 }
832 }
833 /* qpd not found in dqm list */
834 retval = 1;
835out:
efeaed4d 836 dqm_unlock(dqm);
32cce8bc
FK
837
838 /* Outside the DQM lock because under the DQM lock we can't do
839 * reclaim or take other locks that others hold while reclaiming.
840 */
841 if (!retval)
842 kfd_dec_compute_active(dqm->dev);
843
64c7f8cf
BG
844 return retval;
845}
846
847static int
848set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
849 unsigned int vmid)
850{
cea405b1 851 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
deb99d7c 852 dqm->dev->kgd, pasid, vmid);
64c7f8cf
BG
853}
854
2249d558
AL
855static void init_interrupts(struct device_queue_manager *dqm)
856{
857 unsigned int i;
858
d0b63bb3
AR
859 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
860 if (is_pipe_enabled(dqm, 0, i))
861 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
2249d558
AL
862}
863
64c7f8cf
BG
864static int initialize_nocpsch(struct device_queue_manager *dqm)
865{
86194cf8 866 int pipe, queue;
64c7f8cf 867
79775b62 868 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
64c7f8cf 869
ab7c1648
KR
870 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
871 sizeof(unsigned int), GFP_KERNEL);
872 if (!dqm->allocated_queues)
873 return -ENOMEM;
874
efeaed4d 875 mutex_init(&dqm->lock_hidden);
64c7f8cf
BG
876 INIT_LIST_HEAD(&dqm->queues);
877 dqm->queue_count = dqm->next_pipe_to_allocate = 0;
bcea3081 878 dqm->sdma_queue_count = 0;
1b4670f6 879 dqm->xgmi_sdma_queue_count = 0;
64c7f8cf 880
86194cf8
FK
881 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
882 int pipe_offset = pipe * get_queues_per_pipe(dqm);
883
884 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
885 if (test_bit(pipe_offset + queue,
886 dqm->dev->shared_resources.queue_bitmap))
887 dqm->allocated_queues[pipe] |= 1 << queue;
888 }
64c7f8cf 889
44008d7a 890 dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
cb77ee7c 891 dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
1b4670f6 892 dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
64c7f8cf 893
64c7f8cf
BG
894 return 0;
895}
896
58dcd5bf 897static void uninitialize(struct device_queue_manager *dqm)
64c7f8cf 898{
6f9d54fd
OG
899 int i;
900
32fa8219 901 WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
64c7f8cf
BG
902
903 kfree(dqm->allocated_queues);
6f9d54fd 904 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
8d5f3552 905 kfree(dqm->mqd_mgrs[i]);
efeaed4d 906 mutex_destroy(&dqm->lock_hidden);
a86aa3ca 907 kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
64c7f8cf
BG
908}
909
910static int start_nocpsch(struct device_queue_manager *dqm)
911{
2249d558 912 init_interrupts(dqm);
552764b6 913 return pm_init(&dqm->packets, dqm);
64c7f8cf
BG
914}
915
916static int stop_nocpsch(struct device_queue_manager *dqm)
917{
552764b6 918 pm_uninit(&dqm->packets);
64c7f8cf
BG
919 return 0;
920}
921
bcea3081 922static int allocate_sdma_queue(struct device_queue_manager *dqm,
e78579aa 923 struct queue *q)
bcea3081
BG
924{
925 int bit;
926
1b4670f6
OZ
927 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
928 if (dqm->sdma_bitmap == 0)
929 return -ENOMEM;
930 bit = __ffs64(dqm->sdma_bitmap);
931 dqm->sdma_bitmap &= ~(1ULL << bit);
932 q->sdma_id = bit;
933 q->properties.sdma_engine_id = q->sdma_id %
934 get_num_sdma_engines(dqm);
935 q->properties.sdma_queue_id = q->sdma_id /
936 get_num_sdma_engines(dqm);
937 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
938 if (dqm->xgmi_sdma_bitmap == 0)
939 return -ENOMEM;
940 bit = __ffs64(dqm->xgmi_sdma_bitmap);
941 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
942 q->sdma_id = bit;
943 /* sdma_engine_id is sdma id including
944 * both PCIe-optimized SDMAs and XGMI-
945 * optimized SDMAs. The calculation below
946 * assumes the first N engines are always
947 * PCIe-optimized ones
948 */
949 q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
950 q->sdma_id % get_num_xgmi_sdma_engines(dqm);
951 q->properties.sdma_queue_id = q->sdma_id /
952 get_num_xgmi_sdma_engines(dqm);
953 }
e78579aa 954
e78579aa
YZ
955 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
956 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
bcea3081
BG
957
958 return 0;
959}
960
961static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1b4670f6 962 struct queue *q)
bcea3081 963{
1b4670f6
OZ
964 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
965 if (q->sdma_id >= get_num_sdma_queues(dqm))
966 return;
967 dqm->sdma_bitmap |= (1ULL << q->sdma_id);
968 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
969 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
970 return;
971 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
972 }
bcea3081
BG
973}
974
bcea3081
BG
975static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
976 struct queue *q,
977 struct qcm_process_device *qpd)
978{
8d5f3552 979 struct mqd_manager *mqd_mgr;
bcea3081
BG
980 int retval;
981
fdfa090b 982 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];
bcea3081 983
e78579aa 984 retval = allocate_sdma_queue(dqm, q);
4eacc26b 985 if (retval)
bcea3081
BG
986 return retval;
987
ef568db7
FK
988 retval = allocate_doorbell(qpd, q);
989 if (retval)
990 goto out_deallocate_sdma_queue;
991
bfd5e378 992 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
8d5f3552 993 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
bcea3081 994 &q->gart_mqd_addr, &q->properties);
ab7c1648 995 if (retval)
ef568db7 996 goto out_deallocate_doorbell;
bcea3081 997
2ff52819
OZ
998 if (!q->properties.is_active)
999 return 0;
1000
8d5f3552 1001 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
2ff52819 1002 current->mm);
ab7c1648
KR
1003 if (retval)
1004 goto out_uninit_mqd;
4fadf6b6 1005
bcea3081 1006 return 0;
ab7c1648
KR
1007
1008out_uninit_mqd:
8d5f3552 1009 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
ef568db7
FK
1010out_deallocate_doorbell:
1011 deallocate_doorbell(qpd, q);
ab7c1648 1012out_deallocate_sdma_queue:
1b4670f6 1013 deallocate_sdma_queue(dqm, q);
ab7c1648
KR
1014
1015 return retval;
bcea3081
BG
1016}
1017
64c7f8cf
BG
1018/*
1019 * Device Queue Manager implementation for cp scheduler
1020 */
1021
1022static int set_sched_resources(struct device_queue_manager *dqm)
1023{
d0b63bb3 1024 int i, mec;
64c7f8cf 1025 struct scheduling_resources res;
64c7f8cf 1026
44008d7a 1027 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
d0b63bb3
AR
1028
1029 res.queue_mask = 0;
1030 for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1031 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1032 / dqm->dev->shared_resources.num_pipe_per_mec;
1033
1034 if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
1035 continue;
1036
1037 /* only acquire queues from the first MEC */
1038 if (mec > 0)
1039 continue;
1040
1041 /* This situation may be hit in the future if a new HW
1042 * generation exposes more than 64 queues. If so, the
8eabaf54
KR
1043 * definition of res.queue_mask needs updating
1044 */
1d11ee89 1045 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
d0b63bb3
AR
1046 pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1047 break;
1048 }
1049
1050 res.queue_mask |= (1ull << i);
1051 }
64c7f8cf
BG
1052 res.gws_mask = res.oac_mask = res.gds_heap_base =
1053 res.gds_heap_size = 0;
1054
79775b62
KR
1055 pr_debug("Scheduling resources:\n"
1056 "vmid mask: 0x%8X\n"
1057 "queue mask: 0x%8llX\n",
64c7f8cf
BG
1058 res.vmid_mask, res.queue_mask);
1059
1060 return pm_send_set_resources(&dqm->packets, &res);
1061}
1062
1063static int initialize_cpsch(struct device_queue_manager *dqm)
1064{
79775b62 1065 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
64c7f8cf 1066
efeaed4d 1067 mutex_init(&dqm->lock_hidden);
64c7f8cf
BG
1068 INIT_LIST_HEAD(&dqm->queues);
1069 dqm->queue_count = dqm->processes_count = 0;
bcea3081 1070 dqm->sdma_queue_count = 0;
1b4670f6 1071 dqm->xgmi_sdma_queue_count = 0;
64c7f8cf 1072 dqm->active_runlist = false;
cb77ee7c 1073 dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;
1b4670f6 1074 dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1;
64c7f8cf 1075
73ea648d
SL
1076 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1077
bfd5e378 1078 return 0;
64c7f8cf
BG
1079}
1080
1081static int start_cpsch(struct device_queue_manager *dqm)
1082{
64c7f8cf
BG
1083 int retval;
1084
64c7f8cf
BG
1085 retval = 0;
1086
1087 retval = pm_init(&dqm->packets, dqm);
4eacc26b 1088 if (retval)
64c7f8cf
BG
1089 goto fail_packet_manager_init;
1090
1091 retval = set_sched_resources(dqm);
4eacc26b 1092 if (retval)
64c7f8cf
BG
1093 goto fail_set_sched_resources;
1094
79775b62 1095 pr_debug("Allocating fence memory\n");
64c7f8cf
BG
1096
1097 /* allocate fence memory on the gart */
a86aa3ca
OG
1098 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1099 &dqm->fence_mem);
64c7f8cf 1100
4eacc26b 1101 if (retval)
64c7f8cf
BG
1102 goto fail_allocate_vidmem;
1103
1104 dqm->fence_addr = dqm->fence_mem->cpu_ptr;
1105 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
2249d558
AL
1106
1107 init_interrupts(dqm);
1108
efeaed4d 1109 dqm_lock(dqm);
73ea648d
SL
1110 /* clear hang status when driver try to start the hw scheduler */
1111 dqm->is_hws_hang = false;
c4744e24 1112 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
efeaed4d 1113 dqm_unlock(dqm);
64c7f8cf
BG
1114
1115 return 0;
1116fail_allocate_vidmem:
1117fail_set_sched_resources:
1118 pm_uninit(&dqm->packets);
1119fail_packet_manager_init:
1120 return retval;
1121}
1122
1123static int stop_cpsch(struct device_queue_manager *dqm)
1124{
efeaed4d 1125 dqm_lock(dqm);
4465f466 1126 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
efeaed4d 1127 dqm_unlock(dqm);
64c7f8cf 1128
a86aa3ca 1129 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
64c7f8cf
BG
1130 pm_uninit(&dqm->packets);
1131
1132 return 0;
1133}
1134
1135static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1136 struct kernel_queue *kq,
1137 struct qcm_process_device *qpd)
1138{
efeaed4d 1139 dqm_lock(dqm);
b8cbab04 1140 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
79775b62 1141 pr_warn("Can't create new kernel queue because %d queues were already created\n",
b8cbab04 1142 dqm->total_queue_count);
efeaed4d 1143 dqm_unlock(dqm);
b8cbab04
OG
1144 return -EPERM;
1145 }
1146
1147 /*
1148 * Unconditionally increment this counter, regardless of the queue's
1149 * type or whether the queue is active.
1150 */
1151 dqm->total_queue_count++;
1152 pr_debug("Total of %d queues are accountable so far\n",
1153 dqm->total_queue_count);
1154
64c7f8cf
BG
1155 list_add(&kq->list, &qpd->priv_queue_list);
1156 dqm->queue_count++;
1157 qpd->is_debug = true;
c4744e24 1158 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
efeaed4d 1159 dqm_unlock(dqm);
64c7f8cf
BG
1160
1161 return 0;
1162}
1163
1164static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1165 struct kernel_queue *kq,
1166 struct qcm_process_device *qpd)
1167{
efeaed4d 1168 dqm_lock(dqm);
64c7f8cf
BG
1169 list_del(&kq->list);
1170 dqm->queue_count--;
1171 qpd->is_debug = false;
c4744e24 1172 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
b8cbab04
OG
1173 /*
1174 * Unconditionally decrement this counter, regardless of the queue's
1175 * type.
1176 */
8b58f261 1177 dqm->total_queue_count--;
b8cbab04
OG
1178 pr_debug("Total of %d queues are accountable so far\n",
1179 dqm->total_queue_count);
efeaed4d 1180 dqm_unlock(dqm);
64c7f8cf
BG
1181}
1182
1183static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
b46cb7d7 1184 struct qcm_process_device *qpd)
64c7f8cf
BG
1185{
1186 int retval;
8d5f3552 1187 struct mqd_manager *mqd_mgr;
64c7f8cf 1188
b8cbab04 1189 if (dqm->total_queue_count >= max_num_of_queues_per_device) {
79775b62 1190 pr_warn("Can't create new usermode queue because %d queues were already created\n",
b8cbab04
OG
1191 dqm->total_queue_count);
1192 retval = -EPERM;
89cd9d23 1193 goto out;
b8cbab04
OG
1194 }
1195
1b4670f6
OZ
1196 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1197 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
e78579aa 1198 retval = allocate_sdma_queue(dqm, q);
894a8293 1199 if (retval)
89cd9d23 1200 goto out;
e139cd2a 1201 }
ef568db7
FK
1202
1203 retval = allocate_doorbell(qpd, q);
1204 if (retval)
1205 goto out_deallocate_sdma_queue;
1206
fdfa090b
OZ
1207 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1208 q->properties.type)];
26103436 1209 /*
bb2d2128
FK
1210 * Eviction state logic: mark all queues as evicted, even ones
1211 * not currently active. Restoring inactive queues later only
1212 * updates the is_evicted flag but is a no-op otherwise.
26103436 1213 */
bb2d2128 1214 q->properties.is_evicted = !!qpd->evicted;
eec0b4cf
OZ
1215 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1216 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1217 dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
373d7080
FK
1218 q->properties.tba_addr = qpd->tba_addr;
1219 q->properties.tma_addr = qpd->tma_addr;
8d5f3552 1220 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
64c7f8cf 1221 &q->gart_mqd_addr, &q->properties);
4eacc26b 1222 if (retval)
ef568db7 1223 goto out_deallocate_doorbell;
64c7f8cf 1224
89cd9d23
PY
1225 dqm_lock(dqm);
1226
64c7f8cf 1227 list_add(&q->list, &qpd->queues_list);
bc920fd4 1228 qpd->queue_count++;
64c7f8cf
BG
1229 if (q->properties.is_active) {
1230 dqm->queue_count++;
c4744e24
YZ
1231 retval = execute_queues_cpsch(dqm,
1232 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
64c7f8cf
BG
1233 }
1234
bcea3081 1235 if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
8eabaf54 1236 dqm->sdma_queue_count++;
1b4670f6
OZ
1237 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1238 dqm->xgmi_sdma_queue_count++;
b8cbab04
OG
1239 /*
1240 * Unconditionally increment this counter, regardless of the queue's
1241 * type or whether the queue is active.
1242 */
1243 dqm->total_queue_count++;
1244
1245 pr_debug("Total of %d queues are accountable so far\n",
1246 dqm->total_queue_count);
1247
efeaed4d 1248 dqm_unlock(dqm);
72a01d23
FK
1249 return retval;
1250
ef568db7
FK
1251out_deallocate_doorbell:
1252 deallocate_doorbell(qpd, q);
72a01d23 1253out_deallocate_sdma_queue:
1b4670f6
OZ
1254 if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1255 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1256 deallocate_sdma_queue(dqm, q);
89cd9d23 1257out:
64c7f8cf
BG
1258 return retval;
1259}
1260
788bf83d 1261int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
d80d19bd 1262 unsigned int fence_value,
8c72c3d7 1263 unsigned int timeout_ms)
64c7f8cf 1264{
8c72c3d7 1265 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
64c7f8cf
BG
1266
1267 while (*fence_addr != fence_value) {
8c72c3d7 1268 if (time_after(jiffies, end_jiffies)) {
79775b62 1269 pr_err("qcm fence wait loop timeout expired\n");
0e9a860c
YZ
1270 /* In HWS case, this is used to halt the driver thread
1271 * in order not to mess up CP states before doing
1272 * scandumps for FW debugging.
1273 */
1274 while (halt_if_hws_hang)
1275 schedule();
1276
64c7f8cf
BG
1277 return -ETIME;
1278 }
99331a51 1279 schedule();
64c7f8cf
BG
1280 }
1281
1282 return 0;
1283}
1284
065e4bdf 1285static int unmap_sdma_queues(struct device_queue_manager *dqm)
bcea3081 1286{
065e4bdf
OZ
1287 int i, retval = 0;
1288
1b4670f6
OZ
1289 for (i = 0; i < dqm->dev->device_info->num_sdma_engines +
1290 dqm->dev->device_info->num_xgmi_sdma_engines; i++) {
065e4bdf
OZ
1291 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
1292 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i);
1293 if (retval)
1294 return retval;
1295 }
1296 return retval;
bcea3081
BG
1297}
1298
60a00956
FK
1299/* dqm->lock mutex has to be locked before calling this function */
1300static int map_queues_cpsch(struct device_queue_manager *dqm)
1301{
1302 int retval;
1303
1304 if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
1305 return 0;
1306
1307 if (dqm->active_runlist)
1308 return 0;
1309
1310 retval = pm_send_runlist(&dqm->packets, &dqm->queues);
1311 if (retval) {
1312 pr_err("failed to execute runlist\n");
1313 return retval;
1314 }
1315 dqm->active_runlist = true;
1316
1317 return retval;
1318}
1319
ac30c783 1320/* dqm->lock mutex has to be locked before calling this function */
7da2bcf8 1321static int unmap_queues_cpsch(struct device_queue_manager *dqm,
4465f466
YZ
1322 enum kfd_unmap_queues_filter filter,
1323 uint32_t filter_param)
64c7f8cf 1324{
9fd3f1bf 1325 int retval = 0;
64c7f8cf 1326
73ea648d
SL
1327 if (dqm->is_hws_hang)
1328 return -EIO;
991ca8ee 1329 if (!dqm->active_runlist)
ac30c783 1330 return retval;
bcea3081 1331
1b4670f6
OZ
1332 pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n",
1333 dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count);
bcea3081 1334
1b4670f6 1335 if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count)
065e4bdf 1336 unmap_sdma_queues(dqm);
bcea3081 1337
64c7f8cf 1338 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
4465f466 1339 filter, filter_param, false, 0);
4eacc26b 1340 if (retval)
ac30c783 1341 return retval;
64c7f8cf
BG
1342
1343 *dqm->fence_addr = KFD_FENCE_INIT;
1344 pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
1345 KFD_FENCE_COMPLETED);
1346 /* should be timed out */
c3447e81 1347 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
64c7f8cf 1348 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
9fd3f1bf 1349 if (retval)
ac30c783 1350 return retval;
9fd3f1bf 1351
64c7f8cf
BG
1352 pm_release_ib(&dqm->packets);
1353 dqm->active_runlist = false;
1354
64c7f8cf
BG
1355 return retval;
1356}
1357
ac30c783 1358/* dqm->lock mutex has to be locked before calling this function */
c4744e24
YZ
1359static int execute_queues_cpsch(struct device_queue_manager *dqm,
1360 enum kfd_unmap_queues_filter filter,
1361 uint32_t filter_param)
64c7f8cf
BG
1362{
1363 int retval;
1364
73ea648d
SL
1365 if (dqm->is_hws_hang)
1366 return -EIO;
c4744e24 1367 retval = unmap_queues_cpsch(dqm, filter, filter_param);
4eacc26b 1368 if (retval) {
c4744e24 1369 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
73ea648d
SL
1370 dqm->is_hws_hang = true;
1371 schedule_work(&dqm->hw_exception_work);
ac30c783 1372 return retval;
64c7f8cf
BG
1373 }
1374
60a00956 1375 return map_queues_cpsch(dqm);
64c7f8cf
BG
1376}
1377
1378static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1379 struct qcm_process_device *qpd,
1380 struct queue *q)
1381{
1382 int retval;
8d5f3552 1383 struct mqd_manager *mqd_mgr;
992839ad 1384
64c7f8cf
BG
1385 retval = 0;
1386
1387 /* remove queue from list to prevent rescheduling after preemption */
efeaed4d 1388 dqm_lock(dqm);
992839ad
YS
1389
1390 if (qpd->is_debug) {
1391 /*
1392 * error, currently we do not allow to destroy a queue
1393 * of a currently debugged process
1394 */
1395 retval = -EBUSY;
1396 goto failed_try_destroy_debugged_queue;
1397
1398 }
1399
fdfa090b
OZ
1400 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1401 q->properties.type)];
64c7f8cf 1402
ef568db7
FK
1403 deallocate_doorbell(qpd, q);
1404
e139cd2a 1405 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
bcea3081 1406 dqm->sdma_queue_count--;
1b4670f6
OZ
1407 deallocate_sdma_queue(dqm, q);
1408 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1409 dqm->xgmi_sdma_queue_count--;
1410 deallocate_sdma_queue(dqm, q);
e139cd2a 1411 }
bcea3081 1412
64c7f8cf 1413 list_del(&q->list);
bc920fd4 1414 qpd->queue_count--;
40a526dc 1415 if (q->properties.is_active) {
b6819cec 1416 dqm->queue_count--;
40a526dc 1417 retval = execute_queues_cpsch(dqm,
9fd3f1bf 1418 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
40a526dc
YZ
1419 if (retval == -ETIME)
1420 qpd->reset_wavefronts = true;
1421 }
64c7f8cf 1422
b8cbab04
OG
1423 /*
1424 * Unconditionally decrement this counter, regardless of the queue's
1425 * type
1426 */
1427 dqm->total_queue_count--;
1428 pr_debug("Total of %d queues are accountable so far\n",
1429 dqm->total_queue_count);
64c7f8cf 1430
efeaed4d 1431 dqm_unlock(dqm);
64c7f8cf 1432
89cd9d23
PY
1433 /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
1434 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1435
9e827224 1436 return retval;
64c7f8cf 1437
992839ad
YS
1438failed_try_destroy_debugged_queue:
1439
efeaed4d 1440 dqm_unlock(dqm);
64c7f8cf
BG
1441 return retval;
1442}
1443
1444/*
1445 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1446 * stay in user mode.
1447 */
1448#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1449/* APE1 limit is inclusive and 64K aligned. */
1450#define APE1_LIMIT_ALIGNMENT 0xFFFF
1451
1452static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1453 struct qcm_process_device *qpd,
1454 enum cache_policy default_policy,
1455 enum cache_policy alternate_policy,
1456 void __user *alternate_aperture_base,
1457 uint64_t alternate_aperture_size)
1458{
bed4f110
FK
1459 bool retval = true;
1460
1461 if (!dqm->asic_ops.set_cache_memory_policy)
1462 return retval;
64c7f8cf 1463
efeaed4d 1464 dqm_lock(dqm);
64c7f8cf
BG
1465
1466 if (alternate_aperture_size == 0) {
1467 /* base > limit disables APE1 */
1468 qpd->sh_mem_ape1_base = 1;
1469 qpd->sh_mem_ape1_limit = 0;
1470 } else {
1471 /*
1472 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1473 * SH_MEM_APE1_BASE[31:0], 0x0000 }
1474 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1475 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1476 * Verify that the base and size parameters can be
1477 * represented in this format and convert them.
1478 * Additionally restrict APE1 to user-mode addresses.
1479 */
1480
1481 uint64_t base = (uintptr_t)alternate_aperture_base;
1482 uint64_t limit = base + alternate_aperture_size - 1;
1483
ab7c1648
KR
1484 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1485 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1486 retval = false;
64c7f8cf 1487 goto out;
ab7c1648 1488 }
64c7f8cf
BG
1489
1490 qpd->sh_mem_ape1_base = base >> 16;
1491 qpd->sh_mem_ape1_limit = limit >> 16;
1492 }
1493
bfd5e378 1494 retval = dqm->asic_ops.set_cache_memory_policy(
a22fc854
BG
1495 dqm,
1496 qpd,
1497 default_policy,
1498 alternate_policy,
1499 alternate_aperture_base,
1500 alternate_aperture_size);
64c7f8cf 1501
d146c5a7 1502 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
64c7f8cf
BG
1503 program_sh_mem_settings(dqm, qpd);
1504
79775b62 1505 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
64c7f8cf
BG
1506 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1507 qpd->sh_mem_ape1_limit);
1508
64c7f8cf 1509out:
efeaed4d 1510 dqm_unlock(dqm);
ab7c1648 1511 return retval;
64c7f8cf
BG
1512}
1513
d7b9bd22
FK
1514static int set_trap_handler(struct device_queue_manager *dqm,
1515 struct qcm_process_device *qpd,
1516 uint64_t tba_addr,
1517 uint64_t tma_addr)
1518{
1519 uint64_t *tma;
1520
1521 if (dqm->dev->cwsr_enabled) {
1522 /* Jump from CWSR trap handler to user trap */
1523 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
1524 tma[0] = tba_addr;
1525 tma[1] = tma_addr;
1526 } else {
1527 qpd->tba_addr = tba_addr;
1528 qpd->tma_addr = tma_addr;
1529 }
1530
1531 return 0;
1532}
1533
9fd3f1bf
FK
1534static int process_termination_nocpsch(struct device_queue_manager *dqm,
1535 struct qcm_process_device *qpd)
1536{
1537 struct queue *q, *next;
1538 struct device_process_node *cur, *next_dpn;
1539 int retval = 0;
32cce8bc 1540 bool found = false;
9fd3f1bf 1541
efeaed4d 1542 dqm_lock(dqm);
9fd3f1bf
FK
1543
1544 /* Clear all user mode queues */
1545 list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
1546 int ret;
1547
1548 ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1549 if (ret)
1550 retval = ret;
1551 }
1552
1553 /* Unregister process */
1554 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1555 if (qpd == cur->qpd) {
1556 list_del(&cur->list);
1557 kfree(cur);
1558 dqm->processes_count--;
32cce8bc 1559 found = true;
9fd3f1bf
FK
1560 break;
1561 }
1562 }
1563
efeaed4d 1564 dqm_unlock(dqm);
32cce8bc
FK
1565
1566 /* Outside the DQM lock because under the DQM lock we can't do
1567 * reclaim or take other locks that others hold while reclaiming.
1568 */
1569 if (found)
1570 kfd_dec_compute_active(dqm->dev);
1571
9fd3f1bf
FK
1572 return retval;
1573}
1574
5df099e8
JC
1575static int get_wave_state(struct device_queue_manager *dqm,
1576 struct queue *q,
1577 void __user *ctl_stack,
1578 u32 *ctl_stack_used_size,
1579 u32 *save_area_used_size)
1580{
4e6c6fc1 1581 struct mqd_manager *mqd_mgr;
5df099e8
JC
1582 int r;
1583
1584 dqm_lock(dqm);
1585
1586 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1587 q->properties.is_active || !q->device->cwsr_enabled) {
1588 r = -EINVAL;
1589 goto dqm_unlock;
1590 }
1591
fdfa090b 1592 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
5df099e8 1593
4e6c6fc1 1594 if (!mqd_mgr->get_wave_state) {
5df099e8
JC
1595 r = -EINVAL;
1596 goto dqm_unlock;
1597 }
1598
4e6c6fc1
YZ
1599 r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1600 ctl_stack_used_size, save_area_used_size);
5df099e8
JC
1601
1602dqm_unlock:
1603 dqm_unlock(dqm);
1604 return r;
1605}
9fd3f1bf
FK
1606
1607static int process_termination_cpsch(struct device_queue_manager *dqm,
1608 struct qcm_process_device *qpd)
1609{
1610 int retval;
1611 struct queue *q, *next;
1612 struct kernel_queue *kq, *kq_next;
8d5f3552 1613 struct mqd_manager *mqd_mgr;
9fd3f1bf
FK
1614 struct device_process_node *cur, *next_dpn;
1615 enum kfd_unmap_queues_filter filter =
1616 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
32cce8bc 1617 bool found = false;
9fd3f1bf
FK
1618
1619 retval = 0;
1620
efeaed4d 1621 dqm_lock(dqm);
9fd3f1bf
FK
1622
1623 /* Clean all kernel queues */
1624 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1625 list_del(&kq->list);
1626 dqm->queue_count--;
1627 qpd->is_debug = false;
1628 dqm->total_queue_count--;
1629 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1630 }
1631
1632 /* Clear all user mode queues */
1633 list_for_each_entry(q, &qpd->queues_list, list) {
72a01d23 1634 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
9fd3f1bf 1635 dqm->sdma_queue_count--;
1b4670f6
OZ
1636 deallocate_sdma_queue(dqm, q);
1637 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1638 dqm->xgmi_sdma_queue_count--;
1639 deallocate_sdma_queue(dqm, q);
72a01d23 1640 }
9fd3f1bf
FK
1641
1642 if (q->properties.is_active)
1643 dqm->queue_count--;
1644
1645 dqm->total_queue_count--;
1646 }
1647
1648 /* Unregister process */
1649 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1650 if (qpd == cur->qpd) {
1651 list_del(&cur->list);
1652 kfree(cur);
1653 dqm->processes_count--;
32cce8bc 1654 found = true;
9fd3f1bf
FK
1655 break;
1656 }
1657 }
1658
1659 retval = execute_queues_cpsch(dqm, filter, 0);
73ea648d 1660 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
9fd3f1bf
FK
1661 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1662 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1663 qpd->reset_wavefronts = false;
1664 }
1665
89cd9d23
PY
1666 dqm_unlock(dqm);
1667
32cce8bc
FK
1668 /* Outside the DQM lock because under the DQM lock we can't do
1669 * reclaim or take other locks that others hold while reclaiming.
1670 */
1671 if (found)
1672 kfd_dec_compute_active(dqm->dev);
1673
89cd9d23
PY
1674 /* Lastly, free mqd resources.
1675 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
1676 */
9fd3f1bf 1677 list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
fdfa090b
OZ
1678 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1679 q->properties.type)];
9fd3f1bf 1680 list_del(&q->list);
bc920fd4 1681 qpd->queue_count--;
8d5f3552 1682 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
9fd3f1bf
FK
1683 }
1684
9fd3f1bf
FK
1685 return retval;
1686}
1687
fdfa090b
OZ
1688static int init_mqd_managers(struct device_queue_manager *dqm)
1689{
1690 int i, j;
1691 struct mqd_manager *mqd_mgr;
1692
1693 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1694 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1695 if (!mqd_mgr) {
1696 pr_err("mqd manager [%d] initialization failed\n", i);
1697 goto out_free;
1698 }
1699 dqm->mqd_mgrs[i] = mqd_mgr;
1700 }
1701
1702 return 0;
1703
1704out_free:
1705 for (j = 0; j < i; j++) {
1706 kfree(dqm->mqd_mgrs[j]);
1707 dqm->mqd_mgrs[j] = NULL;
1708 }
1709
1710 return -ENOMEM;
1711}
11614c36
OZ
1712
1713/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/
1714static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1715{
1716 int retval;
1717 struct kfd_dev *dev = dqm->dev;
1718 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1719 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1720 dev->device_info->num_sdma_engines *
1721 dev->device_info->num_sdma_queues_per_engine +
1722 dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1723
1724 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1725 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1726 (void *)&(mem_obj->cpu_ptr), true);
1727
1728 return retval;
1729}
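/*
 * Editor's sketch, not part of the driver: index arithmetic for one plausible
 * layout of the trunk sized above (HIQ MQD first, then one SDMA MQD per
 * (engine, queue) pair). The actual carving is done by the MQD managers and
 * is not visible in this file, so the ordering, the helper name and its
 * parameters are all assumptions.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t sdma_mqd_offset(uint32_t hiq_mqd_size, uint32_t sdma_mqd_size,
				uint32_t queues_per_engine,
				uint32_t engine, uint32_t queue)
{
	/* Skip the HIQ MQD, then index SDMA MQDs row-major by engine. */
	return (uint64_t)hiq_mqd_size +
	       ((uint64_t)engine * queues_per_engine + queue) * sdma_mqd_size;
}

int main(void)
{
	/* e.g. engine 1, queue 2 with 8 queues per engine and 512-byte MQDs */
	printf("offset = %llu\n",
	       (unsigned long long)sdma_mqd_offset(512, 512, 8, 1, 2));
	return 0;
}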
1730
64c7f8cf
BG
1731struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1732{
1733 struct device_queue_manager *dqm;
1734
79775b62 1735 pr_debug("Loading device queue manager\n");
a22fc854 1736
dbf56ab1 1737 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
64c7f8cf
BG
1738 if (!dqm)
1739 return NULL;
1740
d146c5a7
FK
1741 switch (dev->device_info->asic_family) {
1742 /* HWS is not available on Hawaii. */
1743 case CHIP_HAWAII:
1744 /* HWS depends on CWSR for timely dequeue. CWSR is not
1745 * available on Tonga.
1746 *
1747 * FIXME: This argument also applies to Kaveri.
1748 */
1749 case CHIP_TONGA:
1750 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1751 break;
1752 default:
1753 dqm->sched_policy = sched_policy;
1754 break;
1755 }
1756
64c7f8cf 1757 dqm->dev = dev;
d146c5a7 1758 switch (dqm->sched_policy) {
64c7f8cf
BG
1759 case KFD_SCHED_POLICY_HWS:
1760 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1761 /* initialize dqm for cp scheduling */
45c9a5e4
OG
1762 dqm->ops.create_queue = create_queue_cpsch;
1763 dqm->ops.initialize = initialize_cpsch;
1764 dqm->ops.start = start_cpsch;
1765 dqm->ops.stop = stop_cpsch;
1766 dqm->ops.destroy_queue = destroy_queue_cpsch;
1767 dqm->ops.update_queue = update_queue;
58dcd5bf
YZ
1768 dqm->ops.register_process = register_process;
1769 dqm->ops.unregister_process = unregister_process;
1770 dqm->ops.uninitialize = uninitialize;
45c9a5e4
OG
1771 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1772 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1773 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
d7b9bd22 1774 dqm->ops.set_trap_handler = set_trap_handler;
9fd3f1bf 1775 dqm->ops.process_termination = process_termination_cpsch;
26103436
FK
1776 dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1777 dqm->ops.restore_process_queues = restore_process_queues_cpsch;
5df099e8 1778 dqm->ops.get_wave_state = get_wave_state;
64c7f8cf
BG
1779 break;
1780 case KFD_SCHED_POLICY_NO_HWS:
1781 /* initialize dqm for no cp scheduling */
45c9a5e4
OG
1782 dqm->ops.start = start_nocpsch;
1783 dqm->ops.stop = stop_nocpsch;
1784 dqm->ops.create_queue = create_queue_nocpsch;
1785 dqm->ops.destroy_queue = destroy_queue_nocpsch;
1786 dqm->ops.update_queue = update_queue;
58dcd5bf
YZ
1787 dqm->ops.register_process = register_process;
1788 dqm->ops.unregister_process = unregister_process;
45c9a5e4 1789 dqm->ops.initialize = initialize_nocpsch;
58dcd5bf 1790 dqm->ops.uninitialize = uninitialize;
45c9a5e4 1791 dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
d7b9bd22 1792 dqm->ops.set_trap_handler = set_trap_handler;
9fd3f1bf 1793 dqm->ops.process_termination = process_termination_nocpsch;
26103436
FK
1794 dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1795 dqm->ops.restore_process_queues =
1796 restore_process_queues_nocpsch;
5df099e8 1797 dqm->ops.get_wave_state = get_wave_state;
64c7f8cf
BG
1798 break;
1799 default:
d146c5a7 1800 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
32fa8219 1801 goto out_free;
64c7f8cf
BG
1802 }
1803
a22fc854
BG
1804 switch (dev->device_info->asic_family) {
1805 case CHIP_CARRIZO:
bfd5e378 1806 device_queue_manager_init_vi(&dqm->asic_ops);
300dec95
OG
1807 break;
1808
a22fc854 1809 case CHIP_KAVERI:
bfd5e378 1810 device_queue_manager_init_cik(&dqm->asic_ops);
300dec95 1811 break;
97672cbe
FK
1812
1813 case CHIP_HAWAII:
1814 device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1815 break;
1816
1817 case CHIP_TONGA:
1818 case CHIP_FIJI:
1819 case CHIP_POLARIS10:
1820 case CHIP_POLARIS11:
846a44d7 1821 case CHIP_POLARIS12:
ed81cd6e 1822 case CHIP_VEGAM:
97672cbe
FK
1823 device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1824 break;
bed4f110
FK
1825
1826 case CHIP_VEGA10:
846a44d7 1827 case CHIP_VEGA12:
22a3a294 1828 case CHIP_VEGA20:
bed4f110
FK
1829 case CHIP_RAVEN:
1830 device_queue_manager_init_v9(&dqm->asic_ops);
1831 break;
e596b903
YZ
1832 default:
1833 WARN(1, "Unexpected ASIC family %u",
1834 dev->device_info->asic_family);
1835 goto out_free;
a22fc854
BG
1836 }
1837
fdfa090b
OZ
1838 if (init_mqd_managers(dqm))
1839 goto out_free;
1840
11614c36
OZ
1841 if (allocate_hiq_sdma_mqd(dqm)) {
1842 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
1843 goto out_free;
1844 }
1845
32fa8219
FK
1846 if (!dqm->ops.initialize(dqm))
1847 return dqm;
64c7f8cf 1848
32fa8219
FK
1849out_free:
1850 kfree(dqm);
1851 return NULL;
64c7f8cf
BG
1852}
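/*
 * Editor's sketch (user-space, names hypothetical), not part of the driver:
 * the ops-table pattern device_queue_manager_init() relies on above -- pick
 * one set of function pointers at init time based on the scheduling policy,
 * and have every later call dispatch through the table so callers never need
 * to know which policy was chosen.
 */
#include <stdio.h>

struct sched_ops {
	void (*start)(void);
	void (*stop)(void);
};

static void start_hws(void)    { puts("start: HWS");    }
static void stop_hws(void)     { puts("stop: HWS");     }
static void start_no_hws(void) { puts("start: no HWS"); }
static void stop_no_hws(void)  { puts("stop: no HWS");  }

static const struct sched_ops hws_ops    = { start_hws,    stop_hws    };
static const struct sched_ops no_hws_ops = { start_no_hws, stop_no_hws };

int main(void)
{
	int use_hws = 1;	/* stand-in for dqm->sched_policy */
	const struct sched_ops *ops = use_hws ? &hws_ops : &no_hws_ops;

	ops->start();		/* callers see only the ops table */
	ops->stop();
	return 0;
}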
1853
11614c36
OZ
1854void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
1855{
1856 WARN(!mqd, "No hiq sdma mqd trunk to free");
1857
1858 amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1859}
1860
64c7f8cf
BG
1861void device_queue_manager_uninit(struct device_queue_manager *dqm)
1862{
45c9a5e4 1863 dqm->ops.uninitialize(dqm);
11614c36 1864 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
64c7f8cf
BG
1865 kfree(dqm);
1866}
851a645e 1867
2640c3fa 1868int kfd_process_vm_fault(struct device_queue_manager *dqm,
1869 unsigned int pasid)
1870{
1871 struct kfd_process_device *pdd;
1872 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
1873 int ret = 0;
1874
1875 if (!p)
1876 return -EINVAL;
1877 pdd = kfd_get_process_device_data(dqm->dev, p);
1878 if (pdd)
1879 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
1880 kfd_unref_process(p);
1881
1882 return ret;
1883}
1884
73ea648d
SL
1885static void kfd_process_hw_exception(struct work_struct *work)
1886{
1887 struct device_queue_manager *dqm = container_of(work,
1888 struct device_queue_manager, hw_exception_work);
5b87245f 1889 amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
73ea648d
SL
1890}
1891
851a645e
FK
1892#if defined(CONFIG_DEBUG_FS)
1893
1894static void seq_reg_dump(struct seq_file *m,
1895 uint32_t (*dump)[2], uint32_t n_regs)
1896{
1897 uint32_t i, count;
1898
1899 for (i = 0, count = 0; i < n_regs; i++) {
1900 if (count == 0 ||
1901 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
1902 seq_printf(m, "%s %08x: %08x",
1903 i ? "\n" : "",
1904 dump[i][0], dump[i][1]);
1905 count = 7;
1906 } else {
1907 seq_printf(m, " %08x", dump[i][1]);
1908 count--;
1909 }
1910 }
1911
1912 seq_puts(m, "\n");
1913}
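/*
 * Editor's sketch, not part of the driver: the same grouping rule as
 * seq_reg_dump() above, redone as a user-space program so the output format
 * is easy to see -- a new "address:" header starts whenever the register
 * address is not contiguous with the previous one or eight values have been
 * printed on the current line. The sample data is made up.
 */
#include <stdio.h>
#include <stdint.h>

static void reg_dump(uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i - 1][0] + sizeof(uint32_t) != dump[i][0]) {
			printf("%s %08x: %08x", i ? "\n" : "",
			       dump[i][0], dump[i][1]);
			count = 7;
		} else {
			printf(" %08x", dump[i][1]);
			count--;
		}
	}
	printf("\n");
}

int main(void)
{
	/* Two contiguous registers, then a gap. */
	uint32_t dump[][2] = {
		{ 0x1000, 0x1 }, { 0x1004, 0x2 }, { 0x2000, 0x3 },
	};

	/* Prints " 00001000: 00000001 00000002" then " 00002000: 00000003". */
	reg_dump(dump, 3);
	return 0;
}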
1914
1915int dqm_debugfs_hqds(struct seq_file *m, void *data)
1916{
1917 struct device_queue_manager *dqm = data;
1918 uint32_t (*dump)[2], n_regs;
1919 int pipe, queue;
1920 int r = 0;
1921
24f48a42
OZ
1922 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
1923 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
1924 if (!r) {
1925 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
1926 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
1927 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
1928 KFD_CIK_HIQ_QUEUE);
1929 seq_reg_dump(m, dump, n_regs);
1930
1931 kfree(dump);
1932 }
1933
851a645e
FK
1934 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
1935 int pipe_offset = pipe * get_queues_per_pipe(dqm);
1936
1937 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
1938 if (!test_bit(pipe_offset + queue,
1939 dqm->dev->shared_resources.queue_bitmap))
1940 continue;
1941
1942 r = dqm->dev->kfd2kgd->hqd_dump(
1943 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1944 if (r)
1945 break;
1946
1947 seq_printf(m, " CP Pipe %d, Queue %d\n",
1948 pipe, queue);
1949 seq_reg_dump(m, dump, n_regs);
1950
1951 kfree(dump);
1952 }
1953 }
1954
98bb9222 1955 for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
d5094189
SL
1956 for (queue = 0;
1957 queue < dqm->dev->device_info->num_sdma_queues_per_engine;
1958 queue++) {
851a645e
FK
1959 r = dqm->dev->kfd2kgd->hqd_sdma_dump(
1960 dqm->dev->kgd, pipe, queue, &dump, &n_regs);
1961 if (r)
1962 break;
1963
1964 seq_printf(m, " SDMA Engine %d, RLC %d\n",
1965 pipe, queue);
1966 seq_reg_dump(m, dump, n_regs);
1967
1968 kfree(dump);
1969 }
1970 }
1971
1972 return r;
1973}
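/*
 * Editor's sketch, not part of the driver: the MEC/pipe decomposition used
 * for the HIQ line in dqm_debugfs_hqds() above. With a hypothetical 4 pipes
 * per MEC, a global pipe index decomposes as mec = idx / 4 + 1 (MECs are
 * reported 1-based) and pipe = idx % 4. The constant values below are
 * stand-ins, not the real KFD_CIK_HIQ_PIPE or pipe count.
 */
#include <stdio.h>

int main(void)
{
	int pipes_per_mec = 4;			/* assumption for the example */
	int hiq_pipe = 4;			/* stand-in for KFD_CIK_HIQ_PIPE */

	printf("HIQ on MEC %d Pipe %d\n",
	       hiq_pipe / pipes_per_mec + 1,	/* -> MEC 2 */
	       hiq_pipe % pipes_per_mec);	/* -> Pipe 0 */
	return 0;
}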
1974
a29ec470
SL
1975int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
1976{
1977 int r = 0;
1978
1979 dqm_lock(dqm);
1980 dqm->active_runlist = true;
1981 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1982 dqm_unlock(dqm);
1983
1984 return r;
1985}
1986
851a645e 1987#endif