drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
amdkfd: Add device queue manager module
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "../../radeon/cik_reg.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
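
/*
 * Illustrative sizing note (editor's worked example, not part of the
 * original commit): with CIK_HPD_EOP_BYTES_LOG2 = 11, each pipe gets
 * 1 << 11 = 2048 bytes of EOP buffer.  init_pipelines() below programs
 * the size field as log2(bytes / 4) - 1 = log2(2048 / 4) - 1 = 9 - 1 = 8,
 * which is why it passes CIK_HPD_EOP_BYTES_LOG2 - 3.
 */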

static bool is_mem_initialized;

static int init_memory(struct device_queue_manager *dqm);
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);


static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);
	return dqm->dev->shared_resources.first_compute_pipe;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

static unsigned int get_sh_mem_bases_nybble_64(struct kfd_process *process,
						struct kfd_dev *dev)
{
	struct kfd_process_device *pdd;
	uint32_t nybble;

	pdd = kfd_get_process_device_data(dev, process, 1);
	nybble = (pdd->lds_base >> 60) & 0x0E;

	return nybble;

}

static unsigned int get_sh_mem_bases_32(struct kfd_process *process,
					struct kfd_dev *dev)
{
	struct kfd_process_device *pdd;
	unsigned int shared_base;

	pdd = kfd_get_process_device_data(dev, process, 1);
	shared_base = (pdd->lds_base >> 16) & 0xFF;

	return shared_base;
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
static void init_process_memory(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	unsigned int temp;

	BUG_ON(!dqm || !qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
			DEFAULT_MTYPE(MTYPE_NONCACHED) |
			APE1_MTYPE(MTYPE_NONCACHED);
		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	if (qpd->pqm->process->is_32bit_user_mode) {
		temp = get_sh_mem_bases_32(qpd->pqm->process, dqm->dev);
		qpd->sh_mem_bases = SHARED_BASE(temp);
		qpd->sh_mem_config |= PTR32;
	} else {
		temp = get_sh_mem_bases_nybble_64(qpd->pqm->process, dqm->dev);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
	}

	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
}

static void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start from VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}
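
/*
 * Illustrative example (editor's sketch, not part of the original code,
 * assuming VMID_PER_DEVICE is 8): initialize_nocpsch() seeds vmid_bitmap
 * with (1 << 8) - 1 = 0xFF.  The first process to create a queue takes
 * bit 0 and is assigned VMID 0 + KFD_VMID_START_OFFSET = 8, the next
 * process gets VMID 9, and so on; deallocate_vmid() sets the bit again
 * once the process destroys its last queue.
 */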

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	retval = create_compute_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	dqm->queue_count++;

	mutex_unlock(&dqm->lock);
	return 0;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm);
			pipe = (pipe + 1) % get_pipes_num(dqm)) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (set == false)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}
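
/*
 * Illustrative walk-through (editor's sketch, not part of the original
 * code, assuming four compute pipes and QUEUES_PER_PIPE = 8): every pipe
 * starts with bitmap 0xFF.  The first queue lands on pipe 0/queue 0 and
 * next_pipe_to_allocate advances to 1, so the second queue lands on
 * pipe 1/queue 0, and so on.  This is the "horizontal" allocation noted
 * above: queues are spread across pipes before a pipe's second HQD slot
 * is used.
 */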

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		retval = -ENOMEM;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	deallocate_hqd(dqm, q);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	dqm->queue_count--;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if (q->properties.is_active == true)
		dqm->queue_count++;
	else
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	init_process_memory(dqm, qpd);
	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return 0;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	BUG_ON(!list_empty(&qpd->queues_list));

	pr_debug("kfd: In func %s\n", __func__);

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
						ATC_VMID_PASID_MAPPING_VALID;
	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
						vmid);
}

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
	 * scratch and GPUVM apertures.
	 * The hardware fills in the remaining 59 bits according to the
	 * following pattern:
	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
	 *
	 * (where X/Y is the configurable nybble with the low-bit 0)
	 *
	 * LDS and scratch will have the same top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
	 * GPUVM can have a different top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
	 * We don't bother to support different top nybbles
	 * for LDS/Scratch and GPUVM.
	 */

	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
		top_address_nybble == 0);

	return PRIVATE_BASE(top_address_nybble << 12) |
		SHARED_BASE(top_address_nybble << 12);
}
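
/*
 * Illustrative example (editor's sketch, not part of the original code):
 * for a process whose top address nybble is 0x2, the function returns
 * PRIVATE_BASE(0x2000) | SHARED_BASE(0x2000), i.e. SH_MEM_BASES is
 * programmed so that LDS, scratch and GPUVM all live in the
 * 0x20000000'00000000 region described in the comment above.
 */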

static int init_memory(struct device_queue_manager *dqm)
{
	int i, retval;

	/* clear any stale PASID mappings for the KFD VMIDs (8..15) */
	for (i = 8; i < 16; i++)
		set_pasid_vmid_mapping(dqm, 0, i);

	retval = kfd2kgd->init_memory(dqm->dev->kgd);
	if (retval == 0)
		is_mem_initialized = true;
	return retval;
}


static int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
	 */

	err = kfd2kgd->allocate_mem(dqm->dev->kgd,
				CIK_HPD_EOP_BYTES * pipes_num,
				PAGE_SIZE,
				KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
				(struct kgd_mem **) &dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocating vidmem for %d pipes\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		kfd2kgd->free_mem(dqm->dev->kgd,
				(struct kgd_mem *) dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* EOP queue size, encoded as log2(bytes/4) - 1 */
		kfd2kgd->init_pipeline(dqm->dev->kgd, i,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}


static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE);
	if (retval != 0)
		return retval;

	retval = init_memory(dqm);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	mutex_destroy(&dqm->lock);
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"	vmid mask: 0x%8X\n"
			"	queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}
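
/*
 * Illustrative example (editor's sketch, not part of the original code,
 * assuming VMID_PER_DEVICE is 8 and KFD_VMID_START_OFFSET is 8, as in
 * allocate_vmid() above): res.vmid_mask becomes 0xFF << 8 = 0xFF00, i.e.
 * the hardware scheduler may use VMIDs 8..15.  queue_mask is built the
 * same way: one bit per HQD slot, shifted so that it starts at the first
 * compute pipe reserved for the KFD.
 */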

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->active_runlist = false;
	retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
					sizeof(*dqm->fence_addr),
					32,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) &dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = kfd_get_process_device_data(dqm->dev,
				node->qpd->pqm->process, 1);
		pdd->bound = false;
	}
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	destroy_queues_cpsch(dqm, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

int fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value,
			unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		cpu_relax();
	}

	return 0;
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (dqm->active_runlist == false)
		goto out;
	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* wait for the preemption fence to be signalled, bounded by the
	 * default preemption timeout
	 */
	fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queue preemption");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !qpd || !q);

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	list_del(&q->list);
	dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF
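
/*
 * Illustrative example (editor's sketch, not part of the original code):
 * a caller requesting an alternate aperture at base 0x100000000 with size
 * 0x10000 passes both checks below: base & APE1_FIXED_BITS_MASK == 0
 * (64K aligned and below the user-mode boundary), and
 * limit = 0x10000FFFF satisfies
 * limit & APE1_FIXED_BITS_MASK == APE1_LIMIT_ALIGNMENT.  The registers
 * are then programmed with base >> 16 and limit >> 16.
 */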

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				enum cache_policy default_policy,
				enum cache_policy alternate_policy,
				void __user *alternate_aperture_base,
				uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	default_mtype = (default_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
			| DEFAULT_MTYPE(default_mtype)
			| APE1_MTYPE(ape1_mtype);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return true;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->create_queue = create_queue_cpsch;
		dqm->initialize = initialize_cpsch;
		dqm->start = start_cpsch;
		dqm->stop = stop_cpsch;
		dqm->destroy_queue = destroy_queue_cpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->create_kernel_queue = create_kernel_queue_cpsch;
		dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->start = start_nocpsch;
		dqm->stop = stop_nocpsch;
		dqm->create_queue = create_queue_nocpsch;
		dqm->destroy_queue = destroy_queue_nocpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->initialize = initialize_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	if (dqm->initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->uninitialize(dqm);
	kfree(dqm);
}