2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include <linux/slab.h>
25 #include <linux/mutex.h>
26 #include "kfd_device_queue_manager.h"
27 #include "kfd_kernel_queue.h"
/*
 * Advance a runlist-IB write pointer (counted in dwords) by increment_bytes,
 * warning if the new position would run past the IB buffer.
 *
 * NOTE(review): the write-back of the advanced pointer was lost in the
 * mangled source; restored here, since a pure-read helper would be useless
 * to its callers (they rely on rl_wptr moving between packets).
 */
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
			unsigned int buffer_size_bytes)
{
	/* increment_bytes is a byte count; wptr counts 32-bit dwords */
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}
40 static void pm_calc_rlib_size(struct packet_manager
*pm
,
41 unsigned int *rlib_size
,
42 bool *over_subscription
)
44 unsigned int process_count
, queue_count
, compute_queue_count
, gws_queue_count
;
45 unsigned int map_queue_size
;
46 unsigned int max_proc_per_quantum
= 1;
47 struct kfd_dev
*dev
= pm
->dqm
->dev
;
49 process_count
= pm
->dqm
->processes_count
;
50 queue_count
= pm
->dqm
->active_queue_count
;
51 compute_queue_count
= pm
->dqm
->active_cp_queue_count
;
52 gws_queue_count
= pm
->dqm
->gws_queue_count
;
54 /* check if there is over subscription
55 * Note: the arbitration between the number of VMIDs and
56 * hws_max_conc_proc has been done in
57 * kgd2kfd_device_init().
59 *over_subscription
= false;
61 if (dev
->max_proc_per_quantum
> 1)
62 max_proc_per_quantum
= dev
->max_proc_per_quantum
;
64 if ((process_count
> max_proc_per_quantum
) ||
65 compute_queue_count
> get_cp_queues_num(pm
->dqm
) ||
66 gws_queue_count
> 1) {
67 *over_subscription
= true;
68 pr_debug("Over subscribed runlist\n");
71 map_queue_size
= pm
->pmf
->map_queues_size
;
72 /* calculate run list ib allocation size */
73 *rlib_size
= process_count
* pm
->pmf
->map_process_size
+
74 queue_count
* map_queue_size
;
77 * Increase the allocation size in case we need a chained run list
78 * when over subscription
80 if (*over_subscription
)
81 *rlib_size
+= pm
->pmf
->runlist_size
;
83 pr_debug("runlist ib size %d\n", *rlib_size
);
86 static int pm_allocate_runlist_ib(struct packet_manager
*pm
,
87 unsigned int **rl_buffer
,
88 uint64_t *rl_gpu_buffer
,
89 unsigned int *rl_buffer_size
,
90 bool *is_over_subscription
)
94 if (WARN_ON(pm
->allocated
))
97 pm_calc_rlib_size(pm
, rl_buffer_size
, is_over_subscription
);
99 mutex_lock(&pm
->lock
);
101 retval
= kfd_gtt_sa_allocate(pm
->dqm
->dev
, *rl_buffer_size
,
105 pr_err("Failed to allocate runlist IB\n");
109 *(void **)rl_buffer
= pm
->ib_buffer_obj
->cpu_ptr
;
110 *rl_gpu_buffer
= pm
->ib_buffer_obj
->gpu_addr
;
112 memset(*rl_buffer
, 0, *rl_buffer_size
);
113 pm
->allocated
= true;
116 mutex_unlock(&pm
->lock
);
120 static int pm_create_runlist_ib(struct packet_manager
*pm
,
121 struct list_head
*queues
,
122 uint64_t *rl_gpu_addr
,
123 size_t *rl_size_bytes
)
125 unsigned int alloc_size_bytes
;
126 unsigned int *rl_buffer
, rl_wptr
, i
;
127 int retval
, proccesses_mapped
;
128 struct device_process_node
*cur
;
129 struct qcm_process_device
*qpd
;
131 struct kernel_queue
*kq
;
132 bool is_over_subscription
;
134 rl_wptr
= retval
= proccesses_mapped
= 0;
136 retval
= pm_allocate_runlist_ib(pm
, &rl_buffer
, rl_gpu_addr
,
137 &alloc_size_bytes
, &is_over_subscription
);
141 *rl_size_bytes
= alloc_size_bytes
;
142 pm
->ib_size_bytes
= alloc_size_bytes
;
144 pr_debug("Building runlist ib process count: %d queues count %d\n",
145 pm
->dqm
->processes_count
, pm
->dqm
->active_queue_count
);
147 /* build the run list ib packet */
148 list_for_each_entry(cur
, queues
, list
) {
150 /* build map process packet */
151 if (proccesses_mapped
>= pm
->dqm
->processes_count
) {
152 pr_debug("Not enough space left in runlist IB\n");
157 retval
= pm
->pmf
->map_process(pm
, &rl_buffer
[rl_wptr
], qpd
);
162 inc_wptr(&rl_wptr
, pm
->pmf
->map_process_size
,
165 list_for_each_entry(kq
, &qpd
->priv_queue_list
, list
) {
166 if (!kq
->queue
->properties
.is_active
)
169 pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
170 kq
->queue
->queue
, qpd
->is_debug
);
172 retval
= pm
->pmf
->map_queues(pm
,
180 pm
->pmf
->map_queues_size
,
184 list_for_each_entry(q
, &qpd
->queues_list
, list
) {
185 if (!q
->properties
.is_active
)
188 pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
189 q
->queue
, qpd
->is_debug
);
191 retval
= pm
->pmf
->map_queues(pm
,
200 pm
->pmf
->map_queues_size
,
205 pr_debug("Finished map process and queues to runlist\n");
207 if (is_over_subscription
) {
208 if (!pm
->is_over_subscription
)
209 pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
210 retval
= pm
->pmf
->runlist(pm
, &rl_buffer
[rl_wptr
],
212 alloc_size_bytes
/ sizeof(uint32_t),
215 pm
->is_over_subscription
= is_over_subscription
;
217 for (i
= 0; i
< alloc_size_bytes
/ sizeof(uint32_t); i
++)
218 pr_debug("0x%2X ", rl_buffer
[i
]);
224 int pm_init(struct packet_manager
*pm
, struct device_queue_manager
*dqm
)
226 switch (dqm
->dev
->device_info
->asic_family
) {
229 /* PM4 packet structures on CIK are the same as on VI */
237 pm
->pmf
= &kfd_vi_pm_funcs
;
248 case CHIP_SIENNA_CICHLID
:
249 case CHIP_NAVY_FLOUNDER
:
251 case CHIP_DIMGREY_CAVEFISH
:
252 pm
->pmf
= &kfd_v9_pm_funcs
;
255 WARN(1, "Unexpected ASIC family %u",
256 dqm
->dev
->device_info
->asic_family
);
261 mutex_init(&pm
->lock
);
262 pm
->priv_queue
= kernel_queue_init(dqm
->dev
, KFD_QUEUE_TYPE_HIQ
);
263 if (!pm
->priv_queue
) {
264 mutex_destroy(&pm
->lock
);
267 pm
->allocated
= false;
272 void pm_uninit(struct packet_manager
*pm
, bool hanging
)
274 mutex_destroy(&pm
->lock
);
275 kernel_queue_uninit(pm
->priv_queue
, hanging
);
278 int pm_send_set_resources(struct packet_manager
*pm
,
279 struct scheduling_resources
*res
)
281 uint32_t *buffer
, size
;
284 size
= pm
->pmf
->set_resources_size
;
285 mutex_lock(&pm
->lock
);
286 kq_acquire_packet_buffer(pm
->priv_queue
,
287 size
/ sizeof(uint32_t),
288 (unsigned int **)&buffer
);
290 pr_err("Failed to allocate buffer on kernel queue\n");
295 retval
= pm
->pmf
->set_resources(pm
, buffer
, res
);
297 kq_submit_packet(pm
->priv_queue
);
299 kq_rollback_packet(pm
->priv_queue
);
302 mutex_unlock(&pm
->lock
);
307 int pm_send_runlist(struct packet_manager
*pm
, struct list_head
*dqm_queues
)
309 uint64_t rl_gpu_ib_addr
;
311 size_t rl_ib_size
, packet_size_dwords
;
314 retval
= pm_create_runlist_ib(pm
, dqm_queues
, &rl_gpu_ib_addr
,
317 goto fail_create_runlist_ib
;
319 pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr
);
321 packet_size_dwords
= pm
->pmf
->runlist_size
/ sizeof(uint32_t);
322 mutex_lock(&pm
->lock
);
324 retval
= kq_acquire_packet_buffer(pm
->priv_queue
,
325 packet_size_dwords
, &rl_buffer
);
327 goto fail_acquire_packet_buffer
;
329 retval
= pm
->pmf
->runlist(pm
, rl_buffer
, rl_gpu_ib_addr
,
330 rl_ib_size
/ sizeof(uint32_t), false);
332 goto fail_create_runlist
;
334 kq_submit_packet(pm
->priv_queue
);
336 mutex_unlock(&pm
->lock
);
341 kq_rollback_packet(pm
->priv_queue
);
342 fail_acquire_packet_buffer
:
343 mutex_unlock(&pm
->lock
);
344 fail_create_runlist_ib
:
349 int pm_send_query_status(struct packet_manager
*pm
, uint64_t fence_address
,
350 uint32_t fence_value
)
352 uint32_t *buffer
, size
;
355 if (WARN_ON(!fence_address
))
358 size
= pm
->pmf
->query_status_size
;
359 mutex_lock(&pm
->lock
);
360 kq_acquire_packet_buffer(pm
->priv_queue
,
361 size
/ sizeof(uint32_t), (unsigned int **)&buffer
);
363 pr_err("Failed to allocate buffer on kernel queue\n");
368 retval
= pm
->pmf
->query_status(pm
, buffer
, fence_address
, fence_value
);
370 kq_submit_packet(pm
->priv_queue
);
372 kq_rollback_packet(pm
->priv_queue
);
375 mutex_unlock(&pm
->lock
);
379 int pm_send_unmap_queue(struct packet_manager
*pm
, enum kfd_queue_type type
,
380 enum kfd_unmap_queues_filter filter
,
381 uint32_t filter_param
, bool reset
,
382 unsigned int sdma_engine
)
384 uint32_t *buffer
, size
;
387 size
= pm
->pmf
->unmap_queues_size
;
388 mutex_lock(&pm
->lock
);
389 kq_acquire_packet_buffer(pm
->priv_queue
,
390 size
/ sizeof(uint32_t), (unsigned int **)&buffer
);
392 pr_err("Failed to allocate buffer on kernel queue\n");
397 retval
= pm
->pmf
->unmap_queues(pm
, buffer
, type
, filter
, filter_param
,
400 kq_submit_packet(pm
->priv_queue
);
402 kq_rollback_packet(pm
->priv_queue
);
405 mutex_unlock(&pm
->lock
);
409 void pm_release_ib(struct packet_manager
*pm
)
411 mutex_lock(&pm
->lock
);
413 kfd_gtt_sa_free(pm
->dqm
->dev
, pm
->ib_buffer_obj
);
414 pm
->allocated
= false;
416 mutex_unlock(&pm
->lock
);
419 #if defined(CONFIG_DEBUG_FS)
421 int pm_debugfs_runlist(struct seq_file
*m
, void *data
)
423 struct packet_manager
*pm
= data
;
425 mutex_lock(&pm
->lock
);
427 if (!pm
->allocated
) {
428 seq_puts(m
, " No active runlist\n");
432 seq_hex_dump(m
, " ", DUMP_PREFIX_OFFSET
, 32, 4,
433 pm
->ib_buffer_obj
->cpu_ptr
, pm
->ib_size_bytes
, false);
436 mutex_unlock(&pm
->lock
);
440 int pm_debugfs_hang_hws(struct packet_manager
*pm
)
442 uint32_t *buffer
, size
;
445 size
= pm
->pmf
->query_status_size
;
446 mutex_lock(&pm
->lock
);
447 kq_acquire_packet_buffer(pm
->priv_queue
,
448 size
/ sizeof(uint32_t), (unsigned int **)&buffer
);
450 pr_err("Failed to allocate buffer on kernel queue\n");
454 memset(buffer
, 0x55, size
);
455 kq_submit_packet(pm
->priv_queue
);
457 pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
458 buffer
[0], buffer
[1], buffer
[2], buffer
[3],
459 buffer
[4], buffer
[5], buffer
[6]);
461 mutex_unlock(&pm
->lock
);