// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/*
 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
 *
 * @ptr: the current pi/ci value
 * @val: the amount to add
 *
 * Add val to ptr. It can go until twice the queue length.
 */
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
	ptr += val;
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}

static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	return atomic_read(ci) & ((queue_len << 1) - 1);
}

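/*
 * pi and ci are kept in the range [0, 2 * queue_len) rather than
 * [0, queue_len) so that a full queue can be told apart from an empty one,
 * even though both have pi == ci modulo queue_len. For example (illustration
 * only), with queue_len == 8: pi == 10 and ci == 2 gives delta == 8, i.e.
 * zero free slots (full), while pi == ci == 2 gives delta == 0, i.e. eight
 * free slots (empty).
 */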
static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
	else
		return (abs(delta) - queue_len);
}

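/*
 * hl_int_hw_queue_update_ci - update the ci of the internal queues a CS used
 *
 * @cs: pointer to the CS
 *
 * Internal queues have no completion queue, so the driver tracks their ci in
 * software: advance each internal queue's ci by the number of jobs this CS
 * queued on it.
 */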
void hl_int_hw_queue_update_ci(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_hw_queue *q;
	int i;

	q = &hdev->kernel_queues[0];
	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
		if (q->queue_type == QUEUE_TYPE_INT)
			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
}

/*
 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
 *                                H/W queue.
 *
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
 * @len: BD's length
 * @ptr: BD's pointer
 *
 * This function assumes there is enough space on the queue to submit a new
 * BD to it. It initializes the next BD and calls the device specific
 * function to set the pi (and doorbell)
 *
 * This function must be called when the scheduler mutex is taken
 */
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

	bd = q->kernel_address;
	bd += hl_pi_2_offset(q->pi);
	bd->ctl = cpu_to_le32(ctl);
	bd->len = cpu_to_le32(len);
	bd->ptr = cpu_to_le64(ptr);

	q->pi = hl_queue_inc_ptr(q->pi);
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * ext_queue_sanity_checks - perform some sanity checks on external queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 * @reserve_cq_entry: whether to reserve an entry in the cq
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 * - Make sure we have enough space in the completion queue
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work). Only
 *   do this action if reserve_cq_entry is true
 */
static int ext_queue_sanity_checks(struct hl_device *hdev,
				struct hl_hw_queue *q, int num_of_entries,
				bool reserve_cq_entry)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->cq_id].free_slots_cnt;
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	if (reserve_cq_entry) {
		/*
		 * Check we have enough space in the completion queue.
		 * Subtract num_of_entries from the free-slots counter; if the
		 * result went negative there is no room for all the new
		 * entries, so the reservation is undone and we can't submit
		 * because we won't get an ack on the CBs' completion.
		 */
		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
				num_of_entries, q->hw_queue_id);
			atomic_add(num_of_entries, free_slots);
			return -EAGAIN;
		}
	}

	return 0;
}

/*
 * int_queue_sanity_checks - perform some sanity checks on internal queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 */
static int int_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	if (num_of_entries > q->int_queue_len) {
		dev_err(hdev->dev,
			"Cannot populate queue %u with %u jobs\n",
			q->hw_queue_id, num_of_entries);
		return -ENOMEM;
	}

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, q->int_queue_len);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Notice: We do not reserve queue entries so this function mustn't be called
 *         more than once per CS for the same queue
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the queue to send the CB to
 * @cb_size: size of CB
 * @cb_ptr: pointer to CB location
 *
 * This function sends a single CB, that must NOT generate a completion entry
 */
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc = 0;

	/*
	 * The CPU queue is a synchronous queue with an effective depth of
	 * a single entry (although it is allocated with room for multiple
	 * entries). Therefore, there is a different lock, called
	 * send_cpu_message_lock, that serializes accesses to the CPU queue.
	 * As a result, we don't need to lock the access to the entire H/W
	 * queues module when submitting a JOB to the CPU queue
	 */
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_lock(hdev);

	if (hdev->disabled) {
		rc = -EPERM;
		goto out;
	}

	/*
	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
	 * type only on init phase, when the queues are empty and being tested,
	 * so there is no need for sanity checks.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Update the JOB ID inside the BD CTL so the device would know what
	 * to write in the completion queue
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in ext_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);
	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

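	/*
	 * Advance pi and keep it in the range [0, 2 * int_queue_len) so that
	 * queue_free_slots() can tell a full queue from an empty one.
	 */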
	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * Upon PQE completion, COMP_DATA is used as the write data to the
	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
	 * write address offset in the SM block (QMAN LBW message).
	 * The write address offset is calculated as "COMP_OFFSET << 2".
	 */
	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * A patched CB is created only if a user CB was allocated by driver and
	 * MMU is disabled. If MMU is enabled, the user CB should be used
	 * instead. If the user CB wasn't allocated by driver, assume that it
	 * holds an address.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}

/*
 * init_signal_wait_cs - initialize a signal/wait CS
 * @cs: pointer to the signal/wait CS
 *
 * H/W queues spinlock should be taken before calling this function
 */
static void init_signal_wait_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_hw_queue *hw_queue;
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);

	struct hl_hw_sob *hw_sob;
	struct hl_cs_job *job;
	u32 q_idx;

	/* There is only one job in a signal/wait CS */
	job = list_first_entry(&cs->job_list, struct hl_cs_job,
				cs_node);
	q_idx = job->hw_queue_id;
	hw_queue = &hdev->kernel_queues[q_idx];

	if (cs->type & CS_TYPE_SIGNAL) {
		hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];

		cs_cmpl->hw_sob = hw_sob;
		cs_cmpl->sob_val = hw_queue->next_sob_val++;

		dev_dbg(hdev->dev,
			"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);

		hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
					cs_cmpl->hw_sob->sob_id);

		kref_get(&hw_sob->kref);

		/* check for wraparound */
		if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
			/*
			 * Decrement as we reached the max value.
			 * The release function won't be called here as we've
			 * just incremented the refcount.
			 */
			kref_put(&hw_sob->kref, hl_sob_reset_error);
			hw_queue->next_sob_val = 1;
			/* only two SOBs are currently in use */
			hw_queue->curr_sob_offset =
					(hw_queue->curr_sob_offset + 1) %
						HL_RSVD_SOBS_IN_USE;

			dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
					hw_queue->curr_sob_offset, q_idx);
		}
	} else if (cs->type & CS_TYPE_WAIT) {
		struct hl_cs_compl *signal_cs_cmpl;

		signal_cs_cmpl = container_of(cs->signal_fence,
						struct hl_cs_compl,
						base_fence);

		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

		dev_dbg(hdev->dev,
			"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
			hw_queue->base_mon_id, q_idx);

		hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
						cs_cmpl->hw_sob->sob_id,
						cs_cmpl->sob_val,
						hw_queue->base_mon_id,
						q_idx);

		kref_get(&cs_cmpl->hw_sob->kref);
		/*
		 * Must put the signal fence after the SOB refcnt increment so
		 * the SOB refcnt won't turn 0 and reset the SOB before the
		 * wait CS was submitted.
		 */
		mb();
		hl_fence_put(cs->signal_fence);
		cs->signal_fence = NULL;
	}
}

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	u32 max_queues;
	int rc = 0, i, cq_cnt;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (hl_device_disabled_or_in_reset(hdev)) {
		ctx->cs_counters.device_in_reset_drop_cnt++;
		dev_err(hdev->dev,
			"device is disabled or in reset, CS rejected!\n");
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i], true);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				ctx->cs_counters.queue_full_drop_cnt++;
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
		init_signal_wait_cs(cs);

	spin_lock(&hdev->hw_queues_mirror_lock);
	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
			(list_first_entry(&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node) == cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
		spin_unlock(&hdev->hw_queues_mirror_lock);
	} else {
		spin_unlock(&hdev->hw_queues_mirror_lock);
	}

	if (!hdev->cs_active_cnt++) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
		ts->busy_to_idle_ts = ktime_set(0, 0);
		ts->idle_to_busy_ts = ktime_get();
	}

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unroll_cq_resv:
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
				(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which queue to increment its ci
 */
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

	atomic_inc(&q->ci);
}

static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;

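	/*
	 * The CPU queue must be allocated from the CPU-accessible DMA pool;
	 * regular external queues use coherent DMA memory.
	 */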
	if (is_cpu_queue)
		p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_QUEUE_SIZE_IN_BYTES,
							&q->bus_address);
	else
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
					sizeof(*q->shadow_queue),
					GFP_KERNEL);
	if (!q->shadow_queue) {
		dev_err(hdev->dev,
			"Failed to allocate shadow queue for H/W queue %d\n",
			q->hw_queue_id);
		rc = -ENOMEM;
		goto free_queue;
	}

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;

free_queue:
	if (is_cpu_queue)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);

	return rc;
}

static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
					&q->bus_address, &q->int_queue_len);
	if (!p) {
		dev_err(hdev->dev,
			"Failed to get base address for internal queue %d\n",
			q->hw_queue_id);
		return -EFAULT;
	}

	q->kernel_address = p;
	q->pi = 0;
	atomic_set(&q->ci, 0);

	return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
}

static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, queue_idx = hdev->sync_stream_queue_idx++;

	hw_queue->base_sob_id =
		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
	hw_queue->base_mon_id =
		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
	hw_queue->next_sob_val = 1;
	hw_queue->curr_sob_offset = 0;

	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &hw_queue->hw_sob[sob];
		hw_sob->sob_id = hw_queue->base_sob_id + sob;
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];

	/*
	 * In case we got here due to a stuck CS, the refcnt might be bigger
	 * than 1 and therefore we reset it.
	 */
	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
	hw_queue->curr_sob_offset = 0;
	hw_queue->next_sob_val = 1;
}

/*
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 * @hw_queue_id: The id of the H/W queue
 *
 * Allocate dma-able memory for the queue and initialize fields
 * Returns 0 on success
 */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	if (q->supports_sync_stream)
		sync_stream_queue_init(hdev, q->hw_queue_id);

	return rc;
}

/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	/*
	 * If we arrived here, there are no jobs waiting on this queue
	 * so we can safely remove it.
	 * This is because this function can only be called when:
	 * 1. Either a context is deleted, which can only occur if all its
	 *    jobs were finished
	 * 2. A context wasn't able to be created due to failure or timeout,
	 *    which means there are no jobs on the queue yet
	 *
	 * The only exception is the queues of the kernel context, but
	 * if they are being destroyed, it means that the entire module is
	 * being removed. If the module is removed, it means there is no open
	 * user context. It also means that if a job was submitted by
	 * the kernel driver (e.g. context creation), the job itself was
	 * released by the kernel driver when a timeout occurred on its
	 * completion. Thus, we don't need to release it again.
	 */

	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	kfree(q->shadow_queue);

	if (q->queue_type == QUEUE_TYPE_CPU)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);
}

int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}

void hl_hw_queues_destroy(struct hl_device *hdev)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}

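/*
 * hl_hw_queue_reset - reset the pi/ci of the device's queues
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: if false, the CPU queue is skipped and keeps its state
 */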
void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
		if ((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU))
			continue;

		q->pi = 0;
		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
	}
}