1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
8 #include "habanalabs.h"
10 #include <linux/slab.h>
13 * hl_queue_add_ptr - add to pi or ci and checks if it wraps around
15 * @ptr: the current pi/ci value
16 * @val: the amount to add
18 * Add val to ptr. It can go until twice the queue length.
20 inline u32
hl_hw_queue_add_ptr(u32 ptr
, u16 val
)
23 ptr
&= ((HL_QUEUE_LENGTH
<< 1) - 1);
26 static inline int queue_ci_get(atomic_t
*ci
, u32 queue_len
)
28 return atomic_read(ci
) & ((queue_len
<< 1) - 1);
31 static inline int queue_free_slots(struct hl_hw_queue
*q
, u32 queue_len
)
33 int delta
= (q
->pi
- queue_ci_get(&q
->ci
, queue_len
));
36 return (queue_len
- delta
);
38 return (abs(delta
) - queue_len
);
41 void hl_int_hw_queue_update_ci(struct hl_cs
*cs
)
43 struct hl_device
*hdev
= cs
->ctx
->hdev
;
44 struct hl_hw_queue
*q
;
50 q
= &hdev
->kernel_queues
[0];
51 for (i
= 0 ; i
< hdev
->asic_prop
.max_queues
; i
++, q
++) {
52 if (q
->queue_type
== QUEUE_TYPE_INT
)
53 atomic_add(cs
->jobs_in_queue_cnt
[i
], &q
->ci
);
58 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
60 * @hdev: pointer to habanalabs device structure
61 * @q: pointer to habanalabs queue structure
62 * @ctl: BD's control word
66 * This function assumes there is enough space on the queue to submit a new
67 * BD to it. It initializes the next BD and calls the device specific
68 * function to set the pi (and doorbell)
70 * This function must be called when the scheduler mutex is taken
73 static void ext_and_hw_queue_submit_bd(struct hl_device
*hdev
,
74 struct hl_hw_queue
*q
, u32 ctl
, u32 len
, u64 ptr
)
78 bd
= (struct hl_bd
*) (uintptr_t) q
->kernel_address
;
79 bd
+= hl_pi_2_offset(q
->pi
);
80 bd
->ctl
= cpu_to_le32(ctl
);
81 bd
->len
= cpu_to_le32(len
);
82 bd
->ptr
= cpu_to_le64(ptr
);
84 q
->pi
= hl_queue_inc_ptr(q
->pi
);
85 hdev
->asic_funcs
->ring_doorbell(hdev
, q
->hw_queue_id
, q
->pi
);
89 * ext_queue_sanity_checks - perform some sanity checks on external queue
91 * @hdev : pointer to hl_device structure
92 * @q : pointer to hl_hw_queue structure
93 * @num_of_entries : how many entries to check for space
94 * @reserve_cq_entry : whether to reserve an entry in the cq
96 * H/W queues spinlock should be taken before calling this function
98 * Perform the following:
99 * - Make sure we have enough space in the h/w queue
100 * - Make sure we have enough space in the completion queue
101 * - Reserve space in the completion queue (needs to be reversed if there
102 * is a failure down the road before the actual submission of work). Only
103 * do this action if reserve_cq_entry is true
106 static int ext_queue_sanity_checks(struct hl_device
*hdev
,
107 struct hl_hw_queue
*q
, int num_of_entries
,
108 bool reserve_cq_entry
)
110 atomic_t
*free_slots
=
111 &hdev
->completion_queue
[q
->cq_id
].free_slots_cnt
;
114 /* Check we have enough space in the queue */
115 free_slots_cnt
= queue_free_slots(q
, HL_QUEUE_LENGTH
);
117 if (free_slots_cnt
< num_of_entries
) {
118 dev_dbg(hdev
->dev
, "Queue %d doesn't have room for %d CBs\n",
119 q
->hw_queue_id
, num_of_entries
);
123 if (reserve_cq_entry
) {
		/*
		 * Check we have enough space in the completion queue.
		 * Subtract num_of_entries from the free slots counter. If the
		 * result is negative, the CQ doesn't have room for all the
		 * entries, so we can't submit the new CBs because we won't get
		 * an ack on their completion. In that case, the subtraction is
		 * reverted by adding num_of_entries back to the counter.
		 */
131 if (atomic_add_negative(num_of_entries
* -1, free_slots
)) {
132 dev_dbg(hdev
->dev
, "No space for %d on CQ %d\n",
133 num_of_entries
, q
->hw_queue_id
);
134 atomic_add(num_of_entries
, free_slots
);
143 * int_queue_sanity_checks - perform some sanity checks on internal queue
145 * @hdev : pointer to hl_device structure
146 * @q : pointer to hl_hw_queue structure
147 * @num_of_entries : how many entries to check for space
149 * H/W queues spinlock should be taken before calling this function
151 * Perform the following:
152 * - Make sure we have enough space in the h/w queue
155 static int int_queue_sanity_checks(struct hl_device
*hdev
,
156 struct hl_hw_queue
*q
,
161 if (num_of_entries
> q
->int_queue_len
) {
163 "Cannot populate queue %u with %u jobs\n",
164 q
->hw_queue_id
, num_of_entries
);
168 /* Check we have enough space in the queue */
169 free_slots_cnt
= queue_free_slots(q
, q
->int_queue_len
);
171 if (free_slots_cnt
< num_of_entries
) {
172 dev_dbg(hdev
->dev
, "Queue %d doesn't have room for %d CBs\n",
173 q
->hw_queue_id
, num_of_entries
);
181 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
182 * @hdev: Pointer to hl_device structure.
183 * @q: Pointer to hl_hw_queue structure.
184 * @num_of_entries: How many entries to check for space.
186 * Notice: We do not reserve queue entries so this function mustn't be called
187 * more than once per CS for the same queue
190 static int hw_queue_sanity_checks(struct hl_device
*hdev
, struct hl_hw_queue
*q
,
195 /* Check we have enough space in the queue */
196 free_slots_cnt
= queue_free_slots(q
, HL_QUEUE_LENGTH
);
198 if (free_slots_cnt
< num_of_entries
) {
199 dev_dbg(hdev
->dev
, "Queue %d doesn't have room for %d CBs\n",
200 q
->hw_queue_id
, num_of_entries
);
208 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
210 * @hdev: pointer to hl_device structure
 * @hw_queue_id: Queue's ID
212 * @cb_size: size of CB
213 * @cb_ptr: pointer to CB location
215 * This function sends a single CB, that must NOT generate a completion entry
218 int hl_hw_queue_send_cb_no_cmpl(struct hl_device
*hdev
, u32 hw_queue_id
,
219 u32 cb_size
, u64 cb_ptr
)
221 struct hl_hw_queue
*q
= &hdev
->kernel_queues
[hw_queue_id
];
225 * The CPU queue is a synchronous queue with an effective depth of
226 * a single entry (although it is allocated with room for multiple
227 * entries). Therefore, there is a different lock, called
228 * send_cpu_message_lock, that serializes accesses to the CPU queue.
229 * As a result, we don't need to lock the access to the entire H/W
230 * queues module when submitting a JOB to the CPU queue
232 if (q
->queue_type
!= QUEUE_TYPE_CPU
)
233 hdev
->asic_funcs
->hw_queues_lock(hdev
);
235 if (hdev
->disabled
) {
241 * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
242 * type only on init phase, when the queues are empty and being tested,
243 * so there is no need for sanity checks.
245 if (q
->queue_type
!= QUEUE_TYPE_HW
) {
246 rc
= ext_queue_sanity_checks(hdev
, q
, 1, false);
251 ext_and_hw_queue_submit_bd(hdev
, q
, 0, cb_size
, cb_ptr
);
254 if (q
->queue_type
!= QUEUE_TYPE_CPU
)
255 hdev
->asic_funcs
->hw_queues_unlock(hdev
);
261 * ext_queue_schedule_job - submit a JOB to an external queue
263 * @job: pointer to the job that needs to be submitted to the queue
265 * This function must be called when the scheduler mutex is taken
268 static void ext_queue_schedule_job(struct hl_cs_job
*job
)
270 struct hl_device
*hdev
= job
->cs
->ctx
->hdev
;
271 struct hl_hw_queue
*q
= &hdev
->kernel_queues
[job
->hw_queue_id
];
272 struct hl_cq_entry cq_pkt
;
281 * Update the JOB ID inside the BD CTL so the device would know what
282 * to write in the completion queue
284 ctl
= ((q
->pi
<< BD_CTL_SHADOW_INDEX_SHIFT
) & BD_CTL_SHADOW_INDEX_MASK
);
286 cb
= job
->patched_cb
;
287 len
= job
->job_cb_size
;
288 ptr
= cb
->bus_address
;
290 cq_pkt
.data
= cpu_to_le32(
291 ((q
->pi
<< CQ_ENTRY_SHADOW_INDEX_SHIFT
)
292 & CQ_ENTRY_SHADOW_INDEX_MASK
) |
293 FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK
, 1) |
294 FIELD_PREP(CQ_ENTRY_READY_MASK
, 1));
297 * No need to protect pi_offset because scheduling to the
298 * H/W queues is done under the scheduler mutex
300 * No need to check if CQ is full because it was already
301 * checked in ext_queue_sanity_checks
303 cq
= &hdev
->completion_queue
[q
->cq_id
];
304 cq_addr
= cq
->bus_address
+ cq
->pi
* sizeof(struct hl_cq_entry
);
306 hdev
->asic_funcs
->add_end_of_cb_packets(hdev
, cb
->kernel_address
, len
,
308 le32_to_cpu(cq_pkt
.data
),
310 job
->contains_dma_pkt
);
312 q
->shadow_queue
[hl_pi_2_offset(q
->pi
)] = job
;
314 cq
->pi
= hl_cq_inc_ptr(cq
->pi
);
316 ext_and_hw_queue_submit_bd(hdev
, q
, ctl
, len
, ptr
);
320 * int_queue_schedule_job - submit a JOB to an internal queue
322 * @job: pointer to the job that needs to be submitted to the queue
324 * This function must be called when the scheduler mutex is taken
327 static void int_queue_schedule_job(struct hl_cs_job
*job
)
329 struct hl_device
*hdev
= job
->cs
->ctx
->hdev
;
330 struct hl_hw_queue
*q
= &hdev
->kernel_queues
[job
->hw_queue_id
];
335 bd
.len
= cpu_to_le32(job
->job_cb_size
);
336 bd
.ptr
= cpu_to_le64((u64
) (uintptr_t) job
->user_cb
);
338 pi
= (__le64
*) (uintptr_t) (q
->kernel_address
+
339 ((q
->pi
& (q
->int_queue_len
- 1)) * sizeof(bd
)));
342 q
->pi
&= ((q
->int_queue_len
<< 1) - 1);
344 hdev
->asic_funcs
->pqe_write(hdev
, pi
, &bd
);
346 hdev
->asic_funcs
->ring_doorbell(hdev
, q
->hw_queue_id
, q
->pi
);
350 * hw_queue_schedule_job - submit a JOB to a H/W queue
352 * @job: pointer to the job that needs to be submitted to the queue
354 * This function must be called when the scheduler mutex is taken
357 static void hw_queue_schedule_job(struct hl_cs_job
*job
)
359 struct hl_device
*hdev
= job
->cs
->ctx
->hdev
;
360 struct hl_hw_queue
*q
= &hdev
->kernel_queues
[job
->hw_queue_id
];
362 u32 offset
, ctl
, len
;
365 * Upon PQE completion, COMP_DATA is used as the write data to the
366 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
367 * write address offset in the SM block (QMAN LBW message).
368 * The write address offset is calculated as "COMP_OFFSET << 2".
370 offset
= job
->cs
->sequence
& (hdev
->asic_prop
.max_pending_cs
- 1);
371 ctl
= ((offset
<< BD_CTL_COMP_OFFSET_SHIFT
) & BD_CTL_COMP_OFFSET_MASK
) |
372 ((q
->pi
<< BD_CTL_COMP_DATA_SHIFT
) & BD_CTL_COMP_DATA_MASK
);
374 len
= job
->job_cb_size
;
377 * A patched CB is created only if a user CB was allocated by driver and
378 * MMU is disabled. If MMU is enabled, the user CB should be used
379 * instead. If the user CB wasn't allocated by driver, assume that it
383 ptr
= job
->patched_cb
->bus_address
;
384 else if (job
->is_kernel_allocated_cb
)
385 ptr
= job
->user_cb
->bus_address
;
387 ptr
= (u64
) (uintptr_t) job
->user_cb
;
389 ext_and_hw_queue_submit_bd(hdev
, q
, ctl
, len
, ptr
);
393 * init_signal_wait_cs - initialize a signal/wait CS
394 * @cs: pointer to the signal/wait CS
396 * H/W queues spinlock should be taken before calling this function
398 static void init_signal_wait_cs(struct hl_cs
*cs
)
400 struct hl_ctx
*ctx
= cs
->ctx
;
401 struct hl_device
*hdev
= ctx
->hdev
;
402 struct hl_hw_queue
*hw_queue
;
403 struct hl_cs_compl
*cs_cmpl
=
404 container_of(cs
->fence
, struct hl_cs_compl
, base_fence
);
406 struct hl_hw_sob
*hw_sob
;
407 struct hl_cs_job
*job
;
410 /* There is only one job in a signal/wait CS */
411 job
= list_first_entry(&cs
->job_list
, struct hl_cs_job
,
413 q_idx
= job
->hw_queue_id
;
414 hw_queue
= &hdev
->kernel_queues
[q_idx
];
416 if (cs
->type
& CS_TYPE_SIGNAL
) {
417 hw_sob
= &hw_queue
->hw_sob
[hw_queue
->curr_sob_offset
];
419 cs_cmpl
->hw_sob
= hw_sob
;
420 cs_cmpl
->sob_val
= hw_queue
->next_sob_val
++;
423 "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
424 cs_cmpl
->hw_sob
->sob_id
, cs_cmpl
->sob_val
, q_idx
);
426 hdev
->asic_funcs
->gen_signal_cb(hdev
, job
->patched_cb
,
427 cs_cmpl
->hw_sob
->sob_id
);
429 kref_get(&hw_sob
->kref
);
431 /* check for wraparound */
432 if (hw_queue
->next_sob_val
== HL_MAX_SOB_VAL
) {
434 * Decrement as we reached the max value.
435 * The release function won't be called here as we've
436 * just incremented the refcount.
438 kref_put(&hw_sob
->kref
, hl_sob_reset_error
);
439 hw_queue
->next_sob_val
= 1;
440 /* only two SOBs are currently in use */
441 hw_queue
->curr_sob_offset
=
442 (hw_queue
->curr_sob_offset
+ 1) %
445 dev_dbg(hdev
->dev
, "switched to SOB %d, q_idx: %d\n",
446 hw_queue
->curr_sob_offset
, q_idx
);
448 } else if (cs
->type
& CS_TYPE_WAIT
) {
449 struct hl_cs_compl
*signal_cs_cmpl
;
451 signal_cs_cmpl
= container_of(cs
->signal_fence
,
		/* copy the SOB id and value of the signal CS */
456 cs_cmpl
->hw_sob
= signal_cs_cmpl
->hw_sob
;
457 cs_cmpl
->sob_val
= signal_cs_cmpl
->sob_val
;
460 "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
461 cs_cmpl
->hw_sob
->sob_id
, cs_cmpl
->sob_val
,
462 hw_queue
->base_mon_id
, q_idx
);
464 hdev
->asic_funcs
->gen_wait_cb(hdev
, job
->patched_cb
,
465 cs_cmpl
->hw_sob
->sob_id
,
467 hw_queue
->base_mon_id
,
470 kref_get(&cs_cmpl
->hw_sob
->kref
);
472 * Must put the signal fence after the SOB refcnt increment so
473 * the SOB refcnt won't turn 0 and reset the SOB before the
474 * wait CS was submitted.
477 hl_fence_put(cs
->signal_fence
);
478 cs
->signal_fence
= NULL
;
483 * hl_hw_queue_schedule_cs - schedule a command submission
484 * @cs: pointer to the CS
486 int hl_hw_queue_schedule_cs(struct hl_cs
*cs
)
488 struct hl_ctx
*ctx
= cs
->ctx
;
489 struct hl_device
*hdev
= ctx
->hdev
;
490 struct hl_cs_job
*job
, *tmp
;
491 struct hl_hw_queue
*q
;
493 int rc
= 0, i
, cq_cnt
;
495 hdev
->asic_funcs
->hw_queues_lock(hdev
);
497 if (hl_device_disabled_or_in_reset(hdev
)) {
498 ctx
->cs_counters
.device_in_reset_drop_cnt
++;
500 "device is disabled or in reset, CS rejected!\n");
505 max_queues
= hdev
->asic_prop
.max_queues
;
507 q
= &hdev
->kernel_queues
[0];
508 for (i
= 0, cq_cnt
= 0 ; i
< max_queues
; i
++, q
++) {
509 if (cs
->jobs_in_queue_cnt
[i
]) {
510 switch (q
->queue_type
) {
512 rc
= ext_queue_sanity_checks(hdev
, q
,
513 cs
->jobs_in_queue_cnt
[i
], true);
516 rc
= int_queue_sanity_checks(hdev
, q
,
517 cs
->jobs_in_queue_cnt
[i
]);
520 rc
= hw_queue_sanity_checks(hdev
, q
,
521 cs
->jobs_in_queue_cnt
[i
]);
524 dev_err(hdev
->dev
, "Queue type %d is invalid\n",
531 ctx
->cs_counters
.queue_full_drop_cnt
++;
535 if (q
->queue_type
== QUEUE_TYPE_EXT
)
540 if ((cs
->type
== CS_TYPE_SIGNAL
) || (cs
->type
== CS_TYPE_WAIT
))
541 init_signal_wait_cs(cs
);
543 spin_lock(&hdev
->hw_queues_mirror_lock
);
544 list_add_tail(&cs
->mirror_node
, &hdev
->hw_queues_mirror_list
);
546 /* Queue TDR if the CS is the first entry and if timeout is wanted */
547 if ((hdev
->timeout_jiffies
!= MAX_SCHEDULE_TIMEOUT
) &&
548 (list_first_entry(&hdev
->hw_queues_mirror_list
,
549 struct hl_cs
, mirror_node
) == cs
)) {
550 cs
->tdr_active
= true;
551 schedule_delayed_work(&cs
->work_tdr
, hdev
->timeout_jiffies
);
552 spin_unlock(&hdev
->hw_queues_mirror_lock
);
554 spin_unlock(&hdev
->hw_queues_mirror_lock
);
557 if (!hdev
->cs_active_cnt
++) {
558 struct hl_device_idle_busy_ts
*ts
;
560 ts
= &hdev
->idle_busy_ts_arr
[hdev
->idle_busy_ts_idx
];
561 ts
->busy_to_idle_ts
= ktime_set(0, 0);
562 ts
->idle_to_busy_ts
= ktime_get();
565 list_for_each_entry_safe(job
, tmp
, &cs
->job_list
, cs_node
)
566 switch (job
->queue_type
) {
568 ext_queue_schedule_job(job
);
571 int_queue_schedule_job(job
);
574 hw_queue_schedule_job(job
);
580 cs
->submitted
= true;
585 q
= &hdev
->kernel_queues
[0];
586 for (i
= 0 ; (i
< max_queues
) && (cq_cnt
> 0) ; i
++, q
++) {
587 if ((q
->queue_type
== QUEUE_TYPE_EXT
) &&
588 (cs
->jobs_in_queue_cnt
[i
])) {
589 atomic_t
*free_slots
=
590 &hdev
->completion_queue
[i
].free_slots_cnt
;
591 atomic_add(cs
->jobs_in_queue_cnt
[i
], free_slots
);
597 hdev
->asic_funcs
->hw_queues_unlock(hdev
);
603 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
605 * @hdev: pointer to hl_device structure
606 * @hw_queue_id: which queue to increment its ci
608 void hl_hw_queue_inc_ci_kernel(struct hl_device
*hdev
, u32 hw_queue_id
)
610 struct hl_hw_queue
*q
= &hdev
->kernel_queues
[hw_queue_id
];
615 static int ext_and_cpu_queue_init(struct hl_device
*hdev
, struct hl_hw_queue
*q
,
622 p
= hdev
->asic_funcs
->cpu_accessible_dma_pool_alloc(hdev
,
623 HL_QUEUE_SIZE_IN_BYTES
,
626 p
= hdev
->asic_funcs
->asic_dma_alloc_coherent(hdev
,
627 HL_QUEUE_SIZE_IN_BYTES
,
629 GFP_KERNEL
| __GFP_ZERO
);
633 q
->kernel_address
= (u64
) (uintptr_t) p
;
635 q
->shadow_queue
= kmalloc_array(HL_QUEUE_LENGTH
,
636 sizeof(*q
->shadow_queue
),
638 if (!q
->shadow_queue
) {
640 "Failed to allocate shadow queue for H/W queue %d\n",
646 /* Make sure read/write pointers are initialized to start of queue */
647 atomic_set(&q
->ci
, 0);
654 hdev
->asic_funcs
->cpu_accessible_dma_pool_free(hdev
,
655 HL_QUEUE_SIZE_IN_BYTES
,
656 (void *) (uintptr_t) q
->kernel_address
);
658 hdev
->asic_funcs
->asic_dma_free_coherent(hdev
,
659 HL_QUEUE_SIZE_IN_BYTES
,
660 (void *) (uintptr_t) q
->kernel_address
,
666 static int int_queue_init(struct hl_device
*hdev
, struct hl_hw_queue
*q
)
670 p
= hdev
->asic_funcs
->get_int_queue_base(hdev
, q
->hw_queue_id
,
671 &q
->bus_address
, &q
->int_queue_len
);
674 "Failed to get base address for internal queue %d\n",
679 q
->kernel_address
= (u64
) (uintptr_t) p
;
681 atomic_set(&q
->ci
, 0);
686 static int cpu_queue_init(struct hl_device
*hdev
, struct hl_hw_queue
*q
)
688 return ext_and_cpu_queue_init(hdev
, q
, true);
691 static int ext_queue_init(struct hl_device
*hdev
, struct hl_hw_queue
*q
)
693 return ext_and_cpu_queue_init(hdev
, q
, false);
696 static int hw_queue_init(struct hl_device
*hdev
, struct hl_hw_queue
*q
)
700 p
= hdev
->asic_funcs
->asic_dma_alloc_coherent(hdev
,
701 HL_QUEUE_SIZE_IN_BYTES
,
703 GFP_KERNEL
| __GFP_ZERO
);
707 q
->kernel_address
= (u64
) (uintptr_t) p
;
709 /* Make sure read/write pointers are initialized to start of queue */
710 atomic_set(&q
->ci
, 0);
716 static void sync_stream_queue_init(struct hl_device
*hdev
, u32 q_idx
)
718 struct hl_hw_queue
*hw_queue
= &hdev
->kernel_queues
[q_idx
];
719 struct asic_fixed_properties
*prop
= &hdev
->asic_prop
;
720 struct hl_hw_sob
*hw_sob
;
721 int sob
, queue_idx
= hdev
->sync_stream_queue_idx
++;
723 hw_queue
->base_sob_id
=
724 prop
->sync_stream_first_sob
+ queue_idx
* HL_RSVD_SOBS
;
725 hw_queue
->base_mon_id
=
726 prop
->sync_stream_first_mon
+ queue_idx
* HL_RSVD_MONS
;
727 hw_queue
->next_sob_val
= 1;
728 hw_queue
->curr_sob_offset
= 0;
730 for (sob
= 0 ; sob
< HL_RSVD_SOBS
; sob
++) {
731 hw_sob
= &hw_queue
->hw_sob
[sob
];
733 hw_sob
->sob_id
= hw_queue
->base_sob_id
+ sob
;
734 hw_sob
->q_idx
= q_idx
;
735 kref_init(&hw_sob
->kref
);
739 static void sync_stream_queue_reset(struct hl_device
*hdev
, u32 q_idx
)
741 struct hl_hw_queue
*hw_queue
= &hdev
->kernel_queues
[q_idx
];
744 * In case we got here due to a stuck CS, the refcnt might be bigger
745 * than 1 and therefore we reset it.
747 kref_init(&hw_queue
->hw_sob
[hw_queue
->curr_sob_offset
].kref
);
748 hw_queue
->curr_sob_offset
= 0;
749 hw_queue
->next_sob_val
= 1;
753 * queue_init - main initialization function for H/W queue object
755 * @hdev: pointer to hl_device device structure
756 * @q: pointer to hl_hw_queue queue structure
757 * @hw_queue_id: The id of the H/W queue
759 * Allocate dma-able memory for the queue and initialize fields
760 * Returns 0 on success
762 static int queue_init(struct hl_device
*hdev
, struct hl_hw_queue
*q
,
767 q
->hw_queue_id
= hw_queue_id
;
769 switch (q
->queue_type
) {
771 rc
= ext_queue_init(hdev
, q
);
774 rc
= int_queue_init(hdev
, q
);
777 rc
= cpu_queue_init(hdev
, q
);
780 rc
= hw_queue_init(hdev
, q
);
786 dev_crit(hdev
->dev
, "wrong queue type %d during init\n",
792 if (q
->supports_sync_stream
)
793 sync_stream_queue_init(hdev
, q
->hw_queue_id
);
 * queue_fini - destroy queue
806 * @hdev: pointer to hl_device device structure
807 * @q: pointer to hl_hw_queue queue structure
809 * Free the queue memory
811 static void queue_fini(struct hl_device
*hdev
, struct hl_hw_queue
*q
)
817 * If we arrived here, there are no jobs waiting on this queue
818 * so we can safely remove it.
	 * This is because this function can only be called when:
820 * 1. Either a context is deleted, which only can occur if all its
822 * 2. A context wasn't able to be created due to failure or timeout,
823 * which means there are no jobs on the queue yet
825 * The only exception are the queues of the kernel context, but
826 * if they are being destroyed, it means that the entire module is
827 * being removed. If the module is removed, it means there is no open
828 * user context. It also means that if a job was submitted by
829 * the kernel driver (e.g. context creation), the job itself was
830 * released by the kernel driver when a timeout occurred on its
831 * Completion. Thus, we don't need to release it again.
834 if (q
->queue_type
== QUEUE_TYPE_INT
)
837 kfree(q
->shadow_queue
);
839 if (q
->queue_type
== QUEUE_TYPE_CPU
)
840 hdev
->asic_funcs
->cpu_accessible_dma_pool_free(hdev
,
841 HL_QUEUE_SIZE_IN_BYTES
,
842 (void *) (uintptr_t) q
->kernel_address
);
844 hdev
->asic_funcs
->asic_dma_free_coherent(hdev
,
845 HL_QUEUE_SIZE_IN_BYTES
,
846 (void *) (uintptr_t) q
->kernel_address
,
850 int hl_hw_queues_create(struct hl_device
*hdev
)
852 struct asic_fixed_properties
*asic
= &hdev
->asic_prop
;
853 struct hl_hw_queue
*q
;
854 int i
, rc
, q_ready_cnt
;
856 hdev
->kernel_queues
= kcalloc(asic
->max_queues
,
857 sizeof(*hdev
->kernel_queues
), GFP_KERNEL
);
859 if (!hdev
->kernel_queues
) {
860 dev_err(hdev
->dev
, "Not enough memory for H/W queues\n");
864 /* Initialize the H/W queues */
865 for (i
= 0, q_ready_cnt
= 0, q
= hdev
->kernel_queues
;
866 i
< asic
->max_queues
; i
++, q_ready_cnt
++, q
++) {
868 q
->queue_type
= asic
->hw_queues_props
[i
].type
;
869 q
->supports_sync_stream
=
870 asic
->hw_queues_props
[i
].supports_sync_stream
;
871 rc
= queue_init(hdev
, q
, i
);
874 "failed to initialize queue %d\n", i
);
882 for (i
= 0, q
= hdev
->kernel_queues
; i
< q_ready_cnt
; i
++, q
++)
885 kfree(hdev
->kernel_queues
);
890 void hl_hw_queues_destroy(struct hl_device
*hdev
)
892 struct hl_hw_queue
*q
;
893 u32 max_queues
= hdev
->asic_prop
.max_queues
;
896 for (i
= 0, q
= hdev
->kernel_queues
; i
< max_queues
; i
++, q
++)
899 kfree(hdev
->kernel_queues
);
902 void hl_hw_queue_reset(struct hl_device
*hdev
, bool hard_reset
)
904 struct hl_hw_queue
*q
;
905 u32 max_queues
= hdev
->asic_prop
.max_queues
;
908 for (i
= 0, q
= hdev
->kernel_queues
; i
< max_queues
; i
++, q
++) {
910 ((!hard_reset
) && (q
->queue_type
== QUEUE_TYPE_CPU
)))
913 atomic_set(&q
->ci
, 0);
915 if (q
->supports_sync_stream
)
916 sync_stream_queue_reset(hdev
, q
->hw_queue_id
);