// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"

#include <linux/slab.h>

/*
 * hl_hw_queue_add_ptr - add to pi or ci and check if it wraps around
 *
 * @ptr: the current pi/ci value
 * @val: the amount to add
 *
 * Add val to ptr. It can go until twice the queue length.
 */
inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
{
	ptr += val;
	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
	return ptr;
}

static inline int queue_ci_get(atomic_t *ci, u32 queue_len)
{
	return atomic_read(ci) & ((queue_len << 1) - 1);
}

static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
{
	int delta = (q->pi - queue_ci_get(&q->ci, queue_len));

	if (delta >= 0)
		return (queue_len - delta);
	else
		return (abs(delta) - queue_len);
}
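
/*
 * Worked example, assuming a queue length of 8 purely for illustration:
 * pi and ci run over twice the queue length (0..15 here) so that a full
 * queue (pi == ci + queue_len) can be told apart from an empty one
 * (pi == ci). queue_free_slots() then yields:
 *
 *	pi = 12, ci = 10 -> delta =  2  -> 8 - 2  = 6 free slots
 *	pi =  3, ci = 13 -> delta = -10 -> 10 - 8 = 2 free slots
 */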

void hl_int_hw_queue_update_ci(struct hl_cs *cs)
{
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_hw_queue *q;
	int i;

	if (hdev->disabled)
		return;

	q = &hdev->kernel_queues[0];
	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
		if (q->queue_type == QUEUE_TYPE_INT)
			atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
	}
}

/*
 * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external
 *				or a H/W queue.
 * @hdev: pointer to habanalabs device structure
 * @q: pointer to habanalabs queue structure
 * @ctl: BD's control word
 * @len: BD's length
 * @ptr: BD's pointer
 *
 * This function assumes there is enough space on the queue to submit a new
 * BD to it. It initializes the next BD and calls the device specific
 * function to set the pi (and doorbell)
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
{
	struct hl_bd *bd;

	bd = q->kernel_address;
	bd += hl_pi_2_offset(q->pi);
	bd->ctl = cpu_to_le32(ctl);
	bd->len = cpu_to_le32(len);
	bd->ptr = cpu_to_le64(ptr);

	q->pi = hl_queue_inc_ptr(q->pi);
	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * ext_queue_sanity_checks - perform some sanity checks on external queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 * @reserve_cq_entry: whether to reserve an entry in the cq
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 * - Make sure we have enough space in the completion queue
 * - Reserve space in the completion queue (needs to be reversed if there
 *   is a failure down the road before the actual submission of work). Only
 *   do this action if reserve_cq_entry is true
 *
 */
static int ext_queue_sanity_checks(struct hl_device *hdev,
				struct hl_hw_queue *q, int num_of_entries,
				bool reserve_cq_entry)
{
	atomic_t *free_slots =
			&hdev->completion_queue[q->cq_id].free_slots_cnt;
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	if (reserve_cq_entry) {
		/*
		 * Check we have enough space in the completion queue.
		 * Subtract num_of_entries from the counter; if the result is
		 * negative, the CQ is full so we can't submit a new CB
		 * because we won't get an ack on its completion.
		 * atomic_add_negative() returns true if the result is
		 * negative, in which case the counter is restored below.
		 */
		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
				num_of_entries, q->hw_queue_id);
			atomic_add(num_of_entries, free_slots);
			return -EAGAIN;
		}
	}

	return 0;
}
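
/*
 * The CQ reservation above pairs with the rollback performed under the
 * unroll_cq_resv label in hl_hw_queue_schedule_cs(): every external-queue
 * job consumes one completion-queue entry, so free_slots_cnt is decremented
 * here and must be incremented back if the submission fails before any BD
 * is actually written to the queue.
 */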

/*
 * int_queue_sanity_checks - perform some sanity checks on internal queue
 *
 * @hdev: pointer to hl_device structure
 * @q: pointer to hl_hw_queue structure
 * @num_of_entries: how many entries to check for space
 *
 * H/W queues spinlock should be taken before calling this function
 *
 * Perform the following:
 * - Make sure we have enough space in the h/w queue
 *
 */
static int int_queue_sanity_checks(struct hl_device *hdev,
					struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	if (num_of_entries > q->int_queue_len) {
		dev_err(hdev->dev,
			"Cannot populate queue %u with %u jobs\n",
			q->hw_queue_id, num_of_entries);
		return -ENOMEM;
	}

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, q->int_queue_len);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hw_queue_sanity_checks() - Make sure we have enough space in the h/w queue
 * @hdev: Pointer to hl_device structure.
 * @q: Pointer to hl_hw_queue structure.
 * @num_of_entries: How many entries to check for space.
 *
 * Notice: We do not reserve queue entries so this function mustn't be called
 * more than once per CS for the same queue
 *
 */
static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
					int num_of_entries)
{
	int free_slots_cnt;

	/* Check we have enough space in the queue */
	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);

	if (free_slots_cnt < num_of_entries) {
		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
			q->hw_queue_id, num_of_entries);
		return -EAGAIN;
	}

	return 0;
}

/*
 * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: the ID of the queue to send the CB to
 * @cb_size: size of CB
 * @cb_ptr: pointer to CB location
 *
 * This function sends a single CB, that must NOT generate a completion entry
 *
 */
int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
				u32 cb_size, u64 cb_ptr)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
	int rc = 0;

	/*
	 * The CPU queue is a synchronous queue with an effective depth of
	 * a single entry (although it is allocated with room for multiple
	 * entries). Therefore, there is a different lock, called
	 * send_cpu_message_lock, that serializes accesses to the CPU queue.
	 * As a result, we don't need to lock the access to the entire H/W
	 * queues module when submitting a JOB to the CPU queue
	 */
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_lock(hdev);

	if (hdev->disabled) {
		rc = -EPERM;
		goto out;
	}

	/*
	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
	 * type only on init phase, when the queues are empty and being tested,
	 * so there is no need for sanity checks.
	 */
	if (q->queue_type != QUEUE_TYPE_HW) {
		rc = ext_queue_sanity_checks(hdev, q, 1, false);
		if (rc)
			goto out;
	}

	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);

out:
	if (q->queue_type != QUEUE_TYPE_CPU)
		hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}

/*
 * ext_queue_schedule_job - submit a JOB to an external queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void ext_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_cq_entry cq_pkt;
	struct hl_cq *cq;
	u64 cq_addr;
	struct hl_cb *cb;
	u32 ctl;
	u32 len;
	u64 ptr;

	/*
	 * Update the JOB ID inside the BD CTL so the device would know what
	 * to write in the completion queue
	 */
	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);

	cb = job->patched_cb;
	len = job->job_cb_size;
	ptr = cb->bus_address;

	cq_pkt.data = cpu_to_le32(
			((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
				& CQ_ENTRY_SHADOW_INDEX_MASK) |
			FIELD_PREP(CQ_ENTRY_SHADOW_INDEX_VALID_MASK, 1) |
			FIELD_PREP(CQ_ENTRY_READY_MASK, 1));

	/*
	 * No need to protect pi_offset because scheduling to the
	 * H/W queues is done under the scheduler mutex
	 *
	 * No need to check if CQ is full because it was already
	 * checked in ext_queue_sanity_checks
	 */
	cq = &hdev->completion_queue[q->cq_id];
	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);

	hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
						cq_addr,
						le32_to_cpu(cq_pkt.data),
						q->msi_vec,
						job->contains_dma_pkt);

	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;

	cq->pi = hl_cq_inc_ptr(cq->pi);

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
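
/*
 * The shadow-queue write above is what lets a completion be mapped back to
 * its job: the BD control word and the CQ entry both carry the pi-derived
 * shadow index, which the driver's CQ interrupt path (implemented outside
 * this file) can use to look up q->shadow_queue[index].
 */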

/*
 * int_queue_schedule_job - submit a JOB to an internal queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void int_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	struct hl_bd bd;
	__le64 *pi;

	bd.ctl = 0;
	bd.len = cpu_to_le32(job->job_cb_size);
	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);

	pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd);

	q->pi++;
	q->pi &= ((q->int_queue_len << 1) - 1);

	hdev->asic_funcs->pqe_write(hdev, pi, &bd);

	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
}

/*
 * hw_queue_schedule_job - submit a JOB to a H/W queue
 *
 * @job: pointer to the job that needs to be submitted to the queue
 *
 * This function must be called when the scheduler mutex is taken
 *
 */
static void hw_queue_schedule_job(struct hl_cs_job *job)
{
	struct hl_device *hdev = job->cs->ctx->hdev;
	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
	u64 ptr;
	u32 offset, ctl, len;

	/*
	 * Upon PQE completion, COMP_DATA is used as the write data to the
	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
	 * write address offset in the SM block (QMAN LBW message).
	 * The write address offset is calculated as "COMP_OFFSET << 2".
	 */
	offset = job->cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);

	len = job->job_cb_size;

	/*
	 * A patched CB is created only if a user CB was allocated by driver and
	 * MMU is disabled. If MMU is enabled, the user CB should be used
	 * instead. If the user CB wasn't allocated by driver, assume that it
	 * holds an address.
	 */
	if (job->patched_cb)
		ptr = job->patched_cb->bus_address;
	else if (job->is_kernel_allocated_cb)
		ptr = job->user_cb->bus_address;
	else
		ptr = (u64) (uintptr_t) job->user_cb;

	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
}
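
/*
 * Worked example for the COMP_OFFSET computation above, assuming purely for
 * illustration that max_pending_cs is 64 (the real value is an ASIC
 * property and must be a power of two for this mask to be valid): a CS with
 * sequence number 193 gives offset = 193 & 63 = 1, so the completion write
 * lands at byte offset (1 << 2) = 4 inside the SM block.
 */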

/*
 * init_signal_wait_cs - initialize a signal/wait CS
 * @cs: pointer to the signal/wait CS
 *
 * H/W queues spinlock should be taken before calling this function
 */
static void init_signal_wait_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_hw_queue *hw_queue;
	struct hl_cs_compl *cs_cmpl =
			container_of(cs->fence, struct hl_cs_compl, base_fence);

	struct hl_hw_sob *hw_sob;
	struct hl_cs_job *job;
	u32 q_idx;

	/* There is only one job in a signal/wait CS */
	job = list_first_entry(&cs->job_list, struct hl_cs_job,
				cs_node);
	q_idx = job->hw_queue_id;
	hw_queue = &hdev->kernel_queues[q_idx];

	if (cs->type & CS_TYPE_SIGNAL) {
		hw_sob = &hw_queue->hw_sob[hw_queue->curr_sob_offset];

		cs_cmpl->hw_sob = hw_sob;
		cs_cmpl->sob_val = hw_queue->next_sob_val++;

		dev_dbg(hdev->dev,
			"generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);

		hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
					cs_cmpl->hw_sob->sob_id);

		kref_get(&hw_sob->kref);

		/* check for wraparound */
		if (hw_queue->next_sob_val == HL_MAX_SOB_VAL) {
			/*
			 * Decrement as we reached the max value.
			 * The release function won't be called here as we've
			 * just incremented the refcount.
			 */
			kref_put(&hw_sob->kref, hl_sob_reset_error);
			hw_queue->next_sob_val = 1;
			/* only two SOBs are currently in use */
			hw_queue->curr_sob_offset =
					(hw_queue->curr_sob_offset + 1) %
						HL_RSVD_SOBS_IN_USE;

			dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
				hw_queue->curr_sob_offset, q_idx);
		}
	} else if (cs->type & CS_TYPE_WAIT) {
		struct hl_cs_compl *signal_cs_cmpl;

		signal_cs_cmpl = container_of(cs->signal_fence,
						struct hl_cs_compl,
						base_fence);

		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

		dev_dbg(hdev->dev,
			"generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
			cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
			hw_queue->base_mon_id, q_idx);

		hdev->asic_funcs->gen_wait_cb(hdev, job->patched_cb,
						cs_cmpl->hw_sob->sob_id,
						cs_cmpl->sob_val,
						hw_queue->base_mon_id,
						q_idx);

		kref_get(&cs_cmpl->hw_sob->kref);
		/*
		 * Must put the signal fence after the SOB refcnt increment so
		 * the SOB refcnt won't turn 0 and reset the SOB before the
		 * wait CS was submitted.
		 */
		mb();
		hl_fence_put(cs->signal_fence);
		cs->signal_fence = NULL;
	}
}
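
/*
 * Summary of the SOB handshake implemented above: a signal CS takes the
 * queue's current sync object (SOB), bumps next_sob_val and generates a
 * signal CB targeting that SOB; the matching wait CS copies the SOB id and
 * value from the signal's completion object and generates a wait CB that
 * uses the queue's base_mon_id monitor to wait for that SOB value. Once
 * next_sob_val reaches HL_MAX_SOB_VAL, the queue rotates to the next
 * reserved SOB and restarts the count at 1.
 */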

/*
 * hl_hw_queue_schedule_cs - schedule a command submission
 * @cs: pointer to the CS
 */
int hl_hw_queue_schedule_cs(struct hl_cs *cs)
{
	struct hl_ctx *ctx = cs->ctx;
	struct hl_device *hdev = ctx->hdev;
	struct hl_cs_job *job, *tmp;
	struct hl_hw_queue *q;
	u32 max_queues;
	int rc = 0, i, cq_cnt;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (hl_device_disabled_or_in_reset(hdev)) {
		ctx->cs_counters.device_in_reset_drop_cnt++;
		dev_err(hdev->dev,
			"device is disabled or in reset, CS rejected!\n");
		rc = -EPERM;
		goto out;
	}

	max_queues = hdev->asic_prop.max_queues;

	q = &hdev->kernel_queues[0];
	for (i = 0, cq_cnt = 0 ; i < max_queues ; i++, q++) {
		if (cs->jobs_in_queue_cnt[i]) {
			switch (q->queue_type) {
			case QUEUE_TYPE_EXT:
				rc = ext_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i], true);
				break;
			case QUEUE_TYPE_INT:
				rc = int_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			case QUEUE_TYPE_HW:
				rc = hw_queue_sanity_checks(hdev, q,
						cs->jobs_in_queue_cnt[i]);
				break;
			default:
				dev_err(hdev->dev, "Queue type %d is invalid\n",
					q->queue_type);
				rc = -EINVAL;
				break;
			}

			if (rc) {
				ctx->cs_counters.queue_full_drop_cnt++;
				goto unroll_cq_resv;
			}

			if (q->queue_type == QUEUE_TYPE_EXT)
				cq_cnt++;
		}
	}

	if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT))
		init_signal_wait_cs(cs);

	spin_lock(&hdev->hw_queues_mirror_lock);
	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);

	/* Queue TDR if the CS is the first entry and if timeout is wanted */
	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
			(list_first_entry(&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node) == cs)) {
		cs->tdr_active = true;
		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
		spin_unlock(&hdev->hw_queues_mirror_lock);
	} else {
		spin_unlock(&hdev->hw_queues_mirror_lock);
	}

	if (!hdev->cs_active_cnt++) {
		struct hl_device_idle_busy_ts *ts;

		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
		ts->busy_to_idle_ts = ktime_set(0, 0);
		ts->idle_to_busy_ts = ktime_get();
	}

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		switch (job->queue_type) {
		case QUEUE_TYPE_EXT:
			ext_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_INT:
			int_queue_schedule_job(job);
			break;
		case QUEUE_TYPE_HW:
			hw_queue_schedule_job(job);
			break;
		default:
			break;
		}

	cs->submitted = true;

	goto out;

unroll_cq_resv:
	q = &hdev->kernel_queues[0];
	for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
		if ((q->queue_type == QUEUE_TYPE_EXT) &&
				(cs->jobs_in_queue_cnt[i])) {
			atomic_t *free_slots =
				&hdev->completion_queue[i].free_slots_cnt;
			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
			cq_cnt--;
		}
	}

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	return rc;
}
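
/*
 * The scheduling flow above is, in order: take the H/W queues lock, run the
 * per-queue-type sanity checks (reserving CQ entries for external queues),
 * handle signal/wait CSs, add the CS to the mirror list and arm the TDR for
 * the first pending CS, mark the device busy, and only then write the BDs
 * and ring the doorbells. On any sanity-check failure the reserved CQ
 * entries are returned under the unroll_cq_resv label.
 */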

/*
 * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: which queue to increment its ci
 */
void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];

	atomic_inc(&q->ci);
}

static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
					bool is_cpu_queue)
{
	void *p;
	int rc;

	if (is_cpu_queue)
		p = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
							HL_QUEUE_SIZE_IN_BYTES,
							&q->bus_address);
	else
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
					sizeof(*q->shadow_queue),
					GFP_KERNEL);
	if (!q->shadow_queue) {
		dev_err(hdev->dev,
			"Failed to allocate shadow queue for H/W queue %d\n",
			q->hw_queue_id);
		rc = -ENOMEM;
		goto free_queue;
	}

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;

free_queue:
	if (is_cpu_queue)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);

	return rc;
}

static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
					&q->bus_address, &q->int_queue_len);
	if (!p) {
		dev_err(hdev->dev,
			"Failed to get base address for internal queue %d\n",
			q->hw_queue_id);
		return -EFAULT;
	}

	q->kernel_address = p;
	q->pi = 0;
	atomic_set(&q->ci, 0);

	return 0;
}

static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, true);
}

static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	return ext_and_cpu_queue_init(hdev, q, false);
}

static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
{
	void *p;

	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
						HL_QUEUE_SIZE_IN_BYTES,
						&q->bus_address,
						GFP_KERNEL | __GFP_ZERO);
	if (!p)
		return -ENOMEM;

	q->kernel_address = p;

	/* Make sure read/write pointers are initialized to start of queue */
	atomic_set(&q->ci, 0);
	q->pi = 0;

	return 0;
}

static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_hw_sob *hw_sob;
	int sob, queue_idx = hdev->sync_stream_queue_idx++;

	hw_queue->base_sob_id =
		prop->sync_stream_first_sob + queue_idx * HL_RSVD_SOBS;
	hw_queue->base_mon_id =
		prop->sync_stream_first_mon + queue_idx * HL_RSVD_MONS;
	hw_queue->next_sob_val = 1;
	hw_queue->curr_sob_offset = 0;

	for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
		hw_sob = &hw_queue->hw_sob[sob];
		hw_sob->hdev = hdev;
		hw_sob->sob_id = hw_queue->base_sob_id + sob;
		hw_sob->q_idx = q_idx;
		kref_init(&hw_sob->kref);
	}
}

static void sync_stream_queue_reset(struct hl_device *hdev, u32 q_idx)
{
	struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];

	/*
	 * In case we got here due to a stuck CS, the refcnt might be bigger
	 * than 1 and therefore we reset it.
	 */
	kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
	hw_queue->curr_sob_offset = 0;
	hw_queue->next_sob_val = 1;
}

/*
 * queue_init - main initialization function for H/W queue object
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 * @hw_queue_id: The id of the H/W queue
 *
 * Allocate dma-able memory for the queue and initialize fields
 * Returns 0 on success
 */
static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
			u32 hw_queue_id)
{
	int rc;

	q->hw_queue_id = hw_queue_id;

	switch (q->queue_type) {
	case QUEUE_TYPE_EXT:
		rc = ext_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_INT:
		rc = int_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_CPU:
		rc = cpu_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_HW:
		rc = hw_queue_init(hdev, q);
		break;
	case QUEUE_TYPE_NA:
		q->valid = 0;
		return 0;
	default:
		dev_crit(hdev->dev, "wrong queue type %d during init\n",
			q->queue_type);
		rc = -EINVAL;
		break;
	}

	if (q->supports_sync_stream)
		sync_stream_queue_init(hdev, q->hw_queue_id);

	if (rc)
		return rc;

	q->valid = 1;

	return 0;
}

/*
 * queue_fini - destroy queue
 *
 * @hdev: pointer to hl_device device structure
 * @q: pointer to hl_hw_queue queue structure
 *
 * Free the queue memory
 */
static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
{
	if (!q->valid)
		return;

	/*
	 * If we arrived here, there are no jobs waiting on this queue
	 * so we can safely remove it.
	 * This is because this function can only be called when:
	 * 1. Either a context is deleted, which only can occur if all its
	 *    jobs were finished
	 * 2. A context wasn't able to be created due to failure or timeout,
	 *    which means there are no jobs on the queue yet
	 *
	 * The only exception are the queues of the kernel context, but
	 * if they are being destroyed, it means that the entire module is
	 * being removed. If the module is removed, it means there is no open
	 * user context. It also means that if a job was submitted by
	 * the kernel driver (e.g. context creation), the job itself was
	 * released by the kernel driver when a timeout occurred on its
	 * Completion. Thus, we don't need to release it again.
	 */

	if (q->queue_type == QUEUE_TYPE_INT)
		return;

	kfree(q->shadow_queue);

	if (q->queue_type == QUEUE_TYPE_CPU)
		hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev,
					HL_QUEUE_SIZE_IN_BYTES,
					q->kernel_address,
					q->bus_address);
}

int hl_hw_queues_create(struct hl_device *hdev)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hl_hw_queue *q;
	int i, rc, q_ready_cnt;

	hdev->kernel_queues = kcalloc(asic->max_queues,
				sizeof(*hdev->kernel_queues), GFP_KERNEL);

	if (!hdev->kernel_queues) {
		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
		return -ENOMEM;
	}

	/* Initialize the H/W queues */
	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
			i < asic->max_queues ; i++, q_ready_cnt++, q++) {

		q->queue_type = asic->hw_queues_props[i].type;
		q->supports_sync_stream =
				asic->hw_queues_props[i].supports_sync_stream;
		rc = queue_init(hdev, q, i);
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize queue %d\n", i);
			goto release_queues;
		}
	}

	return 0;

release_queues:
	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);

	return rc;
}

void hl_hw_queues_destroy(struct hl_device *hdev)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++)
		queue_fini(hdev, q);

	kfree(hdev->kernel_queues);
}

void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
{
	struct hl_hw_queue *q;
	u32 max_queues = hdev->asic_prop.max_queues;
	int i;

	for (i = 0, q = hdev->kernel_queues ; i < max_queues ; i++, q++) {
		if ((!q->valid) ||
			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
			continue;
		q->pi = 0;
		atomic_set(&q->ci, 0);

		if (q->supports_sync_stream)
			sync_stream_queue_reset(hdev, q->hw_queue_id);
	}
}