// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_SIG_WAIT	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

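/*
 * hl_sob_reset - SOB kref release handler
 *
 * Called when the last reference to a H/W sync object (SOB) is dropped.
 * Asks the ASIC-specific code to reset the SOB so it can be reused.
 */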
static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	hdev->asic_funcs->reset_sob(hdev, hw_sob);
}

void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}

static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
	return "HabanaLabs";
}

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
	struct hl_cs_compl *hl_cs_compl =
		container_of(fence, struct hl_cs_compl, base_fence);

	return dev_name(hl_cs_compl->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
{
	return true;
}

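/*
 * hl_fence_release - dma_fence release callback
 *
 * Frees the CS completion object that embeds the fence. For signal/wait
 * CS types it also drops the reference this CS holds on its H/W SOB,
 * unless the CS was never submitted (fence error is -EBUSY), in which
 * case no SOB was ever attached.
 */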
static void hl_fence_release(struct dma_fence *fence)
{
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
	}

free:
	kfree_rcu(hl_cs_cmpl, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
	.get_driver_name = hl_fence_get_driver_name,
	.get_timeline_name = hl_fence_get_timeline_name,
	.enable_signaling = hl_fence_enable_signaling,
	.release = hl_fence_release
};

static void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
				job->is_kernel_allocated_cb &&
				!hdev->mmu_enable));
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv: pointer to the private data of the fd
 * @job: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
			spin_unlock(&job->patched_cb->lock);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}

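/*
 * free_job - release a job's resources and drop its CS reference
 *
 * Releases the patched/user CBs that are still held, deletes the job from
 * the CS job list and, for jobs on external or H/W queues, drops the CS
 * reference that was taken when the job was created.
 */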
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt--;
			spin_unlock(&job->patched_cb->lock);

			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 */
	if (job->queue_type == QUEUE_TYPE_HW &&
			job->is_kernel_allocated_cb && hdev->mmu_enable) {
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);

		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	if (job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW)
		cs_put(cs);

	kfree(job);
}

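/*
 * cs_do_release - final CS release, called when the CS refcount hits zero
 *
 * Frees all remaining jobs, updates the queues' CI and the device busy/idle
 * accounting for submitted CSs, re-arms the TDR for the next CS in the
 * mirror list, sets a fence error for CSs that timed out, were aborted or
 * were never submitted, and finally signals and releases the fence.
 */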
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs,
						refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Although if we reached here it means that all external jobs have
	 * finished, because each one of them took refcnt to CS, we still
	 * need to go over the internal jobs and free them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);

	/* We also need to update CI for internal queues */
	if (cs->submitted) {
		hdev->asic_funcs->hw_queues_lock(hdev);

		hdev->cs_active_cnt--;
		if (!hdev->cs_active_cnt) {
			struct hl_device_idle_busy_ts *ts;

			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
			ts->busy_to_idle_ts = ktime_get();

			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
				hdev->idle_busy_ts_idx = 0;
		} else if (hdev->cs_active_cnt < 0) {
			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
				hdev->cs_active_cnt);
		}

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hl_int_hw_queue_update_ci(cs);

		spin_lock(&hdev->hw_queues_mirror_lock);
		/* remove CS from hw_queues mirror list */
		list_del_init(&cs->mirror_node);
		spin_unlock(&hdev->hw_queues_mirror_lock);

		/*
		 * Don't cancel TDR in case this CS was timedout because we
		 * might be running from the TDR context
		 */
		if ((!cs->timedout) &&
			(hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
			struct hl_cs *next;

			if (cs->tdr_active)
				cancel_delayed_work_sync(&cs->work_tdr);

			spin_lock(&hdev->hw_queues_mirror_lock);

			/* queue TDR for next CS */
			next = list_first_entry_or_null(
					&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node);

			if ((next) && (!next->tdr_active)) {
				next->tdr_active = true;
				schedule_delayed_work(&next->work_tdr,
							hdev->timeout_jiffies);
			}

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	} else if (cs->type == CS_TYPE_WAIT) {
		/*
		 * In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() right before hanging on the PQ.
		 */
		dma_fence_put(cs->signal_fence);
	}

	/*
	 * Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
	 * the dma fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		dma_fence_set_error(cs->fence, -ETIMEDOUT);
	else if (cs->aborted)
		dma_fence_set_error(cs->fence, -EIO);
	else if (!cs->submitted)
		dma_fence_set_error(cs->fence, -EBUSY);

	dma_fence_signal(cs->fence);
	dma_fence_put(cs->fence);

	kfree(cs);
}

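/*
 * cs_timedout - TDR (timeout detection and recovery) work handler
 *
 * Runs when a CS hasn't completed within the device timeout. Marks the CS
 * as timed out, logs the stuck submission and, if configured, triggers a
 * device reset.
 */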
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int ctx_asid, rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);

	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS as timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;
	ctx_asid = cs->ctx->asid;

	/* TODO: add information about last signaled seq and last emitted seq */
	dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
		ctx_asid, cs->sequence);

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
}

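/*
 * allocate_cs - allocate and initialize a new CS object
 *
 * Allocates the CS and its completion fence, assigns the next sequence
 * number from the context and stores the fence in the context's pending
 * array. Fails with -EAGAIN if the slot for this sequence is still taken
 * by an unsignaled in-flight CS.
 */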
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_cs_compl *cs_cmpl;
	struct dma_fence *other = NULL;
	struct hl_cs *cs;
	int rc;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		return -ENOMEM;

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		rc = -ENOMEM;
		goto free_cs;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
			"Rejecting CS because of too many in-flight CSs\n");
		rc = -EAGAIN;
		goto free_fence;
	}

	dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
			ctx->asid, ctx->cs_sequence);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	dma_fence_get(&cs_cmpl->base_fence);

	dma_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	return rc;
}

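/*
 * cs_rollback - free all jobs of a CS that failed to be submitted
 */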
static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);
}

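/*
 * hl_cs_rollback_all - abort and roll back all CSs on the mirror list
 *
 * Called during device reset, after flushing pending completions, to mark
 * every in-flight CS as aborted and release its jobs.
 */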
void hl_cs_rollback_all(struct hl_device *hdev)
{
	struct hl_cs *cs, *tmp;

	/* flush all completions */
	flush_workqueue(hdev->cq_wq);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
				mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	free_job(hdev, job);
}

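/*
 * validate_queue_index - sanity-check the queue index of a CS chunk
 *
 * Verifies the index refers to an existing queue that user-space may
 * submit to, and returns the queue type and whether the queue requires a
 * kernel-allocated CB.
 */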
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* Validate the queue index before using it to index the properties
	 * array
	 */
	if (chunk->queue_index >= HL_MAX_QUEUES) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	*queue_type = hw_queue_prop->type;
	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;

	return 0;
}

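/*
 * get_cb_from_cs_chunk - look up and validate the CB referenced by a chunk
 *
 * Takes a reference on the CB, verifies the requested size and increments
 * the CB's CS counter. Returns NULL on any failure.
 */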
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	spin_lock(&cb->lock);
	cb->cs_cnt++;
	spin_unlock(&cb->lock);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

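/*
 * hl_cs_allocate_job - allocate and minimally initialize a job object
 *
 * For jobs on external queues, the completion work is also initialized
 * here.
 */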
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}

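/*
 * cs_ioctl_default - handle a regular (execute/restore) CS submission
 *
 * Copies the chunk array from user-space, allocates a CS with a job per
 * chunk, runs each job through the parser and schedules the CS on the
 * H/W queues.
 */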
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	bool int_queues_only = true;
	u32 size_to_copy;
	int rc, i;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
	}

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
				&is_kernel_allocated_cb);
		if (rc)
			goto free_cs_object;

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;
			else
				goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to the user and have all its
		 * resources freed. Only increment for JOBs on external or
		 * H/W queues, because only for those JOBs we get completion.
		 */
		if (job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	if (int_queues_only) {
		dev_err(hdev->dev,
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	spin_lock(&cb->lock);
	cb->cs_cnt--;
	spin_unlock(&cb->lock);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

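/*
 * cs_ioctl_signal_wait - handle a sync stream signal or wait CS
 *
 * Builds a single-job CS on a sync-stream capable queue. For a wait CS,
 * the signal CS fence is looked up and saved so the wait can be dropped
 * early if the signal CS already completed.
 */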
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct dma_fence *sig_fence = NULL;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	enum hl_queue_type q_type;
	u64 *signal_seq_arr = NULL, signal_seq;
	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
	int rc;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];
	q_idx = chunk->queue_index;

	/* Validate the queue index before using it to index the properties
	 * array
	 */
	if (q_idx >= HL_MAX_QUEUES) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if (!hw_queue_prop->supports_sync_stream) {
		dev_err(hdev->dev,
			"Queue index %d does not support sync stream operations\n",
			q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_WAIT) {
		struct hl_cs_compl *sig_waitcs_cmpl;

		signal_seq_arr_len = chunk->num_signal_seq_arr;

		/* currently only one signal seq is supported */
		if (signal_seq_arr_len != 1) {
			dev_err(hdev->dev,
				"Wait for signal CS supports only one signal CS seq\n");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
						sizeof(*signal_seq_arr),
						GFP_ATOMIC);
		if (!signal_seq_arr) {
			rc = -ENOMEM;
			goto free_cs_chunk_array;
		}

		size_to_copy = chunk->num_signal_seq_arr *
				sizeof(*signal_seq_arr);
		if (copy_from_user(signal_seq_arr,
					u64_to_user_ptr(chunk->signal_seq_arr),
					size_to_copy)) {
			dev_err(hdev->dev,
				"Failed to copy signal seq array from user\n");
			rc = -EFAULT;
			goto free_signal_seq_array;
		}

		/* currently it is guaranteed to have only one signal seq */
		signal_seq = signal_seq_arr[0];
		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_signal_seq_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_signal_seq_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			dma_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_signal_seq_array;
		}

		if (dma_fence_is_signaled(sig_fence)) {
			/* signal CS already finished */
			dma_fence_put(sig_fence);
			rc = 0;
			goto free_signal_seq_array;
		}
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT)
			dma_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_signal_seq_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs->type == CS_TYPE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto put_cs;
	}

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
	if (!cb) {
		kfree(job);
		rc = -EFAULT;
		goto put_cs;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need for parsing, the user CB is already the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need the CB in
	 * the CB idr anymore and to decrement its refcount as it was
	 * incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	hl_debugfs_add_job(hdev, job);

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_signal_seq_array:
	if (cs_type == CS_TYPE_WAIT)
		kfree(signal_seq_arr);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

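/*
 * hl_cs_ioctl - main entry point of the CS IOCTL
 *
 * Validates the CS flags, performs the context-switch/restore phase on the
 * first submission of a context (or when forced), and then dispatches to
 * the default or signal/wait submission flow.
 */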
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks_execute, *chunks_restore;
	enum hl_cs_type cs_type;
	u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
	u64 cs_seq = ULLONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		rc = -EBUSY;
		goto out;
	}

	sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;

	if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
		dev_err(hdev->dev,
			"Signal and wait CS flags are mutually exclusive, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
			(!hdev->supports_sync_stream))) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		rc = -EINVAL;
		goto out;
	}

	if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
		cs_type = CS_TYPE_SIGNAL;
	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
		cs_type = CS_TYPE_WAIT;
	else
		cs_type = CS_TYPE_DEFAULT;

	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks_execute = args->in.num_chunks_execute;

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks_execute) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
				ctx->asid);
			rc = -EINVAL;
			goto out;
		}
	} else if (num_chunks_execute != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks_restore =
			(void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks_restore = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not
				 * IDLE while we want to do context-switch
				 * (-EBUSY), we need to soft-reset because
				 * QMAN is probably stuck. However, we can't
				 * call the reset here directly because of
				 * deadlock, so we need to do it at the very
				 * end of this function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (!num_chunks_restore) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks_restore,
						num_chunks_restore, &cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks_restore) {
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					cs_seq);
			if (ret <= 0) {
				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %ld\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;
	} else if (!ctx->thread_ctx_switch_wait_token) {
		u32 tmp;

		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

	if (cs_type == CS_TYPE_DEFAULT)
		rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
					&cs_seq);
	else
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
						num_chunks_execute, &cs_seq);

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}

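/*
 * _hl_cs_wait_ioctl - wait for the fence of a given CS sequence number
 *
 * Returns a positive value if the CS completed (1 if its fence is already
 * gone), 0 if the wait timed out, or a negative error code.
 */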
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
	struct dma_fence *fence;
	unsigned long timeout;
	long rc;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on seq %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;
		dma_fence_put(fence);
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		rc = 1;
	}

	hl_ctx_put(ctx);

	return rc;
}

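/*
 * hl_cs_wait_ioctl - IOCTL entry point for waiting on a CS
 *
 * Translates the wait result into a HL_WAIT_CS_STATUS_* code for
 * user-space.
 */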
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	u64 seq = args->in.seq;
	long rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

	memset(args, 0, sizeof(*args));

	if (rc < 0) {
		dev_err_ratelimited(hdev->dev,
			"Error %ld on waiting for CS handle %llu\n",
			rc, seq);
		if (rc == -ERESTARTSYS) {
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (rc == 0)
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	else
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

	return 0;
}