git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/commitdiff
habanalabs: rate limit error msg on waiting for CS
author Oded Gabbay <oded.gabbay@gmail.com>
Tue, 3 Dec 2019 08:12:10 +0000 (10:12 +0200)
committer Paolo Pisati <paolo.pisati@canonical.com>
Thu, 30 Jan 2020 15:22:57 +0000 (16:22 +0100)
BugLink: https://bugs.launchpad.net/bugs/1860130
[ Upstream commit 018e0e3594f7dcd029d258e368c485e742fa9cdb ]

In case a user submits a CS, the submission fails, and the user doesn't
check the return value but instead uses the error return value as a valid
sequence number of a CS and asks to wait on it, the driver will print an
error and return an error code for that wait.

The real problem happens if the user now ignores the error of the wait and
tries to wait again and again. This can lead to a flood of error messages
from the driver and even a soft lockup event.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Paolo Pisati <paolo.pisati@canonical.com>
drivers/misc/habanalabs/command_submission.c
drivers/misc/habanalabs/context.c

index a9ac045dcfde307f69c94a4ff28f0466a409d28c..447f307ef4d6f0c5bb92823943a28819882e3257 100644 (file)
@@ -777,8 +777,9 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
        memset(args, 0, sizeof(*args));
 
        if (rc < 0) {
-               dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n",
-                       rc, seq);
+               dev_err_ratelimited(hdev->dev,
+                               "Error %ld on waiting for CS handle %llu\n",
+                               rc, seq);
                if (rc == -ERESTARTSYS) {
                        args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
                        rc = -EINTR;
index 17db7b3dfb4c2635d9c67a4d3b3d24392fcd96bb..2df6fb87e7ff9bab9ebdd3a8a7f512ca002c59f1 100644 (file)
@@ -176,7 +176,7 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
        spin_lock(&ctx->cs_lock);
 
        if (seq >= ctx->cs_sequence) {
-               dev_notice(hdev->dev,
+               dev_notice_ratelimited(hdev->dev,
                        "Can't wait on seq %llu because current CS is at seq %llu\n",
                        seq, ctx->cs_sequence);
                spin_unlock(&ctx->cs_lock);