git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
RDMA/mlx5: Issue FW command to destroy SRQ on reentry
author Leon Romanovsky <leonro@mellanox.com>
Mon, 7 Sep 2020 12:09:15 +0000 (15:09 +0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Wed, 9 Sep 2020 17:04:13 +0000 (14:04 -0300)
The HW release can fail and leave the system in a limbo state, where the SRQ
is removed from the table but can't be destroyed later.  On every reentry,
the initial xa_erase_irq() check will fail.

Rewrite the erase logic to keep the index occupied without storing the entry
itself. By doing so, we can safely reinsert the entry if the destroy
command fails.

Link: https://lore.kernel.org/r/20200907120921.476363-4-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/hw/mlx5/srq_cmd.c

index 37aaacebd3f26ea652f1f2c5cd3a3e464fe2daf6..c53acbc63d0b0f5823a5c8a0077a0072f4aab1a1 100644 (file)
@@ -596,13 +596,22 @@ void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
        struct mlx5_core_srq *tmp;
        int err;
 
-       tmp = xa_erase_irq(&table->array, srq->srqn);
-       if (!tmp || tmp != srq)
+       /* Delete entry, but leave index occupied */
+       tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0);
+       if (WARN_ON(tmp != srq))
                return;
 
        err = destroy_srq_split(dev, srq);
-       if (err)
+       if (err) {
+               /*
+                * We don't need to check returned result for an error,
+                * because  we are storing in pre-allocated space xarray
+                * entry and it can't fail at this stage.
+                */
+               xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0);
                return;
+       }
+       xa_erase_irq(&table->array, srq->srqn);
 
        mlx5_core_res_put(&srq->common);
        wait_for_completion(&srq->common.free);