net/mlx5e: xsk: Fix crash on regular rq reactivation
Author:     Dragos Tatulea <dtatulea@nvidia.com>
AuthorDate: Mon, 24 Apr 2023 15:19:00 +0000 (18:19 +0300)
Commit:     Stefan Bader <stefan.bader@canonical.com>
CommitDate: Mon, 30 Oct 2023 10:59:59 +0000 (11:59 +0100)
BugLink: https://bugs.launchpad.net/bugs/2038236
[ Upstream commit 39646d9bcd1a65d2396328026626859a1dab59d7 ]

When the regular rq is reactivated after the XSK socket is closed,
it could read stale cqes, which eventually corrupts the rq. This
leads to no more traffic being received on the regular rq and to a
crash on the next close or deactivation of the rq.

Kal Cutter Conley reported this issue as a crash on the release
path when the xdpsock sample program is stopped (killed) and restarted
in sequence while traffic is running.

This patch flushes all cqes during the rq flush. The cqe flushing is
done while the rq is in the reset state. The mlx5e_rq_to_ready code is
moved into the flush function to allow for this.
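
For context, a minimal sketch of where the flush sits in the XSK flow.
This caller is an assumption pieced together from the Fixes commit
("Flush RQ on XSK activation to save memory") and is not part of this
diff; only mlx5e_flush_rq() and the RST-state cqe draining shown in the
diff below are what this patch actually changes:

/* Hypothetical caller sketch (not in this diff): the regular rq is
 * flushed when the XSK rq takes over. With this patch the flush also
 * drains stale cqes while the rq is in RST, so that reactivating the
 * regular rq after the XSK socket closes starts from a clean CQ.
 */
static void sketch_activate_xsk(struct mlx5e_channel *c)
{
        mlx5e_activate_rq(&c->xskrq);
        /* RDY -> RST, free posted RX descs, drain stale cqes, RST -> RDY */
        mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY);
}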

Fixes: 082a9edf12fe ("net/mlx5e: xsk: Flush RQ on XSK activation to save memory")
Reported-by: Kal Cutter Conley <kal.conley@dectris.com>
Closes: https://lore.kernel.org/xdp-newbies/CAHApi-nUAs4TeFWUDV915CZJo07XVg2Vp63-no7UDfj6wur9nQ@mail.gmail.com
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
drivers/net/ethernet/mellanox/mlx5/core/en_main.c

index 44114909a69b30ada8cdaae376b0831b8d91a974..d38f284360799aff3e79c1e5d16db0e9a51288f0 100644
@@ -989,7 +989,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s
        return err;
 }
 
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq)
+{
+       struct mlx5_cqwq *cqwq = &rq->cq.wq;
+       struct mlx5_cqe64 *cqe;
+
+       if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) {
+               while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)))
+                       mlx5_cqwq_pop(cqwq);
+       } else {
+               while ((cqe = mlx5_cqwq_get_cqe(cqwq)))
+                       mlx5_cqwq_pop(cqwq);
+       }
+
+       mlx5_cqwq_update_db_record(cqwq);
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
 {
        struct net_device *dev = rq->netdev;
        int err;
@@ -999,6 +1015,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
                netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
                return err;
        }
+
+       mlx5e_free_rx_descs(rq);
+       mlx5e_flush_rq_cq(rq);
+
        err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err) {
                netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
@@ -1008,13 +1028,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
        return 0;
 }
 
-int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
-{
-       mlx5e_free_rx_descs(rq);
-
-       return mlx5e_rq_to_ready(rq, curr_state);
-}
-
 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 {
        struct mlx5_core_dev *mdev = rq->mdev;