From 739cfa34518ef3a6789f5f77239073972a387359 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 Oct 2022 16:14:55 +0300 Subject: [PATCH] net/mlx5: Make ASO poll CQ usable in atomic context Poll CQ functions shouldn't sleep as they are called in atomic context. The following splat appears once mlx5_aso_poll_cq() is used in such a flow. BUG: scheduling while atomic: swapper/17/0/0x00000100 Modules linked in: sch_ingress openvswitch nsh mlx5_vdpa vringh vhost_iotlb vdpa mlx5_ib mlx5_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter overlay rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core fuse [last unloaded: mlx5_core] CPU: 17 PID: 0 Comm: swapper/17 Tainted: G W 6.0.0-rc2+ #13 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 __schedule_bug.cold+0x47/0x53 __schedule+0x4b6/0x670 ? hrtimer_start_range_ns+0x28d/0x360 schedule+0x50/0x90 schedule_hrtimeout_range_clock+0x98/0x120 ? __hrtimer_init+0xb0/0xb0 usleep_range_state+0x60/0x90 mlx5_aso_poll_cq+0xad/0x190 [mlx5_core] mlx5e_ipsec_aso_update_curlft+0x81/0xb0 [mlx5_core] xfrm_timer_handler+0x6b/0x360 ? 
xfrm_find_acq_byseq+0x50/0x50 __hrtimer_run_queues+0x139/0x290 hrtimer_run_softirq+0x7d/0xe0 __do_softirq+0xc7/0x272 irq_exit_rcu+0x87/0xb0 sysvec_apic_timer_interrupt+0x72/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 RIP: 0010:default_idle+0x18/0x20 Code: ae 7d ff ff cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 1f 44 00 00 8b 05 b5 30 0d 01 85 c0 7e 07 0f 00 2d 0a e3 53 00 fb f4 0f 1f 80 00 00 00 00 0f 1f 44 00 00 65 48 8b 04 25 80 ad 01 00 RSP: 0018:ffff888100883ee0 EFLAGS: 00000242 RAX: 0000000000000001 RBX: ffff888100849580 RCX: 4000000000000000 RDX: 0000000000000001 RSI: 0000000000000083 RDI: 000000000008863c RBP: 0000000000000011 R08: 00000064e6977fa9 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 default_idle_call+0x37/0xb0 do_idle+0x1cd/0x1e0 cpu_startup_entry+0x19/0x20 start_secondary+0xfe/0x120 secondary_startup_64_no_verify+0xcd/0xdb softirq: huh, entered softirq 8 HRTIMER 00000000a97c08cb with preempt_count 00000100, exited with 00000000? Signed-off-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c | 8 +++++++- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c | 10 +--------- drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index a53e205f4a89..be74e1403328 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -115,6 +115,7 @@ mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, struct mlx5e_flow_meters *flow_meters; u8 cir_man, cir_exp, cbs_man, cbs_exp; struct mlx5_aso_wqe *aso_wqe; + unsigned long expires; struct mlx5_aso *aso; u64 rate, burst; u8 ds_cnt; @@ -187,7 +188,12 @@ mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl); /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. 
*/ - err = mlx5_aso_poll_cq(aso, true, 10); + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(aso, true); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); mutex_unlock(&flow_meters->aso_lock); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 5da746da898d..41970067917b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1405,7 +1405,7 @@ static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_mac MLX5_ACCESS_ASO_OPC_MOD_MACSEC); macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in); mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); - err = mlx5_aso_poll_cq(maso, false, 10); + err = mlx5_aso_poll_cq(maso, false); mutex_unlock(&aso->aso_lock); return err; @@ -1430,7 +1430,7 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); - err = mlx5_aso_poll_cq(maso, false, 10); + err = mlx5_aso_poll_cq(maso, false); if (err) goto err_out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index 21e14507ff5c..baa8092f335e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -381,20 +381,12 @@ void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, WRITE_ONCE(doorbell_cseg, NULL); } -int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data, u32 interval_ms) +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data) { struct mlx5_aso_cq *cq = &aso->cq; struct mlx5_cqe64 *cqe; - unsigned long expires; cqe = mlx5_cqwq_get_cqe(&cq->wq); - - expires = jiffies + msecs_to_jiffies(interval_ms); - while (!cqe && time_is_after_jiffies(expires)) { - 
usleep_range(2, 10); - cqe = mlx5_cqwq_get_cqe(&cq->wq); - } - if (!cqe) return -ETIMEDOUT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h index d854e01d7fc5..2d40dcf9d42e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h @@ -83,7 +83,7 @@ void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, u32 obj_id, u32 opc_mode); void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, struct mlx5_wqe_ctrl_seg *doorbell_cseg); -int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data, u32 interval_ms); +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data); struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn); void mlx5_aso_destroy(struct mlx5_aso *aso); -- 2.39.5