From edbb92fc06417d299ca274bdd235a8071b2ad694 Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Mon, 12 Apr 2021 19:57:40 +0300 Subject: [PATCH] scsi: qla2xxx: Reserve extra IRQ vectors BugLink: https://bugs.launchpad.net/bugs/1928857 commit f02d4086a8f36a0e1aaebf559b54cf24a177a486 upstream. Commit a6dcfe08487e ("scsi: qla2xxx: Limit interrupt vectors to number of CPUs") lowers the number of allocated MSI-X vectors to the number of CPUs. That breaks vector allocation assumptions in qla83xx_iospace_config(), qla24xx_enable_msix() and qla2x00_iospace_config(). Either of the functions computes maximum number of qpairs as: ha->max_qpairs = ha->msix_count - 1 (MB interrupt) - 1 (default response queue) - 1 (ATIO, in dual or pure target mode) max_qpairs is set to zero in case of two CPUs and initiator mode. The number is then used to allocate ha->queue_pair_map inside qla2x00_alloc_queues(). No allocation happens and ha->queue_pair_map is left NULL but the driver thinks there are queue pairs available. qla2xxx_queuecommand() tries to find a qpair in the map and crashes: if (ha->mqenable) { uint32_t tag; uint16_t hwq; struct qla_qpair *qpair = NULL; tag = blk_mq_unique_tag(cmd->request); hwq = blk_mq_unique_tag_to_hwq(tag); qpair = ha->queue_pair_map[hwq]; # <- HERE if (qpair) return qla2xxx_mqueuecommand(host, cmd, qpair); } BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 72 Comm: kworker/u4:3 Tainted: G W 5.10.0-rc1+ #25 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Workqueue: scsi_wq_7 fc_scsi_scan_rport [scsi_transport_fc] RIP: 0010:qla2xxx_queuecommand+0x16b/0x3f0 [qla2xxx] Call Trace: scsi_queue_rq+0x58c/0xa60 blk_mq_dispatch_rq_list+0x2b7/0x6f0 ? __sbitmap_get_word+0x2a/0x80 __blk_mq_sched_dispatch_requests+0xb8/0x170 blk_mq_sched_dispatch_requests+0x2b/0x50 __blk_mq_run_hw_queue+0x49/0xb0 __blk_mq_delay_run_hw_queue+0xfb/0x150 blk_mq_sched_insert_request+0xbe/0x110 blk_execute_rq+0x45/0x70 __scsi_execute+0x10e/0x250 scsi_probe_and_add_lun+0x228/0xda0 __scsi_scan_target+0xf4/0x620 ? __pm_runtime_resume+0x4f/0x70 scsi_scan_target+0x100/0x110 fc_scsi_scan_rport+0xa1/0xb0 [scsi_transport_fc] process_one_work+0x1ea/0x3b0 worker_thread+0x28/0x3b0 ? process_one_work+0x3b0/0x3b0 kthread+0x112/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x22/0x30 The driver should allocate enough vectors to provide every CPU it's own HW queue and still handle reserved (MB, RSP, ATIO) interrupts. The change fixes the crash on dual core VM and prevents unbalanced QP allocation where nr_hw_queues is two less than the number of CPUs. Link: https://lore.kernel.org/r/20210412165740.39318-1-r.bolshakov@yadro.com Fixes: a6dcfe08487e ("scsi: qla2xxx: Limit interrupt vectors to number of CPUs") Cc: Daniel Wagner Cc: Himanshu Madhani Cc: Quinn Tran Cc: Nilesh Javali Cc: Martin K. Petersen Cc: stable@vger.kernel.org # 5.11+ Reported-by: Aleksandr Volkov Reported-by: Aleksandr Miloserdov Reviewed-by: Daniel Wagner Reviewed-by: Himanshu Madhani Signed-off-by: Roman Bolshakov Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Kamal Mostafa Signed-off-by: Stefan Bader --- drivers/scsi/qla2xxx/qla_isr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index f9142dbec112..4a3809a8da4b 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3978,11 +3978,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) if (USER_CTRL_IRQ(ha) || !ha->mqiobase) { /* user wants to control IRQ setting for target mode */ ret = pci_alloc_irq_vectors(ha->pdev, min_vecs, - min((u16)ha->msix_count, (u16)num_online_cpus()), + min((u16)ha->msix_count, (u16)(num_online_cpus() + min_vecs)), PCI_IRQ_MSIX); } else ret = pci_alloc_irq_vectors_affinity(ha->pdev, min_vecs, - min((u16)ha->msix_count, (u16)num_online_cpus()), + min((u16)ha->msix_count, (u16)(num_online_cpus() + min_vecs)), PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc); -- 2.39.5