From d940f9ae3e5d65fe2f0ca1a0174cc9e35ee25acc Mon Sep 17 00:00:00 2001
From: Uma Krishnan
Date: Fri, 7 Jul 2017 13:05:18 -0300
Subject: [PATCH] scsi: cxlflash: Cancel scheduled workers before stopping AFU

BugLink: http://bugs.launchpad.net/bugs/1702521

When processing an AFU asynchronous interrupt, if the action results in an
operation that requires off level processing (a link reset for example),
the worker thread is scheduled. In the meantime a reset event (e.g., EEH)
could unmap the AFU to recover. This results in an Oops when the worker
thread tries to access the AFU mapping.

[c000000f17e03b90] d000000007cd5978 cxlflash_worker_thread+0x268/0x550
[c000000f17e03c40] c00000000011883c process_one_work+0x1dc/0x680
[c000000f17e03ce0] c000000000118e80 worker_thread+0x1a0/0x520
[c000000f17e03d80] c000000000126174 kthread+0xf4/0x100
[c000000f17e03e30] c00000000000a47c ret_from_kernel_thread+0x5c/0xe0

In an effort to avoid this, a mapcount was introduced in
commit b45cdbaf9f7f ("cxlflash: Resolve oops in wait_port_offline")
but due to the race condition described above, this solution is incomplete.

In order to fully resolve this problem and to simplify things, this commit
removes the mapcount solution. Instead, the scheduled worker thread is
cancelled after interrupts have been disabled and prior to the mapping
being freed.

Fixes: b45cdbaf9f7f ("cxlflash: Resolve oops in wait_port_offline")
Signed-off-by: Uma Krishnan
Acked-by: Matthew R. Ochs
Signed-off-by: Martin K.
Petersen (cherry picked from commit 0df5bef739601f18bffc0d256ae451f239a826bd) Signed-off-by: Victor Aoqui Acked-by: Stefan Bader Acked-by: Seth Forshee Signed-off-by: Thadeu Lima de Souza Cascardo --- drivers/scsi/cxlflash/common.h | 2 -- drivers/scsi/cxlflash/main.c | 34 ++++++---------------------------- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h index dee865735ac0..d11dcc59ff46 100644 --- a/drivers/scsi/cxlflash/common.h +++ b/drivers/scsi/cxlflash/common.h @@ -174,8 +174,6 @@ struct afu { struct sisl_host_map __iomem *host_map; /* MC host map */ struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */ - struct kref mapcount; - ctx_hndl_t ctx_hndl; /* master's context handle */ atomic_t hsq_credits; diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index ab38bca5df2b..7069639e92bc 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -419,16 +419,6 @@ out: return rc; } -static void afu_unmap(struct kref *ref) -{ - struct afu *afu = container_of(ref, struct afu, mapcount); - - if (likely(afu->afu_map)) { - cxl_psa_unmap((void __iomem *)afu->afu_map); - afu->afu_map = NULL; - } -} - /** * cxlflash_driver_info() - information handler for this host driver * @host: SCSI host associated with device. 
@@ -459,7 +449,6 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) ulong lock_flags; int nseg = 0; int rc = 0; - int kref_got = 0; dev_dbg_ratelimited(dev, "%s: (scp=%p) %d/%d/%d/%llu " "cdb=(%08x-%08x-%08x-%08x)\n", @@ -497,9 +486,6 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) break; } - kref_get(&cfg->afu->mapcount); - kref_got = 1; - if (likely(sg)) { nseg = scsi_dma_map(scp); if (unlikely(nseg < 0)) { @@ -530,8 +516,6 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp) if (unlikely(rc)) scsi_dma_unmap(scp); out: - if (kref_got) - kref_put(&afu->mapcount, afu_unmap); return rc; } @@ -569,13 +553,15 @@ static void free_mem(struct cxlflash_cfg *cfg) * * Safe to call with AFU in a partially allocated/initialized state. * - * Waits for any active internal AFU commands to timeout and then unmaps - * the MMIO space. + * Cancels scheduled worker threads, waits for any active internal AFU + * commands to timeout and then unmaps the MMIO space. 
*/ static void stop_afu(struct cxlflash_cfg *cfg) { struct afu *afu = cfg->afu; + cancel_work_sync(&cfg->work_q); + if (likely(afu)) { while (atomic_read(&afu->cmds_active)) ssleep(1); @@ -583,7 +569,6 @@ static void stop_afu(struct cxlflash_cfg *cfg) cxl_psa_unmap((void __iomem *)afu->afu_map); afu->afu_map = NULL; } - kref_put(&afu->mapcount, afu_unmap); } } @@ -767,7 +752,6 @@ static void cxlflash_remove(struct pci_dev *pdev) scsi_remove_host(cfg->host); /* fall through */ case INIT_STATE_AFU: - cancel_work_sync(&cfg->work_q); term_afu(cfg); case INIT_STATE_PCI: pci_disable_device(pdev); @@ -1277,7 +1261,6 @@ static irqreturn_t cxlflash_async_err_irq(int irq, void *data) __func__, port); cfg->lr_state = LINK_RESET_REQUIRED; cfg->lr_port = port; - kref_get(&cfg->afu->mapcount); schedule_work(&cfg->work_q); } @@ -1298,7 +1281,6 @@ static irqreturn_t cxlflash_async_err_irq(int irq, void *data) if (info->action & SCAN_HOST) { atomic_inc(&cfg->scan_host_needed); - kref_get(&cfg->afu->mapcount); schedule_work(&cfg->work_q); } } @@ -1704,7 +1686,6 @@ static int init_afu(struct cxlflash_cfg *cfg) rc = -ENOMEM; goto err1; } - kref_init(&afu->mapcount); /* No byte reverse on reading afu_version or string will be backwards */ reg = readq(&afu->afu_map->global.regs.afu_version); @@ -1716,7 +1697,7 @@ static int init_afu(struct cxlflash_cfg *cfg) "interface version %016llx\n", afu->version, afu->interface_version); rc = -EINVAL; - goto err2; + goto err1; } if (afu_is_sq_cmd_mode(afu)) { @@ -1733,7 +1714,7 @@ static int init_afu(struct cxlflash_cfg *cfg) rc = start_afu(cfg); if (rc) { dev_err(dev, "%s: start_afu failed, rc=%d\n", __func__, rc); - goto err2; + goto err1; } afu_err_intr_init(cfg->afu); @@ -1746,8 +1727,6 @@ out: dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc); return rc; -err2: - kref_put(&afu->mapcount, afu_unmap); err1: term_intr(cfg, UNMAP_THREE); term_mc(cfg); @@ -2341,7 +2320,6 @@ static void cxlflash_worker_thread(struct work_struct *work) if 
(atomic_dec_if_positive(&cfg->scan_host_needed) >= 0) scsi_scan_host(cfg->host); - kref_put(&afu->mapcount, afu_unmap); } /** -- 2.39.2