BugLink: http://bugs.launchpad.net/bugs/1702521
When processing an AFU asynchronous interrupt, if the action results in an
operation that requires off level processing (a link reset for example),
the worker thread is scheduled. In the meantime a reset event (e.g. EEH)
could unmap the AFU to recover. This results in an Oops when the worker
thread tries to access the AFU mapping.
[c000000f17e03b90] d000000007cd5978 cxlflash_worker_thread+0x268/0x550
[c000000f17e03c40] c00000000011883c process_one_work+0x1dc/0x680
[c000000f17e03ce0] c000000000118e80 worker_thread+0x1a0/0x520
[c000000f17e03d80] c000000000126174 kthread+0xf4/0x100
[c000000f17e03e30] c00000000000a47c ret_from_kernel_thread+0x5c/0xe0
In an effort to avoid this, a mapcount was introduced in commit
b45cdbaf9f7f ("cxlflash: Resolve oops in wait_port_offline"), but due to
the race condition described above, this solution is incomplete.
In order to fully resolve this problem and to simplify things, this commit
removes the mapcount solution. Instead, the scheduled worker thread is
cancelled after interrupts have been disabled and prior to the mapping
being freed.
Fixes: b45cdbaf9f7f ("cxlflash: Resolve oops in wait_port_offline")
Signed-off-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
(cherry picked from commit 0df5bef739601f18bffc0d256ae451f239a826bd)
Signed-off-by: Victor Aoqui <victora@linux.vnet.ibm.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
struct sisl_host_map __iomem *host_map; /* MC host map */
struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */
struct sisl_host_map __iomem *host_map; /* MC host map */
struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */
- struct kref mapcount;
-
ctx_hndl_t ctx_hndl; /* master's context handle */
atomic_t hsq_credits;
ctx_hndl_t ctx_hndl; /* master's context handle */
atomic_t hsq_credits;
-static void afu_unmap(struct kref *ref)
-{
- struct afu *afu = container_of(ref, struct afu, mapcount);
-
- if (likely(afu->afu_map)) {
- cxl_psa_unmap((void __iomem *)afu->afu_map);
- afu->afu_map = NULL;
- }
-}
-
/**
* cxlflash_driver_info() - information handler for this host driver
* @host: SCSI host associated with device.
/**
* cxlflash_driver_info() - information handler for this host driver
* @host: SCSI host associated with device.
ulong lock_flags;
int nseg = 0;
int rc = 0;
ulong lock_flags;
int nseg = 0;
int rc = 0;
dev_dbg_ratelimited(dev, "%s: (scp=%p) %d/%d/%d/%llu "
"cdb=(%08x-%08x-%08x-%08x)\n",
dev_dbg_ratelimited(dev, "%s: (scp=%p) %d/%d/%d/%llu "
"cdb=(%08x-%08x-%08x-%08x)\n",
- kref_get(&cfg->afu->mapcount);
- kref_got = 1;
-
if (likely(sg)) {
nseg = scsi_dma_map(scp);
if (unlikely(nseg < 0)) {
if (likely(sg)) {
nseg = scsi_dma_map(scp);
if (unlikely(nseg < 0)) {
if (unlikely(rc))
scsi_dma_unmap(scp);
out:
if (unlikely(rc))
scsi_dma_unmap(scp);
out:
- if (kref_got)
- kref_put(&afu->mapcount, afu_unmap);
*
* Safe to call with AFU in a partially allocated/initialized state.
*
*
* Safe to call with AFU in a partially allocated/initialized state.
*
- * Waits for any active internal AFU commands to timeout and then unmaps
- * the MMIO space.
+ * Cancels scheduled worker threads, waits for any active internal AFU
+ * commands to timeout and then unmaps the MMIO space.
*/
static void stop_afu(struct cxlflash_cfg *cfg)
{
struct afu *afu = cfg->afu;
*/
static void stop_afu(struct cxlflash_cfg *cfg)
{
struct afu *afu = cfg->afu;
+ cancel_work_sync(&cfg->work_q);
+
if (likely(afu)) {
while (atomic_read(&afu->cmds_active))
ssleep(1);
if (likely(afu)) {
while (atomic_read(&afu->cmds_active))
ssleep(1);
cxl_psa_unmap((void __iomem *)afu->afu_map);
afu->afu_map = NULL;
}
cxl_psa_unmap((void __iomem *)afu->afu_map);
afu->afu_map = NULL;
}
- kref_put(&afu->mapcount, afu_unmap);
scsi_remove_host(cfg->host);
/* fall through */
case INIT_STATE_AFU:
scsi_remove_host(cfg->host);
/* fall through */
case INIT_STATE_AFU:
- cancel_work_sync(&cfg->work_q);
term_afu(cfg);
case INIT_STATE_PCI:
pci_disable_device(pdev);
term_afu(cfg);
case INIT_STATE_PCI:
pci_disable_device(pdev);
__func__, port);
cfg->lr_state = LINK_RESET_REQUIRED;
cfg->lr_port = port;
__func__, port);
cfg->lr_state = LINK_RESET_REQUIRED;
cfg->lr_port = port;
- kref_get(&cfg->afu->mapcount);
schedule_work(&cfg->work_q);
}
schedule_work(&cfg->work_q);
}
if (info->action & SCAN_HOST) {
atomic_inc(&cfg->scan_host_needed);
if (info->action & SCAN_HOST) {
atomic_inc(&cfg->scan_host_needed);
- kref_get(&cfg->afu->mapcount);
schedule_work(&cfg->work_q);
}
}
schedule_work(&cfg->work_q);
}
}
rc = -ENOMEM;
goto err1;
}
rc = -ENOMEM;
goto err1;
}
- kref_init(&afu->mapcount);
/* No byte reverse on reading afu_version or string will be backwards */
reg = readq(&afu->afu_map->global.regs.afu_version);
/* No byte reverse on reading afu_version or string will be backwards */
reg = readq(&afu->afu_map->global.regs.afu_version);
"interface version %016llx\n", afu->version,
afu->interface_version);
rc = -EINVAL;
"interface version %016llx\n", afu->version,
afu->interface_version);
rc = -EINVAL;
}
if (afu_is_sq_cmd_mode(afu)) {
}
if (afu_is_sq_cmd_mode(afu)) {
rc = start_afu(cfg);
if (rc) {
dev_err(dev, "%s: start_afu failed, rc=%d\n", __func__, rc);
rc = start_afu(cfg);
if (rc) {
dev_err(dev, "%s: start_afu failed, rc=%d\n", __func__, rc);
}
afu_err_intr_init(cfg->afu);
}
afu_err_intr_init(cfg->afu);
dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
return rc;
dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
return rc;
-err2:
- kref_put(&afu->mapcount, afu_unmap);
err1:
term_intr(cfg, UNMAP_THREE);
term_mc(cfg);
err1:
term_intr(cfg, UNMAP_THREE);
term_mc(cfg);
if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0)
scsi_scan_host(cfg->host);
if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0)
scsi_scan_host(cfg->host);
- kref_put(&afu->mapcount, afu_unmap);