During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Kernel’s block layer may return stale entry for one or more outstanding requests if blk-mq is enabled. This may lead to Kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused.
Reference of upstream discussion - https://patchwork.kernel.org/patch/10734933/
So to fix this issue, driver will use scsi lookup table to track outstanding IOs at driver level and it avoids using scsi_host_find_tag().
Have done following changes in this patch, * Allocated & initialized scsi_lookup table of type (struct scsiio_tracker) and of depth host's can_queue at driver load time and it will be deallocated at driver unload time.
* Once scmd is received, driver will take scsiio_tracker at entry corresponding to scmd's tag value from scsi_lookup table. Then this scsiio_tracker entry is initialized with proper smid, scmd, cb_idx etc. And this scsiio_tracker entry contents are cleared before driver calling scsi_done callback function.
* scmd's host_scribble variable is used to save the corresponding scsiio_tracker address, later at any time driver can easily retrieve the scmd's corresponding scsiio_tacker using this host_scribble variable.
* Whenever driver wants to get the outstanding IOs at the driver level then driver can go through this scsi_lookup table and if it observe any entry with non-null scmd then it means that scmd is outstanding at the driver level.
v1 change set: Updated the patch description.
Cc: stable@vger.kernel.org Signed-off-by: Sreekanth Reddy sreekanth.reddy@broadcom.com --- drivers/scsi/mpt3sas/mpt3sas_base.c | 45 +++++++++++++++++++++++++++++--- drivers/scsi/mpt3sas/mpt3sas_base.h | 10 +++++++ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 +++++------- drivers/scsi/mpt3sas/mpt3sas_warpdrive.c | 2 +- 5 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0a6cb8f..8e3d0d5 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1301,7 +1301,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
cmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (cmd) - return scsi_cmd_priv(cmd); + return mpt3sas_get_st_from_scmd(cmd);
return NULL; } @@ -1709,7 +1709,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) struct scsi_cmnd *scmd) { struct chain_tracker *chain_req; - struct scsiio_tracker *st = scsi_cmd_priv(scmd); + struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 smid = st->smid; u8 chain_offset = atomic_read(&ioc->chain_lookup[smid - 1].chain_offset); @@ -3203,14 +3203,18 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx, struct scsi_cmnd *scmd) { - struct scsiio_tracker *request = scsi_cmd_priv(scmd); + struct scsiio_tracker *request; unsigned int tag = scmd->request->tag; u16 smid;
+ scmd->host_scribble = (unsigned char *)(&ioc->scsi_lookup[tag]); + request = mpt3sas_get_st_from_scmd(scmd); + smid = tag + 1; request->cb_idx = cb_idx; request->msix_io = _base_get_msix_index(ioc); request->smid = smid; + request->scmd = scmd; INIT_LIST_HEAD(&request->chain_list); return smid; } @@ -3264,6 +3268,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, return; st->cb_idx = 0xFF; st->direct_io = 0; + st->scmd = NULL; atomic_set(&ioc->chain_lookup[st->smid - 1].chain_offset, 0); st->smid = 0; } @@ -4252,6 +4257,11 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, ioc->config_page, ioc->config_page_dma); }
+ if (ioc->scsi_lookup) { + free_pages((ulong)ioc->scsi_lookup, ioc->scsi_lookup_pages); + ioc->scsi_lookup = NULL; + } + kfree(ioc->hpr_lookup); kfree(ioc->internal_lookup); if (ioc->chain_lookup) { @@ -4377,6 +4387,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, max_request_credit = min_t(u16, facts->RequestCredit, MAX_HBA_QUEUE_DEPTH);
+retry: /* Firmware maintains additional facts->HighPriorityCredit number of * credits for HiPriprity Request messages, so hba queue depth will be * sum of max_request_credit and high priority queue depth. @@ -4581,9 +4592,29 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, (unsigned long long)ioc->request_dma)); total_sz += sz;
+ sz = ioc->shost->can_queue * sizeof(struct scsiio_tracker); + ioc->scsi_lookup_pages = get_order(sz); + ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages( + GFP_KERNEL, ioc->scsi_lookup_pages); + if (!ioc->scsi_lookup) { + /* Retry allocating memory by reducing the queue depth */ + if ((max_request_credit - 64) > + (ioc->internal_depth + INTERNAL_SCSIIO_CMDS_COUNT)) { + max_request_credit -= 64; + _base_release_memory_pools(ioc); + goto retry; + } else { + ioc_err(ioc, + "scsi_lookup: get_free_pages failed, sz(%d)\n", + (int)sz); + goto out; + } + } + dinitprintk(ioc, ioc_info(ioc, "scsiio(0x%p): depth(%d)\n", ioc->request, ioc->scsiio_depth)); + total_sz += sz;
ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH); sz = ioc->scsiio_depth * sizeof(struct chain_lookup); @@ -6239,6 +6270,14 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+ smid = 1; + for (i = 0; i < ioc->shost->can_queue; i++, smid++) { + ioc->scsi_lookup[i].cb_idx = 0xFF; + ioc->scsi_lookup[i].smid = smid; + ioc->scsi_lookup[i].scmd = NULL; + ioc->scsi_lookup[i].direct_io = 0; + } + /* hi-priority queue */ INIT_LIST_HEAD(&ioc->hpr_free_list); smid = ioc->hi_priority_smid; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 19158cb..f8c82f6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -816,6 +816,7 @@ struct chain_lookup { /** * struct scsiio_tracker - scsi mf request tracker * @smid: system message id + * @scmd: scsi request pointer * @cb_idx: callback index * @direct_io: To indicate whether I/O is direct (WARPDRIVE) * @chain_list: list of associated firmware chain tracker @@ -823,6 +824,7 @@ struct chain_lookup { */ struct scsiio_tracker { u16 smid; + struct scsi_cmnd *scmd; u8 cb_idx; u8 direct_io; struct pcie_sg_list pcie_sg_list; @@ -830,6 +832,12 @@ struct scsiio_tracker { u16 msix_io; };
+static inline struct scsiio_tracker * +mpt3sas_get_st_from_scmd(struct scsi_cmnd *scmd) +{ + return (struct scsiio_tracker *)scmd->host_scribble; +} + /** * struct request_tracker - firmware request tracker * @smid: system message id @@ -1296,6 +1304,8 @@ struct MPT3SAS_ADAPTER { u8 *request; dma_addr_t request_dma; u32 request_dma_sz; + struct scsiio_tracker *scsi_lookup; + ulong scsi_lookup_pages; struct pcie_sg_list *pcie_sg_lookup; spinlock_t scsi_lookup_lock; int pending_io_count; diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index b2bb47c..ad43e60 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -595,7 +595,7 @@ void mpt3sas_ctl_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) continue; if (priv_data->sas_target->handle != handle) continue; - st = scsi_cmd_priv(scmd); + st = mpt3sas_get_st_from_scmd(scmd); tm_request->TaskMID = cpu_to_le16(st->smid); found = 1; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f..86d0e3c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1465,11 +1465,9 @@ struct scsi_cmnd *
if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) { - u32 unique_tag = smid - 1; - - scmd = scsi_host_find_tag(ioc->shost, unique_tag); + scmd = ioc->scsi_lookup[smid - 1].scmd; if (scmd) { - st = scsi_cmd_priv(scmd); + st = mpt3sas_get_st_from_scmd(scmd); if (st->cb_idx == 0xFF || st->smid == 0) scmd = NULL; } @@ -2819,7 +2817,7 @@ int mpt3sas_scsih_issue_locked_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle, { struct MPT3SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT3SAS_DEVICE *sas_device_priv_data; - struct scsiio_tracker *st = scsi_cmd_priv(scmd); + struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 handle; int r;
@@ -4466,7 +4464,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) continue; count++; _scsih_set_satl_pending(scmd, false); - st = scsi_cmd_priv(scmd); + st = mpt3sas_get_st_from_scmd(scmd); mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery || ioc->remove_host) @@ -5193,7 +5191,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) * WARPDRIVE: If direct_io is set then it is directIO, * the failed direct I/O should be redirected to volume */ - st = scsi_cmd_priv(scmd); + st = mpt3sas_get_st_from_scmd(scmd); if (st->direct_io && ((ioc_status & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SCSI_TASK_TERMINATED)) { @@ -7335,7 +7333,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue; - st = scsi_cmd_priv(scmd); + st = mpt3sas_get_st_from_scmd(scmd); sdev = scmd->device; sas_device_priv_data = sdev->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target) @@ -10176,7 +10174,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1, - .cmd_size = sizeof(struct scsiio_tracker), };
/* raid transport support for SAS 2.0 HBA devices */ @@ -10214,7 +10211,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1, - .cmd_size = sizeof(struct scsiio_tracker), };
/* raid transport support for SAS 3.0 HBA devices */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c index cc07ba4..2a05bf3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c +++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c @@ -259,7 +259,7 @@ sector_t v_lba, p_lba, stripe_off, column, io_size; u32 stripe_sz, stripe_exp; u8 num_pds, cmd = scmd->cmnd[0]; - struct scsiio_tracker *st = scsi_cmd_priv(scmd); + struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd);
if (cmd != READ_10 && cmd != WRITE_10 && cmd != READ_16 && cmd != WRITE_16)
On 2/21/19 1:11 PM, Sreekanth Reddy wrote:
During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Kernel’s block layer may return stale entry for one or more outstanding requests if blk-mq is enabled. This may lead to Kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused. Reference of upstream discussion - https://patchwork.kernel.org/patch/10734933/
So to fix this issue, driver will use scsi lookup table to track outstanding IOs at driver level and it avoids using scsi_host_find_tag().
Have done following changes in this patch,
Allocated & initialized scsi_lookup table of type (struct scsiio_tracker) and of depth host's can_queue at driver load time and it will be deallocated at driver unload time.
Once scmd is received, driver will take scsiio_tracker at entry corresponding to scmd's tag value from scsi_lookup table. Then this scsiio_tracker entry is initialized with proper smid, scmd, cb_idx etc. And this scsiio_tracker entry contents are cleared before driver calling scsi_done callback function.
scmd's host_scribble variable is used to save the corresponding scsiio_tracker address, later at any time driver can easily retrieve the scmd's corresponding scsiio_tacker using this host_scribble variable.
Whenever driver wants to get the outstanding IOs at the driver level then driver can go through this scsi_lookup table and if it observe any entry with non-null scmd then it means that scmd is outstanding at the driver level.
v1 change set: Updated the patch description.
Cc: stable@vger.kernel.org Signed-off-by: Sreekanth Reddy sreekanth.reddy@broadcom.com
drivers/scsi/mpt3sas/mpt3sas_base.c | 45 +++++++++++++++++++++++++++++--- drivers/scsi/mpt3sas/mpt3sas_base.h | 10 +++++++ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 +++++------- drivers/scsi/mpt3sas/mpt3sas_warpdrive.c | 2 +- 5 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0a6cb8f..8e3d0d5 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1301,7 +1301,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) cmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (cmd)
return scsi_cmd_priv(cmd);
return mpt3sas_get_st_from_scmd(cmd);
return NULL; } @@ -1709,7 +1709,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) struct scsi_cmnd *scmd) { struct chain_tracker *chain_req;
- struct scsiio_tracker *st = scsi_cmd_priv(scmd);
- struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 smid = st->smid; u8 chain_offset = atomic_read(&ioc->chain_lookup[smid - 1].chain_offset);
@@ -3203,14 +3203,18 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx, struct scsi_cmnd *scmd) {
- struct scsiio_tracker *request = scsi_cmd_priv(scmd);
- struct scsiio_tracker *request; unsigned int tag = scmd->request->tag; u16 smid;
- scmd->host_scribble = (unsigned char *)(&ioc->scsi_lookup[tag]);
- request = mpt3sas_get_st_from_scmd(scmd);
- smid = tag + 1; request->cb_idx = cb_idx; request->msix_io = _base_get_msix_index(ioc); request->smid = smid;
- request->scmd = scmd; INIT_LIST_HEAD(&request->chain_list); return smid; }
@@ -3264,6 +3268,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, return; st->cb_idx = 0xFF; st->direct_io = 0;
- st->scmd = NULL; atomic_set(&ioc->chain_lookup[st->smid - 1].chain_offset, 0); st->smid = 0; }
@@ -4252,6 +4257,11 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, ioc->config_page, ioc->config_page_dma); }
- if (ioc->scsi_lookup) {
free_pages((ulong)ioc->scsi_lookup, ioc->scsi_lookup_pages);
ioc->scsi_lookup = NULL;
- }
- kfree(ioc->hpr_lookup); kfree(ioc->internal_lookup); if (ioc->chain_lookup) {
@@ -4377,6 +4387,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, max_request_credit = min_t(u16, facts->RequestCredit, MAX_HBA_QUEUE_DEPTH); +retry: /* Firmware maintains additional facts->HighPriorityCredit number of * credits for HiPriprity Request messages, so hba queue depth will be * sum of max_request_credit and high priority queue depth. @@ -4581,9 +4592,29 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, (unsigned long long)ioc->request_dma)); total_sz += sz;
- sz = ioc->shost->can_queue * sizeof(struct scsiio_tracker);
- ioc->scsi_lookup_pages = get_order(sz);
- ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages(
GFP_KERNEL, ioc->scsi_lookup_pages);
- if (!ioc->scsi_lookup) {
/* Retry allocating memory by reducing the queue depth */
if ((max_request_credit - 64) >
(ioc->internal_depth + INTERNAL_SCSIIO_CMDS_COUNT)) {
max_request_credit -= 64;
_base_release_memory_pools(ioc);
goto retry;
} else {
ioc_err(ioc,
"scsi_lookup: get_free_pages failed, sz(%d)\n",
(int)sz);
goto out;
}
- }
- dinitprintk(ioc, ioc_info(ioc, "scsiio(0x%p): depth(%d)\n", ioc->request, ioc->scsiio_depth));
- total_sz += sz;
ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH); sz = ioc->scsiio_depth * sizeof(struct chain_lookup); @@ -6239,6 +6270,14 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
- smid = 1;
- for (i = 0; i < ioc->shost->can_queue; i++, smid++) {
ioc->scsi_lookup[i].cb_idx = 0xFF;
ioc->scsi_lookup[i].smid = smid;
ioc->scsi_lookup[i].scmd = NULL;
ioc->scsi_lookup[i].direct_io = 0;
- }
- /* hi-priority queue */ INIT_LIST_HEAD(&ioc->hpr_free_list); smid = ioc->hi_priority_smid;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 19158cb..f8c82f6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -816,6 +816,7 @@ struct chain_lookup { /**
- struct scsiio_tracker - scsi mf request tracker
- @smid: system message id
- @scmd: scsi request pointer
- @cb_idx: callback index
- @direct_io: To indicate whether I/O is direct (WARPDRIVE)
- @chain_list: list of associated firmware chain tracker
@@ -823,6 +824,7 @@ struct chain_lookup { */ struct scsiio_tracker { u16 smid;
- struct scsi_cmnd *scmd; u8 cb_idx; u8 direct_io; struct pcie_sg_list pcie_sg_list;
@@ -830,6 +832,12 @@ struct scsiio_tracker { u16 msix_io; }; +static inline struct scsiio_tracker * +mpt3sas_get_st_from_scmd(struct scsi_cmnd *scmd) +{
- return (struct scsiio_tracker *)scmd->host_scribble;
+}
- /**
- struct request_tracker - firmware request tracker
- @smid: system message id
@@ -1296,6 +1304,8 @@ struct MPT3SAS_ADAPTER { u8 *request; dma_addr_t request_dma; u32 request_dma_sz;
- struct scsiio_tracker *scsi_lookup;
- ulong scsi_lookup_pages; struct pcie_sg_list *pcie_sg_lookup; spinlock_t scsi_lookup_lock; int pending_io_count;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index b2bb47c..ad43e60 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -595,7 +595,7 @@ void mpt3sas_ctl_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) continue; if (priv_data->sas_target->handle != handle) continue;
st = scsi_cmd_priv(scmd);
tm_request->TaskMID = cpu_to_le16(st->smid); found = 1; }st = mpt3sas_get_st_from_scmd(scmd);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f..86d0e3c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1465,11 +1465,9 @@ struct scsi_cmnd * if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
u32 unique_tag = smid - 1;
scmd = scsi_host_find_tag(ioc->shost, unique_tag);
if (scmd) {scmd = ioc->scsi_lookup[smid - 1].scmd;
st = scsi_cmd_priv(scmd);
}st = mpt3sas_get_st_from_scmd(scmd); if (st->cb_idx == 0xFF || st->smid == 0) scmd = NULL;
@@ -2819,7 +2817,7 @@ int mpt3sas_scsih_issue_locked_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle, { struct MPT3SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT3SAS_DEVICE *sas_device_priv_data;
- struct scsiio_tracker *st = scsi_cmd_priv(scmd);
- struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 handle; int r;
@@ -4466,7 +4464,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) continue; count++; _scsih_set_satl_pending(scmd, false);
st = scsi_cmd_priv(scmd);
mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery || ioc->remove_host)st = mpt3sas_get_st_from_scmd(scmd);
@@ -5193,7 +5191,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) * WARPDRIVE: If direct_io is set then it is directIO, * the failed direct I/O should be redirected to volume */
- st = scsi_cmd_priv(scmd);
- st = mpt3sas_get_st_from_scmd(scmd); if (st->direct_io && ((ioc_status & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SCSI_TASK_TERMINATED)) {
@@ -7335,7 +7333,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue;
st = scsi_cmd_priv(scmd);
sdev = scmd->device; sas_device_priv_data = sdev->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target)st = mpt3sas_get_st_from_scmd(scmd);
@@ -10176,7 +10174,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
- .cmd_size = sizeof(struct scsiio_tracker), };
/* raid transport support for SAS 2.0 HBA devices */ @@ -10214,7 +10211,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
- .cmd_size = sizeof(struct scsiio_tracker), };
/* raid transport support for SAS 3.0 HBA devices */ diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c index cc07ba4..2a05bf3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c +++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c @@ -259,7 +259,7 @@ sector_t v_lba, p_lba, stripe_off, column, io_size; u32 stripe_sz, stripe_exp; u8 num_pds, cmd = scmd->cmnd[0];
- struct scsiio_tracker *st = scsi_cmd_priv(scmd);
- struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd);
if (cmd != READ_10 && cmd != WRITE_10 && cmd != READ_16 && cmd != WRITE_16)
I still think this is the wrong way of approaching it. I'd rather using blk_mq_tagset_busy_iter() here; that would avoid the issue mentioned.
Let's see if I can come up with a patch.
Cheers,
Hannes
On Tue, Feb 26, 2019 at 12:38 AM Hannes Reinecke hare@suse.de wrote:
On 2/21/19 1:11 PM, Sreekanth Reddy wrote:
During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Kernel’s block layer may return stale entry for one or more outstanding requests if blk-mq is enabled. This may lead to Kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused.
Reference of upstream discussion - https://patchwork.kernel.org/patch/10734933/
So to fix this issue, driver will use scsi lookup table to track outstanding IOs at driver level and it avoids using scsi_host_find_tag().
Have done following changes in this patch,
Allocated & initialized scsi_lookup table of type (struct scsiio_tracker) and of depth host's can_queue at driver load time and it will be deallocated at driver unload time.
Once scmd is received, driver will take scsiio_tracker at entry corresponding to scmd's tag value from scsi_lookup table. Then this scsiio_tracker entry is initialized with proper smid, scmd, cb_idx etc. And this scsiio_tracker entry contents are cleared before driver calling scsi_done callback function.
scmd's host_scribble variable is used to save the corresponding scsiio_tracker address, later at any time driver can easily retrieve the scmd's corresponding scsiio_tacker using this host_scribble variable.
Whenever driver wants to get the outstanding IOs at the driver level then driver can go through this scsi_lookup table and if it observe any entry with non-null scmd then it means that scmd is outstanding at the driver level.
v1 change set: Updated the patch description.
Cc: stable@vger.kernel.org Signed-off-by: Sreekanth Reddy sreekanth.reddy@broadcom.com
drivers/scsi/mpt3sas/mpt3sas_base.c | 45 +++++++++++++++++++++++++++++--- drivers/scsi/mpt3sas/mpt3sas_base.h | 10 +++++++ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 +++++------- drivers/scsi/mpt3sas/mpt3sas_warpdrive.c | 2 +- 5 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0a6cb8f..8e3d0d5 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1301,7 +1301,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
cmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (cmd)
return scsi_cmd_priv(cmd);
return mpt3sas_get_st_from_scmd(cmd); return NULL;
}
@@ -1709,7 +1709,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) struct scsi_cmnd *scmd) { struct chain_tracker *chain_req;
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 smid = st->smid; u8 chain_offset = atomic_read(&ioc->chain_lookup[smid - 1].chain_offset);
@@ -3203,14 +3203,18 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx, struct scsi_cmnd *scmd) {
struct scsiio_tracker *request = scsi_cmd_priv(scmd);
struct scsiio_tracker *request; unsigned int tag = scmd->request->tag; u16 smid;
scmd->host_scribble = (unsigned char *)(&ioc->scsi_lookup[tag]);
request = mpt3sas_get_st_from_scmd(scmd);
smid = tag + 1; request->cb_idx = cb_idx; request->msix_io = _base_get_msix_index(ioc); request->smid = smid;
request->scmd = scmd; INIT_LIST_HEAD(&request->chain_list); return smid;
}
@@ -3264,6 +3268,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, return; st->cb_idx = 0xFF; st->direct_io = 0;
}st->scmd = NULL; atomic_set(&ioc->chain_lookup[st->smid - 1].chain_offset, 0); st->smid = 0;
@@ -4252,6 +4257,11 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, ioc->config_page, ioc->config_page_dma); }
if (ioc->scsi_lookup) {
free_pages((ulong)ioc->scsi_lookup, ioc->scsi_lookup_pages);
ioc->scsi_lookup = NULL;
}
kfree(ioc->hpr_lookup); kfree(ioc->internal_lookup); if (ioc->chain_lookup) {
@@ -4377,6 +4387,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, max_request_credit = min_t(u16, facts->RequestCredit, MAX_HBA_QUEUE_DEPTH);
+retry: /* Firmware maintains additional facts->HighPriorityCredit number of * credits for HiPriprity Request messages, so hba queue depth will be * sum of max_request_credit and high priority queue depth. @@ -4581,9 +4592,29 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, (unsigned long long)ioc->request_dma)); total_sz += sz;
sz = ioc->shost->can_queue * sizeof(struct scsiio_tracker);
ioc->scsi_lookup_pages = get_order(sz);
ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages(
GFP_KERNEL, ioc->scsi_lookup_pages);
if (!ioc->scsi_lookup) {
/* Retry allocating memory by reducing the queue depth */
if ((max_request_credit - 64) >
(ioc->internal_depth + INTERNAL_SCSIIO_CMDS_COUNT)) {
max_request_credit -= 64;
_base_release_memory_pools(ioc);
goto retry;
} else {
ioc_err(ioc,
"scsi_lookup: get_free_pages failed, sz(%d)\n",
(int)sz);
goto out;
}
}
dinitprintk(ioc, ioc_info(ioc, "scsiio(0x%p): depth(%d)\n", ioc->request, ioc->scsiio_depth));
total_sz += sz; ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH); sz = ioc->scsiio_depth * sizeof(struct chain_lookup);
@@ -6239,6 +6270,14 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
smid = 1;
for (i = 0; i < ioc->shost->can_queue; i++, smid++) {
ioc->scsi_lookup[i].cb_idx = 0xFF;
ioc->scsi_lookup[i].smid = smid;
ioc->scsi_lookup[i].scmd = NULL;
ioc->scsi_lookup[i].direct_io = 0;
}
/* hi-priority queue */ INIT_LIST_HEAD(&ioc->hpr_free_list); smid = ioc->hi_priority_smid;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 19158cb..f8c82f6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -816,6 +816,7 @@ struct chain_lookup { /**
- struct scsiio_tracker - scsi mf request tracker
- @smid: system message id
- @scmd: scsi request pointer
- @cb_idx: callback index
- @direct_io: To indicate whether I/O is direct (WARPDRIVE)
- @chain_list: list of associated firmware chain tracker
@@ -823,6 +824,7 @@ struct chain_lookup { */ struct scsiio_tracker { u16 smid;
struct scsi_cmnd *scmd; u8 cb_idx; u8 direct_io; struct pcie_sg_list pcie_sg_list;
@@ -830,6 +832,12 @@ struct scsiio_tracker { u16 msix_io; };
+static inline struct scsiio_tracker * +mpt3sas_get_st_from_scmd(struct scsi_cmnd *scmd) +{
return (struct scsiio_tracker *)scmd->host_scribble;
+}
- /**
- struct request_tracker - firmware request tracker
- @smid: system message id
@@ -1296,6 +1304,8 @@ struct MPT3SAS_ADAPTER { u8 *request; dma_addr_t request_dma; u32 request_dma_sz;
struct scsiio_tracker *scsi_lookup;
ulong scsi_lookup_pages; struct pcie_sg_list *pcie_sg_lookup; spinlock_t scsi_lookup_lock; int pending_io_count;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index b2bb47c..ad43e60 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -595,7 +595,7 @@ void mpt3sas_ctl_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) continue; if (priv_data->sas_target->handle != handle) continue;
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); tm_request->TaskMID = cpu_to_le16(st->smid); found = 1; }
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f..86d0e3c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1465,11 +1465,9 @@ struct scsi_cmnd *
if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
u32 unique_tag = smid - 1;
scmd = scsi_host_find_tag(ioc->shost, unique_tag);
scmd = ioc->scsi_lookup[smid - 1].scmd; if (scmd) {
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); if (st->cb_idx == 0xFF || st->smid == 0) scmd = NULL; }
@@ -2819,7 +2817,7 @@ int mpt3sas_scsih_issue_locked_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle, { struct MPT3SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT3SAS_DEVICE *sas_device_priv_data;
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 handle; int r;
@@ -4466,7 +4464,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) continue; count++; _scsih_set_satl_pending(scmd, false);
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery || ioc->remove_host)
@@ -5193,7 +5191,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) * WARPDRIVE: If direct_io is set then it is directIO, * the failed direct I/O should be redirected to volume */
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); if (st->direct_io && ((ioc_status & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SCSI_TASK_TERMINATED)) {
@@ -7335,7 +7333,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue;
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); sdev = scmd->device; sas_device_priv_data = sdev->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
@@ -10176,7 +10174,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
};
/* raid transport support for SAS 2.0 HBA devices */
@@ -10214,7 +10211,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
};
/* raid transport support for SAS 3.0 HBA devices */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c index cc07ba4..2a05bf3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c +++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c @@ -259,7 +259,7 @@ sector_t v_lba, p_lba, stripe_off, column, io_size; u32 stripe_sz, stripe_exp; u8 num_pds, cmd = scmd->cmnd[0];
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); if (cmd != READ_10 && cmd != WRITE_10 && cmd != READ_16 && cmd != WRITE_16)
I still think this is the wrong way of approaching it. I'd rather using blk_mq_tagset_busy_iter() here; that would avoid the issue mentioned.
Let's see if I can come up with a patch.
Hannes,
This API blk_mq_tagset_busy_iter() is not flexible to accommodate it into mpt3sas driver code. For example driver can’t exit from this API in-between if need, i.e. While processing Async Broadcast primitive event if host reset occurs then driver has to wait for this blk_mq_tagset_busy_iter() API to call the callback function for all the outstanding requests and no way to quit in-between.
Thanks, Sreekanth
Cheers,
Hannes
On 2/26/19 12:48 PM, Sreekanth Reddy wrote:
On Tue, Feb 26, 2019 at 12:38 AM Hannes Reinecke hare@suse.de wrote:
On 2/21/19 1:11 PM, Sreekanth Reddy wrote:
During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Kernel’s block layer may return stale entry for one or more outstanding requests if blk-mq is enabled. This may lead to Kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused.
Reference of upstream discussion - https://patchwork.kernel.org/patch/10734933/
So to fix this issue, driver will use scsi lookup table to track outstanding IOs at driver level and it avoids using scsi_host_find_tag().
Have done following changes in this patch,
Allocated & initialized scsi_lookup table of type (struct scsiio_tracker) and of depth host's can_queue at driver load time and it will be deallocated at driver unload time.
Once scmd is received, driver will take scsiio_tracker at entry corresponding to scmd's tag value from scsi_lookup table. Then this scsiio_tracker entry is initialized with proper smid, scmd, cb_idx etc. And this scsiio_tracker entry contents are cleared before driver calling scsi_done callback function.
scmd's host_scribble variable is used to save the corresponding scsiio_tracker address, later at any time driver can easily retrieve the scmd's corresponding scsiio_tacker using this host_scribble variable.
Whenever driver wants to get the outstanding IOs at the driver level then driver can go through this scsi_lookup table and if it observe any entry with non-null scmd then it means that scmd is outstanding at the driver level.
v1 change set: Updated the patch description.
Cc: stable@vger.kernel.org Signed-off-by: Sreekanth Reddy sreekanth.reddy@broadcom.com
drivers/scsi/mpt3sas/mpt3sas_base.c | 45 +++++++++++++++++++++++++++++--- drivers/scsi/mpt3sas/mpt3sas_base.h | 10 +++++++ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 +++++------- drivers/scsi/mpt3sas/mpt3sas_warpdrive.c | 2 +- 5 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0a6cb8f..8e3d0d5 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1301,7 +1301,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
cmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (cmd)
return scsi_cmd_priv(cmd);
return mpt3sas_get_st_from_scmd(cmd); return NULL;
}
@@ -1709,7 +1709,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) struct scsi_cmnd *scmd) { struct chain_tracker *chain_req;
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 smid = st->smid; u8 chain_offset = atomic_read(&ioc->chain_lookup[smid - 1].chain_offset);
@@ -3203,14 +3203,18 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx, struct scsi_cmnd *scmd) {
struct scsiio_tracker *request = scsi_cmd_priv(scmd);
struct scsiio_tracker *request; unsigned int tag = scmd->request->tag; u16 smid;
scmd->host_scribble = (unsigned char *)(&ioc->scsi_lookup[tag]);
request = mpt3sas_get_st_from_scmd(scmd);
smid = tag + 1; request->cb_idx = cb_idx; request->msix_io = _base_get_msix_index(ioc); request->smid = smid;
request->scmd = scmd; INIT_LIST_HEAD(&request->chain_list); return smid;
}
@@ -3264,6 +3268,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, return; st->cb_idx = 0xFF; st->direct_io = 0;
}st->scmd = NULL; atomic_set(&ioc->chain_lookup[st->smid - 1].chain_offset, 0); st->smid = 0;
@@ -4252,6 +4257,11 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, ioc->config_page, ioc->config_page_dma); }
if (ioc->scsi_lookup) {
free_pages((ulong)ioc->scsi_lookup, ioc->scsi_lookup_pages);
ioc->scsi_lookup = NULL;
}
kfree(ioc->hpr_lookup); kfree(ioc->internal_lookup); if (ioc->chain_lookup) {
@@ -4377,6 +4387,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, max_request_credit = min_t(u16, facts->RequestCredit, MAX_HBA_QUEUE_DEPTH);
+retry: /* Firmware maintains additional facts->HighPriorityCredit number of * credits for HiPriprity Request messages, so hba queue depth will be * sum of max_request_credit and high priority queue depth. @@ -4581,9 +4592,29 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, (unsigned long long)ioc->request_dma)); total_sz += sz;
sz = ioc->shost->can_queue * sizeof(struct scsiio_tracker);
ioc->scsi_lookup_pages = get_order(sz);
ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages(
GFP_KERNEL, ioc->scsi_lookup_pages);
if (!ioc->scsi_lookup) {
/* Retry allocating memory by reducing the queue depth */
if ((max_request_credit - 64) >
(ioc->internal_depth + INTERNAL_SCSIIO_CMDS_COUNT)) {
max_request_credit -= 64;
_base_release_memory_pools(ioc);
goto retry;
} else {
ioc_err(ioc,
"scsi_lookup: get_free_pages failed, sz(%d)\n",
(int)sz);
goto out;
}
}
dinitprintk(ioc, ioc_info(ioc, "scsiio(0x%p): depth(%d)\n", ioc->request, ioc->scsiio_depth));
total_sz += sz; ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH); sz = ioc->scsiio_depth * sizeof(struct chain_lookup);
@@ -6239,6 +6270,14 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
smid = 1;
for (i = 0; i < ioc->shost->can_queue; i++, smid++) {
ioc->scsi_lookup[i].cb_idx = 0xFF;
ioc->scsi_lookup[i].smid = smid;
ioc->scsi_lookup[i].scmd = NULL;
ioc->scsi_lookup[i].direct_io = 0;
}
/* hi-priority queue */ INIT_LIST_HEAD(&ioc->hpr_free_list); smid = ioc->hi_priority_smid;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 19158cb..f8c82f6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -816,6 +816,7 @@ struct chain_lookup { /** * struct scsiio_tracker - scsi mf request tracker * @smid: system message id
- @scmd: scsi request pointer
- @cb_idx: callback index
- @direct_io: To indicate whether I/O is direct (WARPDRIVE)
- @chain_list: list of associated firmware chain tracker
@@ -823,6 +824,7 @@ struct chain_lookup { */ struct scsiio_tracker { u16 smid;
struct scsi_cmnd *scmd; u8 cb_idx; u8 direct_io; struct pcie_sg_list pcie_sg_list;
@@ -830,6 +832,12 @@ struct scsiio_tracker { u16 msix_io; };
+static inline struct scsiio_tracker * +mpt3sas_get_st_from_scmd(struct scsi_cmnd *scmd) +{
return (struct scsiio_tracker *)scmd->host_scribble;
+}
- /**
- struct request_tracker - firmware request tracker
- @smid: system message id
@@ -1296,6 +1304,8 @@ struct MPT3SAS_ADAPTER { u8 *request; dma_addr_t request_dma; u32 request_dma_sz;
struct scsiio_tracker *scsi_lookup;
ulong scsi_lookup_pages; struct pcie_sg_list *pcie_sg_lookup; spinlock_t scsi_lookup_lock; int pending_io_count;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index b2bb47c..ad43e60 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -595,7 +595,7 @@ void mpt3sas_ctl_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) continue; if (priv_data->sas_target->handle != handle) continue;
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); tm_request->TaskMID = cpu_to_le16(st->smid); found = 1; }
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f..86d0e3c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1465,11 +1465,9 @@ struct scsi_cmnd *
if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
u32 unique_tag = smid - 1;
scmd = scsi_host_find_tag(ioc->shost, unique_tag);
scmd = ioc->scsi_lookup[smid - 1].scmd; if (scmd) {
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); if (st->cb_idx == 0xFF || st->smid == 0) scmd = NULL; }
@@ -2819,7 +2817,7 @@ int mpt3sas_scsih_issue_locked_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle, { struct MPT3SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT3SAS_DEVICE *sas_device_priv_data;
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 handle; int r;
@@ -4466,7 +4464,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) continue; count++; _scsih_set_satl_pending(scmd, false);
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery || ioc->remove_host)
@@ -5193,7 +5191,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) * WARPDRIVE: If direct_io is set then it is directIO, * the failed direct I/O should be redirected to volume */
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); if (st->direct_io && ((ioc_status & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SCSI_TASK_TERMINATED)) {
@@ -7335,7 +7333,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue;
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); sdev = scmd->device; sas_device_priv_data = sdev->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
@@ -10176,7 +10174,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
};
/* raid transport support for SAS 2.0 HBA devices */
@@ -10214,7 +10211,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
};
/* raid transport support for SAS 3.0 HBA devices */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c index cc07ba4..2a05bf3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c +++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c @@ -259,7 +259,7 @@ sector_t v_lba, p_lba, stripe_off, column, io_size; u32 stripe_sz, stripe_exp; u8 num_pds, cmd = scmd->cmnd[0];
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); if (cmd != READ_10 && cmd != WRITE_10 && cmd != READ_16 && cmd != WRITE_16)
I still think this is the wrong way of approaching it. I'd rather using blk_mq_tagset_busy_iter() here; that would avoid the issue mentioned.
Let's see if I can come up with a patch.
Hannes,
This API blk_mq_tagset_busy_iter() is not flexible to accommodate it into mpt3sas driver code. For example driver can’t exit from this API in-between if need, i.e. While processing Async Broadcast primitive event if host reset occurs then driver has to wait for this blk_mq_tagset_busy_iter() API to call the callback function for all the outstanding requests and no way to quit in-between.
This is actually not true; the return value from the iter function determines if the loop should continue. If the iter function returns false the loop will be terminated.
Cheers,
Hannes
On Tue, Feb 26, 2019 at 5:59 PM Hannes Reinecke hare@suse.de wrote:
On 2/26/19 12:48 PM, Sreekanth Reddy wrote:
On Tue, Feb 26, 2019 at 12:38 AM Hannes Reinecke hare@suse.de wrote:
On 2/21/19 1:11 PM, Sreekanth Reddy wrote:
During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Kernel’s block layer may return stale entry for one or more outstanding requests if blk-mq is enabled. This may lead to Kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused.
Reference of upstream discussion - https://patchwork.kernel.org/patch/10734933/
So to fix this issue, driver will use scsi lookup table to track outstanding IOs at driver level and it avoids using scsi_host_find_tag().
Have done following changes in this patch,
Allocated & initialized scsi_lookup table of type (struct scsiio_tracker) and of depth host's can_queue at driver load time and it will be deallocated at driver unload time.
Once scmd is received, driver will take scsiio_tracker at entry corresponding to scmd's tag value from scsi_lookup table. Then this scsiio_tracker entry is initialized with proper smid, scmd, cb_idx etc. And this scsiio_tracker entry contents are cleared before driver calling scsi_done callback function.
scmd's host_scribble variable is used to save the corresponding scsiio_tracker address, later at any time driver can easily retrieve the scmd's corresponding scsiio_tacker using this host_scribble variable.
Whenever driver wants to get the outstanding IOs at the driver level then driver can go through this scsi_lookup table and if it observe any entry with non-null scmd then it means that scmd is outstanding at the driver level.
v1 change set: Updated the patch description.
Cc: stable@vger.kernel.org Signed-off-by: Sreekanth Reddy sreekanth.reddy@broadcom.com
drivers/scsi/mpt3sas/mpt3sas_base.c | 45 +++++++++++++++++++++++++++++--- drivers/scsi/mpt3sas/mpt3sas_base.h | 10 +++++++ drivers/scsi/mpt3sas/mpt3sas_ctl.c | 2 +- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 +++++------- drivers/scsi/mpt3sas/mpt3sas_warpdrive.c | 2 +- 5 files changed, 60 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 0a6cb8f..8e3d0d5 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -1301,7 +1301,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
cmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (cmd)
return scsi_cmd_priv(cmd);
return mpt3sas_get_st_from_scmd(cmd); return NULL;
}
@@ -1709,7 +1709,7 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) struct scsi_cmnd *scmd) { struct chain_tracker *chain_req;
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 smid = st->smid; u8 chain_offset = atomic_read(&ioc->chain_lookup[smid - 1].chain_offset);
@@ -3203,14 +3203,18 @@ static int mpt3sas_remove_dead_ioc_func(void *arg) mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx, struct scsi_cmnd *scmd) {
struct scsiio_tracker *request = scsi_cmd_priv(scmd);
struct scsiio_tracker *request; unsigned int tag = scmd->request->tag; u16 smid;
scmd->host_scribble = (unsigned char *)(&ioc->scsi_lookup[tag]);
request = mpt3sas_get_st_from_scmd(scmd);
smid = tag + 1; request->cb_idx = cb_idx; request->msix_io = _base_get_msix_index(ioc); request->smid = smid;
request->scmd = scmd; INIT_LIST_HEAD(&request->chain_list); return smid;
}
@@ -3264,6 +3268,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, return; st->cb_idx = 0xFF; st->direct_io = 0;
}st->scmd = NULL; atomic_set(&ioc->chain_lookup[st->smid - 1].chain_offset, 0); st->smid = 0;
@@ -4252,6 +4257,11 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, ioc->config_page, ioc->config_page_dma); }
if (ioc->scsi_lookup) {
free_pages((ulong)ioc->scsi_lookup, ioc->scsi_lookup_pages);
ioc->scsi_lookup = NULL;
}
kfree(ioc->hpr_lookup); kfree(ioc->internal_lookup); if (ioc->chain_lookup) {
@@ -4377,6 +4387,7 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, max_request_credit = min_t(u16, facts->RequestCredit, MAX_HBA_QUEUE_DEPTH);
+retry: /* Firmware maintains additional facts->HighPriorityCredit number of * credits for HiPriprity Request messages, so hba queue depth will be * sum of max_request_credit and high priority queue depth. @@ -4581,9 +4592,29 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc, (unsigned long long)ioc->request_dma)); total_sz += sz;
sz = ioc->shost->can_queue * sizeof(struct scsiio_tracker);
ioc->scsi_lookup_pages = get_order(sz);
ioc->scsi_lookup = (struct scsiio_tracker *)__get_free_pages(
GFP_KERNEL, ioc->scsi_lookup_pages);
if (!ioc->scsi_lookup) {
/* Retry allocating memory by reducing the queue depth */
if ((max_request_credit - 64) >
(ioc->internal_depth + INTERNAL_SCSIIO_CMDS_COUNT)) {
max_request_credit -= 64;
_base_release_memory_pools(ioc);
goto retry;
} else {
ioc_err(ioc,
"scsi_lookup: get_free_pages failed, sz(%d)\n",
(int)sz);
goto out;
}
}
dinitprintk(ioc, ioc_info(ioc, "scsiio(0x%p): depth(%d)\n", ioc->request, ioc->scsiio_depth));
total_sz += sz; ioc->chain_depth = min_t(u32, ioc->chain_depth, MAX_CHAIN_DEPTH); sz = ioc->scsiio_depth * sizeof(struct chain_lookup);
@@ -6239,6 +6270,14 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
smid = 1;
for (i = 0; i < ioc->shost->can_queue; i++, smid++) {
ioc->scsi_lookup[i].cb_idx = 0xFF;
ioc->scsi_lookup[i].smid = smid;
ioc->scsi_lookup[i].scmd = NULL;
ioc->scsi_lookup[i].direct_io = 0;
}
/* hi-priority queue */ INIT_LIST_HEAD(&ioc->hpr_free_list); smid = ioc->hi_priority_smid;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 19158cb..f8c82f6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -816,6 +816,7 @@ struct chain_lookup { /** * struct scsiio_tracker - scsi mf request tracker * @smid: system message id
- @scmd: scsi request pointer
- @cb_idx: callback index
- @direct_io: To indicate whether I/O is direct (WARPDRIVE)
- @chain_list: list of associated firmware chain tracker
@@ -823,6 +824,7 @@ struct chain_lookup { */ struct scsiio_tracker { u16 smid;
struct scsi_cmnd *scmd; u8 cb_idx; u8 direct_io; struct pcie_sg_list pcie_sg_list;
@@ -830,6 +832,12 @@ struct scsiio_tracker { u16 msix_io; };
+static inline struct scsiio_tracker * +mpt3sas_get_st_from_scmd(struct scsi_cmnd *scmd) +{
return (struct scsiio_tracker *)scmd->host_scribble;
+}
- /**
- struct request_tracker - firmware request tracker
- @smid: system message id
@@ -1296,6 +1304,8 @@ struct MPT3SAS_ADAPTER { u8 *request; dma_addr_t request_dma; u32 request_dma_sz;
struct scsiio_tracker *scsi_lookup;
ulong scsi_lookup_pages; struct pcie_sg_list *pcie_sg_lookup; spinlock_t scsi_lookup_lock; int pending_io_count;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index b2bb47c..ad43e60 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -595,7 +595,7 @@ void mpt3sas_ctl_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) continue; if (priv_data->sas_target->handle != handle) continue;
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); tm_request->TaskMID = cpu_to_le16(st->smid); found = 1; }
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f..86d0e3c 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1465,11 +1465,9 @@ struct scsi_cmnd *
if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
u32 unique_tag = smid - 1;
scmd = scsi_host_find_tag(ioc->shost, unique_tag);
scmd = ioc->scsi_lookup[smid - 1].scmd; if (scmd) {
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); if (st->cb_idx == 0xFF || st->smid == 0) scmd = NULL; }
@@ -2819,7 +2817,7 @@ int mpt3sas_scsih_issue_locked_tm(struct MPT3SAS_ADAPTER *ioc, u16 handle, { struct MPT3SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT3SAS_DEVICE *sas_device_priv_data;
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); u16 handle; int r;
@@ -4466,7 +4464,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) continue; count++; _scsih_set_satl_pending(scmd, false);
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); mpt3sas_base_clear_st(ioc, st); scsi_dma_unmap(scmd); if (ioc->pci_error_recovery || ioc->remove_host)
@@ -5193,7 +5191,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) * WARPDRIVE: If direct_io is set then it is directIO, * the failed direct I/O should be redirected to volume */
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); if (st->direct_io && ((ioc_status & MPI2_IOCSTATUS_MASK) != MPI2_IOCSTATUS_SCSI_TASK_TERMINATED)) {
@@ -7335,7 +7333,7 @@ static int _scsih_set_satl_pending(struct scsi_cmnd *scmd, bool pending) scmd = mpt3sas_scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue;
st = scsi_cmd_priv(scmd);
st = mpt3sas_get_st_from_scmd(scmd); sdev = scmd->device; sas_device_priv_data = sdev->hostdata; if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
@@ -10176,7 +10174,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
};
/* raid transport support for SAS 2.0 HBA devices */
@@ -10214,7 +10211,6 @@ static void pcie_device_make_active(struct MPT3SAS_ADAPTER *ioc, .shost_attrs = mpt3sas_host_attrs, .sdev_attrs = mpt3sas_dev_attrs, .track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
};
/* raid transport support for SAS 3.0 HBA devices */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c index cc07ba4..2a05bf3 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c +++ b/drivers/scsi/mpt3sas/mpt3sas_warpdrive.c @@ -259,7 +259,7 @@ sector_t v_lba, p_lba, stripe_off, column, io_size; u32 stripe_sz, stripe_exp; u8 num_pds, cmd = scmd->cmnd[0];
struct scsiio_tracker *st = scsi_cmd_priv(scmd);
struct scsiio_tracker *st = mpt3sas_get_st_from_scmd(scmd); if (cmd != READ_10 && cmd != WRITE_10 && cmd != READ_16 && cmd != WRITE_16)
I still think this is the wrong way of approaching it. I'd rather using blk_mq_tagset_busy_iter() here; that would avoid the issue mentioned.
Let's see if I can come up with a patch.
Hannes,
This API blk_mq_tagset_busy_iter() is not flexible to accommodate it into mpt3sas driver code. For example driver can’t exit from this API in-between if need, i.e. While processing Async Broadcast primitive event if host reset occurs then driver has to wait for this blk_mq_tagset_busy_iter() API to call the callback function for all the outstanding requests and no way to quit in-between.
This is actually not true; the return value from the iter function determines if the loop should continue. If the iter function returns false the loop will be terminated.
Oh ok I will look it again. Also I think this function can be used only if MQ is enabled, we can't use it in Non-MQ mode (as same fix need for stable kernels also). Driver's scsi lookup table mechanism works both in MQ & Non-MQ mode.
Also currently most of the customers are observing this issue with latest kernels as MQ is enabled by default and this fix is need very urgently.
Also in below thread we concluded that first we go with driver's scsi lookup table mechanism, as this fix is need very urgently and in pipe line we will work to use this blk_mq_tagset_busy_iter() API. Here I am copying the Kashyap reply from below thread, "We will address this issue through <mpt3sas> driver changes in two steps. 1. I can use driver's internal memory and will not rely on request/scsi command. Tag 0...depth loop is not in main IO path, so what we need is contention free access of the list. Having driver's internal memory and array will provide that control. 2. As you suggested best way is to use busy tag iteration. (only for blk-mq stack)"
https://patchwork.kernel.org/patch/10734933/
Thanks, Sreekanth
Cheers,
Hannes
On 2/26/19 2:12 PM, Sreekanth Reddy wrote:
On Tue, Feb 26, 2019 at 5:59 PM Hannes Reinecke hare@suse.de wrote:
On 2/26/19 12:48 PM, Sreekanth Reddy wrote:
On Tue, Feb 26, 2019 at 12:38 AM Hannes Reinecke hare@suse.de wrote:
[ .. ]
Hannes,
This API blk_mq_tagset_busy_iter() is not flexible to accommodate it into mpt3sas driver code. For example driver can’t exit from this API in-between if need, i.e. While processing Async Broadcast primitive event if host reset occurs then driver has to wait for this blk_mq_tagset_busy_iter() API to call the callback function for all the outstanding requests and no way to quit in-between.
This is actually not true; the return value from the iter function determines if the loop should continue. If the iter function returns false the loop will be terminated.
Oh ok I will look it again. Also I think this function can be used only if MQ is enabled, we can't use it in Non-MQ mode (as same fix need for stable kernels also). Driver's scsi lookup table mechanism works both in MQ & Non-MQ mode.
Also currently most of the customers are observing this issue with latest kernels as MQ is enabled by default and this fix is need very urgently.
Also in below thread we concluded that first we go with driver's scsi lookup table mechanism, as this fix is need very urgently and in pipe line we will work to use this blk_mq_tagset_busy_iter() API. Here I am copying the Kashyap reply from below thread, "We will address this issue through <mpt3sas> driver changes in two steps.
- I can use driver's internal memory and will not rely on request/scsi
command. Tag 0...depth loop is not in main IO path, so what we need is contention free access of the list. Having driver's internal memory and array will provide that control. 2. As you suggested best way is to use busy tag iteration. (only for blk-mq stack)"
Attached is a patch to demonstrate my approach. I am aware that it'll only be useful for latest upstream where the legacy I/O path has been dropped completely, so we wouldn't need to worry about it. For older releases indeed you would need to with something like your proposed patch, but for upstream I really would like to switch to blk_mq_tagset_busy() iter.
Cheers,
Hannes
On Tue, Feb 26, 2019 at 02:49:30PM +0100, Hannes Reinecke wrote:
Attached is a patch to demonstrate my approach. I am aware that it'll only be useful for latest upstream where the legacy I/O path has been dropped completely, so we wouldn't need to worry about it. For older releases indeed you would need to with something like your proposed patch, but for upstream I really would like to switch to blk_mq_tagset_busy() iter.
While this is better than the driver private tracking we really should not have to iterate all outstanding command, because if we have any there is a bug we need to fix in the higher layers instead of working around it in the drivers.
While this is better than the driver private tracking we really should not have to iterate all outstanding command, because if we have any there is a bug we need to fix in the higher layers instead of working around it in the drivers.
I agree completely.
On 2/26/19 3:33 PM, Christoph Hellwig wrote:
On Tue, Feb 26, 2019 at 02:49:30PM +0100, Hannes Reinecke wrote:
Attached is a patch to demonstrate my approach. I am aware that it'll only be useful for latest upstream where the legacy I/O path has been dropped completely, so we wouldn't need to worry about it. For older releases indeed you would need to with something like your proposed patch, but for upstream I really would like to switch to blk_mq_tagset_busy() iter.
While this is better than the driver private tracking we really should not have to iterate all outstanding command, because if we have any there is a bug we need to fix in the higher layers instead of working around it in the drivers.
Ah-ha.
But what else should we be doing here? The driver needs to abort all outstanding commands; I somewhat fail to see how we could be doing it otherwise ...
Cheers,
Hannes
On Tue, Feb 26, 2019 at 8:23 PM Hannes Reinecke hare@suse.de wrote:
On 2/26/19 3:33 PM, Christoph Hellwig wrote:
On Tue, Feb 26, 2019 at 02:49:30PM +0100, Hannes Reinecke wrote:
Attached is a patch to demonstrate my approach. I am aware that it'll only be useful for latest upstream where the legacy I/O path has been dropped completely, so we wouldn't need to worry about it. For older releases indeed you would need to with something like your proposed patch, but for upstream I really would like to switch to blk_mq_tagset_busy() iter.
While this is better than the driver private tracking we really should not have to iterate all outstanding command, because if we have any there is a bug we need to fix in the higher layers instead of working around it in the drivers.
Hi Chris, Looking at other driver's code, I think similar issue is impacted to many scsi hbas (like fnic, qla4xxx, snic etc..). They also need similar logic to traverse outstanding scsi commands. One of the example is - At the time of HBA reset driver would like to release scsi command back to SML to retry and for that driver requires to loop all possible smid to figure out outstanding scsi command @Firmware/SML level.
Ah-ha.
But what else should we be doing here? The driver needs to abort all outstanding commands; I somewhat fail to see how we could be doing it otherwise ...
Hannes, Primary drawback of using "blk_mq_tagset_busy_iter" is API is only for blk-mq and it is not available for all the kernel with blk-mq support. We have seen multiple failures from customer and those kernels does not support blk_mq_tagset_busy_iter. In fact, blk-mq and non-mq stack is alive in many Linux distribution and customer is using those. If we just scope to fix current kernel 5.x (which does not have non-mq support), we can opt blk_mq_tagset_busy_iter(). Earlier I requested upstream to accept driver changes without blk_mq_tagset_busy_iter() because it is hitting many customers so we are looking for generic fix which can server both blk-mq as well as non-mq.
We will certainly enhance and optimize working this area (driver trying to find outstanding scsi command) with scsi mailing list as incremental approach.
Kashyap
Cheers,
Hannes
Kashyap,
Primary drawback of using "blk_mq_tagset_busy_iter" is API is only for blk-mq and it is not available for all the kernel with blk-mq support. We have seen multiple failures from customer and those kernels does not support blk_mq_tagset_busy_iter. In fact, blk-mq and non-mq stack is alive in many Linux distribution and customer is using those.
I'm afraid the burden falls upon Broadcom to manage enablement patches for older kernels. So the non-MQ/non-tagset-busy-iter cases are not particularly relevant for the discussion here.
Let's focus on getting some usable plumbing that drivers can use to abort oustanding tags.
On Wed, Feb 27, 2019 at 8:20 PM Martin K. Petersen martin.petersen@oracle.com wrote:
Kashyap,
Primary drawback of using "blk_mq_tagset_busy_iter" is API is only for blk-mq and it is not available for all the kernel with blk-mq support. We have seen multiple failures from customer and those kernels does not support blk_mq_tagset_busy_iter. In fact, blk-mq and non-mq stack is alive in many Linux distribution and customer is using those.
I'm afraid the burden falls upon Broadcom to manage enablement patches for older kernels. So the non-MQ/non-tagset-busy-iter cases are not particularly relevant for the discussion here.
Let's focus on getting some usable plumbing that drivers can use to abort oustanding tags.
Today I have discussed with Kashyap internally and found below solution,
Solution: Instead of calling scsi_host_find_tag() API for each and every smid from one to shost->can_queue, now driver will call this API only for those smid's which are outstanding at the driver level. Driver will determine whether this smid is outstanding at driver level by looking into it's corresponding MPI request frame, if it's MPI request frame is empty then it means that this smid is free and no need to call scsi_host_find_tag() API for this smid. By doing this driver will invoke scsi_host_find_tag() for only those tags which are outstanding at the driver level.
Driver will check whether particular MPI request frame is empty or not by looking into the "DevHandle" field. if this field is zero then it means that this MPI request is empty. Also driver will memset the MPI request frame once the corresponding scmd is processed (i.e. just before calling scmd->done function).
Here is the code change. currently I am testing this code and will post this patch once the testing completes. Please let us known your thoughts on this.
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e577744..1d8c584 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3281,12 +3281,18 @@ void mpt3sas_base_clear_st(struct MPT3SAS_ADAPTER *ioc,
if (smid < ioc->hi_priority_smid) { struct scsiio_tracker *st; + void *request;
st = _get_st_from_smid(ioc, smid); if (!st) { _base_recovery_check(ioc); return; } + + /* Clear MPI request frame */ + request = mpt3sas_base_get_msg_frame(ioc, smid); + memset(request, 0, ioc->request_sz); + mpt3sas_base_clear_st(ioc, st); _base_recovery_check(ioc); return; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f..55bec88 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1462,11 +1462,20 @@ struct scsi_cmnd * { struct scsi_cmnd *scmd = NULL; struct scsiio_tracker *st; + Mpi25SCSIIORequest_t *mpi_request;
if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) { u32 unique_tag = smid - 1;
+ mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); + + /* DevHandle zero means that this smid is free + * at driver level, So return NULL. + */ + if (!mpi_request->DevHandle) + return scmd; + scmd = scsi_host_find_tag(ioc->shost, unique_tag); if (scmd) { st = scsi_cmd_priv(scmd); ---
Thanks, Sreekanth
-- Martin K. Petersen Oracle Linux Engineering
linux-stable-mirror@lists.linaro.org