On Tuesday, July 8, 2025 1:54 AM, Hannes Reinecke hare@suse.de wrote:
On 6/16/25 18:26, Karan Tilak Kumar wrote:
When both the RHBA and RPA FDMI requests time out, fnic reuses a frame to send ABTS for each of them. On send completion, this causes an attempt to free the same frame twice that leads to a crash.
Fix crash by allocating separate frames for RHBA and RPA, and modify ABTS logic accordingly.
Tested by checking MDS for FDMI information. Tested by using instrumented driver to: Drop PLOGI response Drop RHBA response Drop RPA response Drop RHBA and RPA response Drop PLOGI response + ABTS response Drop RHBA response + ABTS response Drop RPA response + ABTS response Drop RHBA and RPA response + ABTS response for both of them
Fixes: 09c1e6ab4ab2 ("scsi: fnic: Add and integrate support for FDMI") Reviewed-by: Sesidhar Baddela sebaddel@cisco.com Reviewed-by: Arulprabhu Ponnusamy arulponn@cisco.com Reviewed-by: Gian Carlo Boffa gcboffa@cisco.com Tested-by: Arun Easi aeasi@cisco.com Co-developed-by: Arun Easi aeasi@cisco.com Signed-off-by: Arun Easi aeasi@cisco.com Tested-by: Karan Tilak Kumar kartilak@cisco.com Cc: stable@vger.kernel.org Signed-off-by: Karan Tilak Kumar kartilak@cisco.com
Changes between v4 and v5: - Incorporate review comments from John: - Refactor patches
Changes between v3 and v4: - Incorporate review comments from Dan: - Remove comments from Cc tag
Changes between v2 and v3: - Incorporate review comments from Dan: - Add Cc to stable
Changes between v1 and v2: - Incorporate review comments from Dan: - Add Fixes tag
drivers/scsi/fnic/fdls_disc.c | 113 +++++++++++++++++++++++++--------- drivers/scsi/fnic/fnic.h | 2 +- drivers/scsi/fnic/fnic_fdls.h | 1 + 3 files changed, 87 insertions(+), 29 deletions(-)
diff --git a/drivers/scsi/fnic/fdls_disc.c b/drivers/scsi/fnic/fdls_disc.c index c2b6f4eb338e..0ee1b74967b9 100644 --- a/drivers/scsi/fnic/fdls_disc.c +++ b/drivers/scsi/fnic/fdls_disc.c @@ -763,47 +763,69 @@ static void fdls_send_fabric_abts(struct fnic_iport_s *iport) iport->fabric.timer_pending = 1; }
-static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +static uint8_t *fdls_alloc_init_fdmi_abts_frame(struct fnic_iport_s *iport,
{uint16_t oxid)
- uint8_t *frame;
- struct fc_frame_header *pfdmi_abts; uint8_t d_id[3];
- uint8_t *frame; struct fnic *fnic = iport->fnic;
struct fc_frame_header *pfabric_abts;
unsigned long fdmi_tov;
uint16_t oxid;
uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
sizeof(struct fc_frame_header);
frame = fdls_alloc_frame(iport); if (frame == NULL) { FNIC_FCS_DBG(KERN_ERR, fnic->host, fnic->fnic_num, "Failed to allocate frame to send FDMI ABTS");
return;
}return NULL;
- pfabric_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET);
pfdmi_abts = (struct fc_frame_header *) (frame + FNIC_ETH_FCOE_HDRS_OFFSET); fdls_init_fabric_abts_frame(frame, iport);
hton24(d_id, FC_FID_MGMT_SERV);
- FNIC_STD_SET_D_ID(*pfabric_abts, d_id);
- FNIC_STD_SET_D_ID(*pfdmi_abts, d_id);
- FNIC_STD_SET_OX_ID(*pfdmi_abts, oxid);
- return frame;
+}
+static void fdls_send_fdmi_abts(struct fnic_iport_s *iport) +{
uint8_t *frame;
unsigned long fdmi_tov;
uint16_t frame_size = FNIC_ETH_FCOE_HDRS_OFFSET +
sizeof(struct fc_frame_header);
if (iport->fabric.fdmi_pending & FDLS_FDMI_PLOGI_PENDING) {
oxid = iport->active_oxid_fdmi_plogi;
FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
frame = fdls_alloc_init_fdmi_abts_frame(iport,
iport->active_oxid_fdmi_plogi);
if (frame == NULL)
return;
} else { if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING) {fnic_send_fcoe_frame(iport, frame, frame_size);
oxid = iport->active_oxid_fdmi_rhba;
FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
frame = fdls_alloc_init_fdmi_abts_frame(iport,
iport->active_oxid_fdmi_rhba);
if (frame == NULL)
return;
fnic_send_fcoe_frame(iport, frame, frame_size); } if (iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING) {
oxid = iport->active_oxid_fdmi_rpa;
FNIC_STD_SET_OX_ID(*pfabric_abts, oxid);
frame = fdls_alloc_init_fdmi_abts_frame(iport,
iport->active_oxid_fdmi_rpa);
if (frame == NULL) {
if (iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING)
goto arm_timer;
else
return;
}
}fnic_send_fcoe_frame(iport, frame, frame_size); }
+arm_timer: fdmi_tov = jiffies + msecs_to_jiffies(2 * iport->e_d_tov); mod_timer(&iport->fabric.fdmi_timer, round_jiffies(fdmi_tov)); iport->fabric.fdmi_pending |= FDLS_FDMI_ABORT_PENDING; @@ -2244,6 +2266,21 @@ void fdls_fabric_timer_callback(struct timer_list *t) spin_unlock_irqrestore(&fnic->fnic_lock, flags); }
+void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport) +{
- struct fnic *fnic = iport->fnic;
- iport->fabric.fdmi_pending = 0;
- /* If max retries not exhausted, start over from fdmi plogi */
- if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
iport->fabric.fdmi_retry++;
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
"Retry FDMI PLOGI. FDMI retry: %d",
iport->fabric.fdmi_retry);
fdls_send_fdmi_plogi(iport);
- }
+}
- void fdls_fdmi_timer_callback(struct timer_list *t) { struct fnic_fdls_fabric_s *fabric = from_timer(fabric, t, fdmi_timer);
@@ -2289,14 +2326,7 @@ void fdls_fdmi_timer_callback(struct timer_list *t) FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending);
- iport->fabric.fdmi_pending = 0;
- /* If max retries not exhaused, start over from fdmi plogi */
- if (iport->fabric.fdmi_retry < FDLS_FDMI_MAX_RETRY) {
iport->fabric.fdmi_retry++;
FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num,
"retry fdmi timer %d", iport->fabric.fdmi_retry);
fdls_send_fdmi_plogi(iport);
- }
- fdls_fdmi_retry_plogi(iport); FNIC_FCS_DBG(KERN_INFO, fnic->host, fnic->fnic_num, "fdmi timer callback : 0x%x\n", iport->fabric.fdmi_pending); spin_unlock_irqrestore(&fnic->fnic_lock, flags);
@@ -3714,11 +3744,32 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, switch (FNIC_FRAME_TYPE(oxid)) { case FNIC_FRAME_TYPE_FDMI_PLOGI: fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_plogi);
iport->fabric.fdmi_pending &= ~FDLS_FDMI_PLOGI_PENDING;
case FNIC_FRAME_TYPE_FDMI_RHBA:iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING; break;
iport->fabric.fdmi_pending &= ~FDLS_FDMI_REG_HBA_PENDING;
/* If RPA is still pending, don't turn off ABORT PENDING.
* We count on the timer to detect the ABTS timeout and take
* corrective action.
*/
if (!(iport->fabric.fdmi_pending & FDLS_FDMI_RPA_PENDING))
iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
case FNIC_FRAME_TYPE_FDMI_RPA:fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rhba); break;
iport->fabric.fdmi_pending &= ~FDLS_FDMI_RPA_PENDING;
/* If RHBA is still pending, don't turn off ABORT PENDING.
* We count on the timer to detect the ABTS timeout and take
* corrective action.
*/
if (!(iport->fabric.fdmi_pending & FDLS_FDMI_REG_HBA_PENDING))
iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
default:fdls_free_oxid(iport, oxid, &iport->active_oxid_fdmi_rpa); break;
@@ -3728,10 +3779,16 @@ static void fdls_process_fdmi_abts_rsp(struct fnic_iport_s *iport, break; }
- timer_delete_sync(&iport->fabric.fdmi_timer);
- iport->fabric.fdmi_pending &= ~FDLS_FDMI_ABORT_PENDING;
- fdls_send_fdmi_plogi(iport);
/*
- Only if ABORT PENDING is off, delete the timer, and if no other
- operations are pending, retry FDMI.
- Otherwise, let the timer pop and take the appropriate action.
*/
if (!(iport->fabric.fdmi_pending & FDLS_FDMI_ABORT_PENDING)) {
timer_delete_sync(&iport->fabric.fdmi_timer);
if (!iport->fabric.fdmi_pending)
fdls_fdmi_retry_plogi(iport);
} }
static void
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 6c5f6046b1f5..86e293ce530d 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -30,7 +30,7 @@
#define DRV_NAME "fnic" #define DRV_DESCRIPTION "Cisco FCoE HBA Driver" -#define DRV_VERSION "1.8.0.0" +#define DRV_VERSION "1.8.0.1" #define PFX DRV_NAME ": " #define DFX DRV_NAME "%d: "
diff --git a/drivers/scsi/fnic/fnic_fdls.h b/drivers/scsi/fnic/fnic_fdls.h index 8e610b65ad57..531d0b37e450 100644 --- a/drivers/scsi/fnic/fnic_fdls.h +++ b/drivers/scsi/fnic/fnic_fdls.h @@ -394,6 +394,7 @@ void fdls_send_tport_abts(struct fnic_iport_s *iport, bool fdls_delete_tport(struct fnic_iport_s *iport, struct fnic_tport_s *tport); void fdls_fdmi_timer_callback(struct timer_list *t); +void fdls_fdmi_retry_plogi(struct fnic_iport_s *iport);
/* fnic_fcs.c */ void fnic_fdls_init(struct fnic *fnic, int usefip);
Please make the version change a separate patch and add it as the last patch in the series. That way you'll have better tracking if all patches from that series are applied.
Cheers,
Hannes
Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Thanks for your review and feedback Hannes. Yes, that was the format with the prior versions of the patches. However, we received feedback that since two of those patches were critical fixes, and the others were debug enhancements, that we make some modifications to this format. That's why we went along with this approach.
Regards, Karan