From: James Smart jsmart2021@gmail.com
[ Upstream commit af984c87293b19dccbd0b16afc57c5c9a4a279c7 ]
A link bounce to a slow fabric may observe FDISC response delays lasting longer than devloss tmo. Current logic decrements the final fabric node kref during a devloss tmo event. This results in a NULL ptr dereference crash if the FDISC completes for that fabric node after devloss tmo.
Fix by adding the NLP_IN_RECOV_POST_DEV_LOSS flag, which is set when devloss tmo triggers and we've noticed that fabric node recovery has already started or finished in between the time lpfc_dev_loss_tmo_callbk queues lpfc_dev_loss_tmo_handler. If fabric node recovery succeeds, then the driver reverses the devloss tmo marked kref put with a kref get. If fabric node recovery fails, then the final kref put relies on the ELS timing out or the REG_LOGIN cmpl routine.
Link: https://lore.kernel.org/r/20211020211417.88754-8-jsmart2021@gmail.com Co-developed-by: Justin Tee justin.tee@broadcom.com Signed-off-by: Justin Tee justin.tee@broadcom.com Signed-off-by: James Smart jsmart2021@gmail.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/lpfc/lpfc_crtn.h | 2 + drivers/scsi/lpfc/lpfc_disc.h | 12 +++- drivers/scsi/lpfc/lpfc_els.c | 7 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 111 ++++++++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_init.c | 12 ++-- drivers/scsi/lpfc/lpfc_scsi.c | 10 +-- 6 files changed, 139 insertions(+), 15 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index c512f41991429..c9770b1d2366a 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -119,6 +119,8 @@ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did); struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); int lpfc_nlp_put(struct lpfc_nodelist *); +void lpfc_check_nlp_post_devloss(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp); void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb); int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp); diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index 871b665bd72e3..37a4b79010bfc 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -85,6 +85,13 @@ enum lpfc_fc4_xpt_flags { NLP_XPT_HAS_HH = 0x10 };
+enum lpfc_nlp_save_flags { + /* devloss occurred during recovery */ + NLP_IN_RECOV_POST_DEV_LOSS = 0x1, + /* wait for outstanding LOGO to cmpl */ + NLP_WAIT_FOR_LOGO = 0x2, +}; + struct lpfc_nodelist { struct list_head nlp_listp; struct serv_parm fc_sparam; /* buffer for service params */ @@ -144,8 +151,9 @@ struct lpfc_nodelist { unsigned long *active_rrqs_xri_bitmap; struct lpfc_scsicmd_bkt *lat_data; /* Latency data */ uint32_t fc4_prli_sent; - u32 upcall_flags; -#define NLP_WAIT_FOR_LOGO 0x2 + + /* flags to keep ndlp alive until special conditions are met */ + enum lpfc_nlp_save_flags save_flags;
enum lpfc_fc4_xpt_flags fc4_xpt_flags;
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 666b0a1b558ac..1a406df251cbf 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -2898,9 +2898,9 @@ lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp = &(rspiocb->iocb); spin_lock_irq(&ndlp->lock); ndlp->nlp_flag &= ~NLP_LOGO_SND; - if (ndlp->upcall_flags & NLP_WAIT_FOR_LOGO) { + if (ndlp->save_flags & NLP_WAIT_FOR_LOGO) { wake_up_waiter = 1; - ndlp->upcall_flags &= ~NLP_WAIT_FOR_LOGO; + ndlp->save_flags &= ~NLP_WAIT_FOR_LOGO; } spin_unlock_irq(&ndlp->lock);
@@ -10713,6 +10713,9 @@ lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, irsp->ulpStatus, irsp->un.ulpWord[4]); goto fdisc_failed; } + + lpfc_check_nlp_post_devloss(vport, ndlp); + spin_lock_irq(shost->host_lock); vport->fc_flag &= ~FC_VPORT_CVL_RCVD; vport->fc_flag &= ~FC_VPORT_LOGO_RCVD; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index e1c02229c82d9..9ccb904e35fcf 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -209,7 +209,12 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
spin_lock_irqsave(&ndlp->lock, iflags); ndlp->nlp_flag |= NLP_IN_DEV_LOSS; - ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; + + /* If there is a PLOGI in progress, and we are in a + * NLP_NPR_2B_DISC state, don't turn off the flag. + */ + if (ndlp->nlp_state != NLP_STE_PLOGI_ISSUE) + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
/* * The backend does not expect any more calls associated with this @@ -340,6 +345,37 @@ static void lpfc_check_inactive_vmid(struct lpfc_hba *phba) lpfc_destroy_vport_work_array(phba, vports); }
+/** + * lpfc_check_nlp_post_devloss - Check to restore ndlp refcnt after devloss + * @vport: Pointer to vport object. + * @ndlp: Pointer to remote node object. + * + * If NLP_IN_RECOV_POST_DEV_LOSS flag was set due to outstanding recovery of + * node during dev_loss_tmo processing, then this function restores the nlp_put + * kref decrement from lpfc_dev_loss_tmo_handler. + **/ +void +lpfc_check_nlp_post_devloss(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp) +{ + unsigned long iflags; + + spin_lock_irqsave(&ndlp->lock, iflags); + if (ndlp->save_flags & NLP_IN_RECOV_POST_DEV_LOSS) { + ndlp->save_flags &= ~NLP_IN_RECOV_POST_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); + lpfc_nlp_get(ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE, + "8438 Devloss timeout reversed on DID x%x " + "refcnt %d ndlp %p flag x%x " + "port_state = x%x\n", + ndlp->nlp_DID, kref_read(&ndlp->kref), ndlp, + ndlp->nlp_flag, vport->port_state); + spin_lock_irqsave(&ndlp->lock, iflags); + } + spin_unlock_irqrestore(&ndlp->lock, iflags); +} + /** * lpfc_dev_loss_tmo_handler - Remote node devloss timeout handler * @ndlp: Pointer to remote node object. @@ -358,6 +394,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) uint8_t *name; int warn_on = 0; int fcf_inuse = 0; + bool recovering = false; + struct fc_vport *fc_vport = NULL; unsigned long iflags;
vport = ndlp->vport; @@ -394,6 +432,64 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
/* Fabric nodes are done. */ if (ndlp->nlp_type & NLP_FABRIC) { + spin_lock_irqsave(&ndlp->lock, iflags); + /* In massive vport configuration settings, it's possible + * dev_loss_tmo fired during node recovery. So, check if + * fabric nodes are in discovery states outstanding. + */ + switch (ndlp->nlp_DID) { + case Fabric_DID: + fc_vport = vport->fc_vport; + if (fc_vport && + fc_vport->vport_state == FC_VPORT_INITIALIZING) + recovering = true; + break; + case Fabric_Cntl_DID: + if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND) + recovering = true; + break; + case FDMI_DID: + fallthrough; + case NameServer_DID: + if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE && + ndlp->nlp_state <= NLP_STE_REG_LOGIN_ISSUE) + recovering = true; + break; + } + spin_unlock_irqrestore(&ndlp->lock, iflags); + + /* Mark an NLP_IN_RECOV_POST_DEV_LOSS flag to know if reversing + * the following lpfc_nlp_put is necessary after fabric node is + * recovered. + */ + if (recovering) { + lpfc_printf_vlog(vport, KERN_INFO, + LOG_DISCOVERY | LOG_NODE, + "8436 Devloss timeout marked on " + "DID x%x refcnt %d ndlp %p " + "flag x%x port_state = x%x\n", + ndlp->nlp_DID, kref_read(&ndlp->kref), + ndlp, ndlp->nlp_flag, + vport->port_state); + spin_lock_irqsave(&ndlp->lock, iflags); + ndlp->save_flags |= NLP_IN_RECOV_POST_DEV_LOSS; + spin_unlock_irqrestore(&ndlp->lock, iflags); + } else if (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { + /* Fabric node fully recovered before this dev_loss_tmo + * queue work is processed. Thus, ignore the + * dev_loss_tmo event. + */ + lpfc_printf_vlog(vport, KERN_INFO, + LOG_DISCOVERY | LOG_NODE, + "8437 Devloss timeout ignored on " + "DID x%x refcnt %d ndlp %p " + "flag x%x port_state = x%x\n", + ndlp->nlp_DID, kref_read(&ndlp->kref), + ndlp, ndlp->nlp_flag, + vport->port_state); + return fcf_inuse; + } + lpfc_nlp_put(ndlp); return fcf_inuse; } @@ -423,6 +519,14 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); }
+ /* If we are devloss, but we are in the process of rediscovering the + * ndlp, don't issue a NLP_EVT_DEVICE_RM event. + */ + if (ndlp->nlp_state >= NLP_STE_PLOGI_ISSUE && + ndlp->nlp_state <= NLP_STE_PRLI_ISSUE) { + return fcf_inuse; + } + if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
@@ -4351,6 +4455,8 @@ lpfc_mbx_cmpl_fc_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) goto out; }
+ lpfc_check_nlp_post_devloss(vport, ndlp); + if (phba->sli_rev < LPFC_SLI_REV4) ndlp->nlp_rpi = mb->un.varWords[0];
@@ -4528,9 +4634,10 @@ lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count) void lpfc_nlp_reg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { - unsigned long iflags;
+ lpfc_check_nlp_post_devloss(vport, ndlp); + spin_lock_irqsave(&ndlp->lock, iflags); if (ndlp->fc4_xpt_flags & NLP_XPT_REGD) { /* Already registered with backend, trigger rescan */ diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 195169badb372..0fee8d590b0c4 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3694,12 +3694,16 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action) lpfc_disc_state_machine(vports[i], ndlp, NULL, NLP_EVT_DEVICE_RECOVERY);
- /* Don't remove the node unless the + /* Don't remove the node unless the node * has been unregistered with the - * transport. If so, let dev_loss - * take care of the node. + * transport, and we're not in recovery + * before dev_loss_tmo triggered. + * Otherwise, let dev_loss take care of + * the node. */ - if (!(ndlp->fc4_xpt_flags & + if (!(ndlp->save_flags & + NLP_IN_RECOV_POST_DEV_LOSS) && + !(ndlp->fc4_xpt_flags & (NVME_XPT_REGD | SCSI_XPT_REGD))) lpfc_disc_state_machine (vports[i], ndlp, diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 364c8a9b99095..3d9175f1b6787 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -6455,28 +6455,28 @@ lpfc_target_reset_handler(struct scsi_cmnd *cmnd)
/* Issue LOGO, if no LOGO is outstanding */ spin_lock_irqsave(&pnode->lock, flags); - if (!(pnode->upcall_flags & NLP_WAIT_FOR_LOGO) && + if (!(pnode->save_flags & NLP_WAIT_FOR_LOGO) && !pnode->logo_waitq) { pnode->logo_waitq = &waitq; pnode->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; pnode->nlp_flag |= NLP_ISSUE_LOGO; - pnode->upcall_flags |= NLP_WAIT_FOR_LOGO; + pnode->save_flags |= NLP_WAIT_FOR_LOGO; spin_unlock_irqrestore(&pnode->lock, flags); lpfc_unreg_rpi(vport, pnode); wait_event_timeout(waitq, - (!(pnode->upcall_flags & + (!(pnode->save_flags & NLP_WAIT_FOR_LOGO)), msecs_to_jiffies(dev_loss_tmo * 1000));
- if (pnode->upcall_flags & NLP_WAIT_FOR_LOGO) { + if (pnode->save_flags & NLP_WAIT_FOR_LOGO) { lpfc_printf_vlog(vport, KERN_ERR, logit, "0725 SCSI layer TGTRST " "failed & LOGO TMO (%d, %llu) " "return x%x\n", tgt_id, lun_id, status); spin_lock_irqsave(&pnode->lock, flags); - pnode->upcall_flags &= ~NLP_WAIT_FOR_LOGO; + pnode->save_flags &= ~NLP_WAIT_FOR_LOGO; } else { spin_lock_irqsave(&pnode->lock, flags); }