From: James Smart jsmart2021@gmail.com
[ Upstream commit 93a3922da428ec0752e8b2ab00c42dadbbf805a9 ]
During remote port loss fault testing, the driver crashed with the following trace:
general protection fault: 0000 [#1] SMP RIP: ... lpfc_nvme_register_port+0x250/0x480 [lpfc] Call Trace: lpfc_nlp_state_cleanup+0x1b3/0x7a0 [lpfc] lpfc_nlp_set_state+0xa6/0x1d0 [lpfc] lpfc_cmpl_prli_prli_issue+0x213/0x440 lpfc_disc_state_machine+0x7e/0x1e0 [lpfc] lpfc_cmpl_els_prli+0x18a/0x200 [lpfc] lpfc_sli_sp_handle_rspiocb+0x3b5/0x6f0 [lpfc] lpfc_sli_handle_slow_ring_event_s4+0x161/0x240 [lpfc] lpfc_work_done+0x948/0x14c0 [lpfc] lpfc_do_work+0x16f/0x180 [lpfc] kthread+0xc9/0xe0 ret_from_fork+0x55/0x80
After registering a new remoteport, the driver is pulling an ndlp pointer from the lpfc rport associated with the private area of a newly registered remoteport. The private area is uninitialized, so it's garbage.
Correct by pulling the the lpfc rport pointer from the entering ndlp point, then ndlp value from at rport. Note the entering ndlp may be replacing by the rport->ndlp due to an address change swap.
Signed-off-by: Dick Kennedy dick.kennedy@broadcom.com Signed-off-by: James Smart james.smart@broadcom.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin alexander.levin@microsoft.com --- drivers/scsi/lpfc/lpfc_nvme.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 76a5a99605aa..d723fd1d7b26 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2687,7 +2687,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_nvme_rport *oldrport; struct nvme_fc_remote_port *remote_port; struct nvme_fc_port_info rpinfo; - struct lpfc_nodelist *prev_ndlp; + struct lpfc_nodelist *prev_ndlp = NULL;
lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC, "6006 Register NVME PORT. DID x%06x nlptype x%x\n", @@ -2736,23 +2736,29 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) spin_unlock_irq(&vport->phba->hbalock); rport = remote_port->private; if (oldrport) { + /* New remoteport record does not guarantee valid + * host private memory area. + */ + prev_ndlp = oldrport->ndlp; if (oldrport == remote_port->private) { - /* Same remoteport. Just reuse. */ + /* Same remoteport - ndlp should match. + * Just reuse. + */ lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NVME_DISC, "6014 Rebinding lport to " "remoteport %p wwpn 0x%llx, " - "Data: x%x x%x %p x%x x%06x\n", + "Data: x%x x%x %p %p x%x x%06x\n", remote_port, remote_port->port_name, remote_port->port_id, remote_port->port_role, + prev_ndlp, ndlp, ndlp->nlp_type, ndlp->nlp_DID); return 0; } - prev_ndlp = rport->ndlp;
/* Sever the ndlp<->rport association * before dropping the ndlp ref from @@ -2786,13 +2792,13 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NODE, "6022 Binding new rport to " - "lport %p Remoteport %p WWNN 0x%llx, " + "lport %p Remoteport %p rport %p WWNN 0x%llx, " "Rport WWPN 0x%llx DID " - "x%06x Role x%x, ndlp %p\n", - lport, remote_port, + "x%06x Role x%x, ndlp %p prev_ndlp %p\n", + lport, remote_port, rport, rpinfo.node_name, rpinfo.port_name, rpinfo.port_id, rpinfo.port_role, - ndlp); + ndlp, prev_ndlp); } else { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_NODE,