When the driver is set to enable BB credit recovery, the switch displays the setting as inactive. If the link bounces, it switches to Active.
During link up processing, the driver currently does a MBX_READ_SPARAM followed by a MBX_CONFIG_LINK. These mbox commands are queued to be executed one at a time, and the completion is processed by the worker thread. Since the MBX_READ_SPARAM is done BEFORE the MBX_CONFIG_LINK, the BB_SC_N bit is never set in the returned values. BB Credit recovery status only gets set after the driver requests the feature in CONFIG_LINK, which is done after the link up. Thus the ordering of READ_SPARAM needs to follow the CONFIG_LINK.
Fix by reordering so that READ_SPARAM is done after CONFIG_LINK. Added a HBA_DEFER_FLOGI flag so that any FLOGI handling waits until after the READ_SPARAM is done so that the proper BB credit value is set in the FLOGI payload.
Fixes: 6bfb16208298 ("scsi: lpfc: Fix configuration of BB credit recovery in service parameters") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Dick Kennedy dick.kennedy@broadcom.com Signed-off-by: James Smart jsmart2021@gmail.com --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_hbadisc.c | 59 ++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 20 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 04d73e2be373..3f2cb17c4574 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -749,6 +749,7 @@ struct lpfc_hba { * capability */ #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ +#define HBA_DEFER_FLOGI 0x800000 /* Defer FLOGI till read_sparm cmpl */
uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index dcc8999c6a68..6a2bdae0e52a 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1163,13 +1163,16 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) }
/* Start discovery by sending a FLOGI. port_state is identically - * LPFC_FLOGI while waiting for FLOGI cmpl + * LPFC_FLOGI while waiting for FLOGI cmpl. Check if sending + * the FLOGI is being deferred till after MBX_READ_SPARAM completes. */ - if (vport->port_state != LPFC_FLOGI) - lpfc_initial_flogi(vport); - else if (vport->fc_flag & FC_PT2PT) - lpfc_disc_start(vport); - + if (vport->port_state != LPFC_FLOGI) { + if (!(phba->hba_flag & HBA_DEFER_FLOGI)) + lpfc_initial_flogi(vport); + } else { + if (vport->fc_flag & FC_PT2PT) + lpfc_disc_start(vport); + } return;
out: @@ -3094,6 +3097,14 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) lpfc_mbuf_free(phba, mp->virt, mp->phys); kfree(mp); mempool_free(pmb, phba->mbox_mem_pool); + + /* Check if sending the FLOGI is being deferred to after we get + * up to date CSPs from MBX_READ_SPARAM. + */ + if (phba->hba_flag & HBA_DEFER_FLOGI) { + lpfc_initial_flogi(vport); + phba->hba_flag &= ~HBA_DEFER_FLOGI; + } return;
out: @@ -3224,6 +3235,23 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) }
lpfc_linkup(phba); + sparam_mbox = NULL; + + if (!(phba->hba_flag & HBA_FCOE_MODE)) { + cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!cfglink_mbox) + goto out; + vport->port_state = LPFC_LOCAL_CFG_LINK; + lpfc_config_link(phba, cfglink_mbox); + cfglink_mbox->vport = vport; + cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link; + rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) { + mempool_free(cfglink_mbox, phba->mbox_mem_pool); + goto out; + } + } + sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!sparam_mbox) goto out; @@ -3244,20 +3272,7 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) goto out; }
- if (!(phba->hba_flag & HBA_FCOE_MODE)) { - cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!cfglink_mbox) - goto out; - vport->port_state = LPFC_LOCAL_CFG_LINK; - lpfc_config_link(phba, cfglink_mbox); - cfglink_mbox->vport = vport; - cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link; - rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT); - if (rc == MBX_NOT_FINISHED) { - mempool_free(cfglink_mbox, phba->mbox_mem_pool); - goto out; - } - } else { + if (phba->hba_flag & HBA_FCOE_MODE) { vport->port_state = LPFC_VPORT_UNKNOWN; /* * Add the driver's default FCF record at FCF index 0 now. This @@ -3314,6 +3329,10 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) } /* Reset FCF roundrobin bmask for new discovery */ lpfc_sli4_clear_fcf_rr_bmask(phba); + } else { + if (phba->bbcredit_support && phba->cfg_enable_bbcr && + !(phba->link_flag & LS_LOOPBACK_MODE)) + phba->hba_flag |= HBA_DEFER_FLOGI; }
/* Prepare for LINK up registrations */
Hi,
this commit, applied in Ubuntu's 5.4.0-30.34 tree as 77d5805eafdb5c42bdfe78f058ad9c40ee1278b4, appears to cause our HPE-branded 2-port 8Gb lpfcs to report FLOGI errors. Reverting it fixes target discovery for me. See below for log messages and HW details.
* James Smart jsmart2021@gmail.com [700101 01:00]:
When driver is set to enable bb credit recovery, the switch displayed the setting as inactive. If the link bounces, it switches to Active.
[..]
Fixes: 6bfb16208298 ("scsi: lpfc: Fix configuration of BB credit recovery in service parameters") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Dick Kennedy dick.kennedy@broadcom.com Signed-off-by: James Smart jsmart2021@gmail.com
Broken log messages:
[ 5.837826] Emulex LightPulse Fibre Channel SCSI driver 12.6.0.4 [ 5.837827] Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. [ 5.838807] scsi host2: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 07 device 00 irq 128 [ 8.300583] scsi host4: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 07 device 01 irq 182 [ 8.858018] lpfc 0000:07:00.0: 0:1303 Link Up Event x1 received Data: x1 xf7 x20 x0 x0 x0 0 [ 11.380022] lpfc 0000:07:00.1: 1:1303 Link Up Event x1 received Data: x1 xf7 x20 x0 x0 x0 0 [ 28.819755] lpfc 0000:07:00.1: 1:(0):0237 Pending Link Event during Discovery: State x7 [ 28.819963] lpfc 0000:07:00.1: 1:1305 Link Down Event x2 received Data: x2 x7 x98014 x0 x0 [ 28.915823] lpfc 0000:07:00.1: 1:1303 Link Up Event x3 received Data: x3 x0 x20 x0 x0 x0 0 [ 28.920083] lpfc 0000:07:00.0: 0:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0
Reverted:
[ 74.838109] Emulex LightPulse Fibre Channel SCSI driver 12.6.0.4-7fbb1b050a65 [ 74.838111] Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. [ 74.840310] scsi host2: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 07 device 00 irq 128 [ 77.272319] scsi host4: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 07 device 01 irq 182 [ 77.813387] lpfc 0000:07:00.0: 0:1303 Link Up Event x1 received Data: x1 xf7 x20 x0 x0 x0 0 [ 80.261594] lpfc 0000:07:00.1: 1:1303 Link Up Event x1 received Data: x1 xf7 x20 x0 x0 x0 0 (plus various sd attach messages)
systool info:
active_fc4s = "0x00 0x00 0x01 0x00 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 " dev_loss_tmo = "30" max_npiv_vports = "255" maxframe_size = "2048 bytes" npiv_vports_inuse = "0" port_id = "0x0b0260" port_state = "Online" port_type = "NPort (fabric via point-to-point)" speed = "8 Gbit" supported_classes = "Class 3" supported_fc4s = "0x00 0x00 0x01 0x00 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 " supported_speeds = "2 Gbit, 4 Gbit, 8 Gbit" symbolic_name = "Emulex AJ763B/AH403A FV2.10X6 DV12.6.0.4-7fbb1b050a65 HN:pm01-vh03 OS:Linux" tgtid_bind_type = "wwpn (World Wide Port Name)"
Let me know if you need further debug logs or something.
Thanks,
On 5/12/2020 2:28 PM, Chris Hofstaedtler wrote:
Hi,
this commit, applied in Ubuntu's 5.4.0-30.34 tree as 77d5805eafdb5c42bdfe78f058ad9c40ee1278b4, appears to cause our HPE-branded 2-port 8Gb lpfcs to report FLOGI errors. Reverting it fixes target discovery for me. See below for log messages and HW details.
... Let me know if you need further debug logs or something.
Thanks,
I'm more interested in what other patches you do or do not have in your tree.
This is the message that threw it to the left: 0237 Pending Link Event during Discovery
Let me look a little.
-- james
* James Smart jsmart2021@gmail.com [200513 02:00]:
On 5/12/2020 2:28 PM, Chris Hofstaedtler wrote:
this commit, applied in Ubuntu's 5.4.0-30.34 tree as 77d5805eafdb5c42bdfe78f058ad9c40ee1278b4, appears to cause our HPE-branded 2-port 8Gb lpfcs to report FLOGI errors. Reverting it fixes target discovery for me. See below for log messages and HW details.
..
I'm more interested in what other patches you do or do not have in your tree.
I'd imagine the patches come from LP#1855303.
Please find a short log of drivers/scsi/lpfc below. As additional info, we were previously running Ubuntu's 5.3.0-43.36, which had lpfc 12.2.0.3.
This is the message that threw it to the left: 0237 Pending Link Event during Discovery
Let me look a little.
-- james
Many thanks, Chris
432bc5a9f5c1 scsi: lpfc: add RDF registration and Link Integrity FPIN logging 40953bf88d5f scsi: lpfc: Copyright updates for 12.6.0.4 patches dbbf73335448 scsi: lpfc: Update lpfc version to 12.6.0.4 e8b80ff76088 scsi: lpfc: Clean up hba max_lun_queue_depth checks 3c011ebffaef scsi: lpfc: Remove handler for obsolete ELS - Read Port Status (RPS) 9b200f712004 scsi: lpfc: Fix coverity errors in fmdi attribute handling 0be9b30cd635 scsi: lpfc: Fix compiler warning on frame size 2f985fdbaec3 scsi: lpfc: Fix release of hwq to clear the eq relationship 5acd7d9f7eea scsi: lpfc: Fix registration of ELS type support in fdmi 77d5805eafdb scsi: lpfc: Fix broken Credit Recovery after driver load 6b5806410b8f scsi: lpfc: Fix lpfc_io_buf resource leak in lpfc_get_scsi_buf_s4 error path fa9df073dbce scsi: lpfc: Fix RQ buffer leakage when no IOCBs available c5198e2e539b scsi: lpfc: Update lpfc version to 12.6.0.3 390e6a77e06f scsi: lpfc: Fix improper flag check for IO type e2200d875825 scsi: lpfc: Fix MDS Latency Diagnostics Err-drop rates 038e6643ac38 scsi: lpfc: Fix unmap of dpp bars affecting next driver load 42898fe98599 scsi: lpfc: Fix disablement of FC-AL on lpe35000 models 1b6e85b0950d scsi: lpfc: Fix ras_log via debugfs 8d291637e21c scsi: lpfc: Fix Fabric hostname registration if system hostname changes c7f141b558f6 scsi: lpfc: Fix missing check for CSF in Write Object Mbox Rsp 107a9fe98ada scsi: lpfc: Fix incomplete NVME discovery when target 45dc4914d65f scsi: lpfc: size cpu map by last cpu id set 48d76e9b38a4 scsi: lpfc: Update lpfc version to 12.6.0.2 e667dc171fe4 scsi: lpfc: revise nvme max queues to be hdwq count db8d6e2ab615 scsi: lpfc: Initialize cpu_map for not present cpus d7b48a096eb7 scsi: lpfc: fix inlining of lpfc_sli4_cleanup_poll_list() d0ff88ef8591 scsi: lpfc: Fix lpfc_cpumask_of_node_init() a41d3723e398 scsi: lpfc: Fix a kernel warning triggered by lpfc_sli4_enable_intr() 723fabed8c5e scsi: lpfc: Update lpfc version to 12.6.0.1 8c708b2e7d79 scsi: lpfc: Add 
enablement of multiple adapter dumps c50fd99a1fe9 scsi: lpfc: Change default IRQ model on AMD architectures 79d488b36a1e scsi: lpfc: Add registration for CPU Offline/Online events 328383a3f61b scsi: lpfc: Clarify FAWNN error message e45f34740925 scsi: lpfc: Sync with FC-NVMe-2 SLER change to require Conf with SLER eaeaddbfee55 scsi: lpfc: Fix dynamic fw log enablement check e35dc794fcdd scsi: lpfc: Fix kernel crash at lpfc_nvme_info_show during remote port bounce 4f175d5b6bca scsi: lpfc: Fix configuration of BB credit recovery in service parameters 8b22efb072fc scsi: lpfc: Make lpfc_debugfs_ras_log_data static 3323028b62de scsi: lpfc: Fix NULL check before mempool_destroy is not needed ae036af7d4f6 scsi: lpfc: fix spelling error in MAGIC_NUMER_xxx 18c38c2053fb scsi: lpfc: fix build error of lpfc_debugfs.c for vfree/vmalloc 0df2dcbaaa0d scsi: lpfc: lpfc_nvmet: Fix Use plain integer as NULL pointer 90c3f71713d5 scsi: lpfc: lpfc_attr: Fix Use plain integer as NULL pointer a3babc0488ff scsi: lpfc: Update lpfc version to 12.6.0.0 d7d082705c12 scsi: lpfc: Add additional discovery log messages 24dc02b77a1a scsi: lpfc: Add FC-AL support to lpe32000 models 980ed57564f6 scsi: lpfc: Add FA-WWN Async Event reporting 6ac712225cca scsi: lpfc: Add log macros to allow print by serverity or verbosity setting 2c0bc5bbd84a scsi: lpfc: Make FW logging dynamically configurable 3965a40a4e75 scsi: lpfc: Revise interrupt coalescing for missing scenarios 5078750b034c scsi: lpfc: Remove lock contention target write path 8f82561d4ed4 scsi: lpfc: Slight fast-path performance optimizations 632e3ddb91dc scsi: lpfc: fix coverity error of dereference after null check 07a9248d7ea0 scsi: lpfc: Fix lockdep errors in sli_ringtx_put 10662b03cd9f scsi: lpfc: Fix reporting of read-only fw error errors 2c737f5a5c47 scsi: lpfc: fix lpfc_nvmet_mrq to be bound by hdw queue count ce0e7ee555b1 scsi: lpfc: Make function lpfc_defer_pt2pt_acc static 035922837ebc scsi: lpfc: Update lpfc version to 12.4.0.1 
c0e0637b5639 scsi: lpfc: cleanup: remove unused fcp_txcmlpq_cnt f6f38dbe38dd scsi: lpfc: Complete removal of FCoE T10 PI support on SLI-4 adapters 87ba9f4e1e44 scsi: lpfc: Update async event logging 53aa87d8ac63 scsi: lpfc: Fix host hang at boot or slow boot 5df05a9d6459 scsi: lpfc: Fix coverity errors on NULL pointer checks c01400547f7a scsi: lpfc: Fix NVMe ABTS in response to receiving an ABTS e5c93b45be68 scsi: lpfc: Fix GPF on scsi command completion 55863d7f0b03 scsi: lpfc: Fix device recovery errors after PLOGI failures 9084ef6d5222 scsi: lpfc: Fix NVME io abort failures causing hangs e3537e7f1fe8 scsi: lpfc: Fix miss of register read failure check 88764affd6e3 scsi: lpfc: Fix premature re-enabling of interrupts in lpfc_sli_host_down fc99d4225045 scsi: lpfc: Fix pt2pt discovery on SLI3 HBAs 15e8455c4668 UBUNTU: SAUCE: Revert "nvme_fc: add module to ops template to allow module references" 190fa9115ac0 scsi: lpfc: Fix: Rework setting of fdmi symbolic node name registration ebd66c10c8f0 scsi: lpfc: use hdwq assigned cpu for allocation f42c518b6b1e scsi: lpfc: Fix a kernel warning triggered by lpfc_get_sgl_per_hdwq() 1a8ce116a003 scsi: lpfc: Fix hdwq sgl locks and irq handling 3815dd60ccda scsi: lpfc: Fix list corruption detected in lpfc_put_sgl_per_hdwq 644726cf4623 scsi: lpfc: fix: Coverity: lpfc_get_scsi_buf_s3(): Null pointer dereferences 15d7e5222908 scsi: lpfc: Fix rpi release when deleting vport 5b82af018796 scsi: lpfc: Fix memory leak on lpfc_bsg_write_ebuf_set func a7441301b20d nvme_fc: add module to ops template to allow module references ac173a225715 scsi: lpfc: fix: Coverity: lpfc_cmpl_els_rsp(): Null pointer dereferences 170cd3bb6108 scsi: lpfc: Fix duplicate unreg_rpi error in port offline flow 46dc7c48f754 scsi: lpfc: Fix unexpected error messages during RSCN handling 6182634f0d10 scsi: lpfc: Fix SLI3 hba in loop mode not discovering devices 4b4a2235f14f scsi: lpfc: Fix hardlockup in lpfc_abort_handler d46f5c50c520 scsi: lpfc: Fix list corruption 
in lpfc_sli_get_iocbq ba7d99a490e2 scsi: lpfc: Fix locking on mailbox command completion 763e1491d43a scsi: lpfc: Fix discovery failures when target device connectivity bounces f027d0e29db4 scsi: lpfc: Fix spinlock_irq issues in lpfc_els_flush_cmd() a3c51276ddfc scsi: lpfc: Fix bad ndlp ptr in xri aborted handling f83e148a4100 Merge tag 'scsi-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi (this merge is in 5.4 mainline)
* James Smart jsmart2021@gmail.com [200513 02:00]:
On 5/12/2020 2:28 PM, Chris Hofstaedtler wrote:
this commit, applied in Ubuntu's 5.4.0-30.34 tree as 77d5805eafdb5c42bdfe78f058ad9c40ee1278b4, appears to cause our HPE-branded 2-port 8Gb lpfcs to report FLOGI errors. Reverting it fixes target discovery for me. See below for log messages and HW details.
... Let me know if you need further debug logs or something.
Thanks,
I'm more interested in what other patches you do or do not have in your tree.
To save everybody time figuring out patches, I've tried with a 5.7-rc tree 24085f70a6e1b0cb647ec92623284641d8270637, which gives these dmesg messages:
[ 4.222975] Emulex LightPulse Fibre Channel SCSI driver 12.8.0.0 [ 4.223864] scsi host2: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 07 device 00 irq 128 [ 6.654380] scsi host4: Emulex LPe12000 PCIe Fibre Channel Adapter on PCI bus 07 device 01 irq 182 [ 7.169041] lpfc 0000:07:00.0: 0:1303 Link Up Event x1 received Data: x1 xf7 x20 x0 x0 x0 0 [ 9.578752] lpfc 0000:07:00.1: 1:1303 Link Up Event x1 received Data: x1 xf7 x20 x0 x0 x0 0 [ 27.225755] lpfc 0000:07:00.0: 0:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 29.637644] lpfc 0000:07:00.1: 1:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 47.275946] lpfc 0000:07:00.0: 0:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 49.787867] lpfc 0000:07:00.1: 1:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 67.356082] lpfc 0000:07:00.0: 0:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 69.875049] lpfc 0000:07:00.1: 1:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 87.401269] lpfc 0000:07:00.0: 0:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 89.929189] lpfc 0000:07:00.1: 1:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 105.533242] lpfc 0000:07:00.0: 0:(0):0237 Pending Link Event during Discovery: State x7 [ 105.533546] lpfc 0000:07:00.0: 0:1305 Link Down Event x2 received Data: x2 x7 x98014 x0 x0 [ 105.615008] lpfc 0000:07:00.0: 0:1303 Link Up Event x3 received Data: x3 x0 x20 x0 x0 x0 0 [ 109.989341] lpfc 0000:07:00.1: 1:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0 [ 124.772701] lpfc 0000:07:00.0: 0:(0):2858 FLOGI failure Status:x3/x2 TMO:x10 Data x101000 x0
This is the message that threw it to the left: 0237 Pending Link Event during Discovery
Let me look a little.
-- james
Best, Chris
linux-stable-mirror@lists.linaro.org