PCIe BW controller enables BW notifications for Downstream Ports by setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec. 7.5.3.7).
It was discovered that performing a reset can lead to the device underneath the Downstream Port becoming unavailable if BW notifications are left enabled throughout the reset sequence (at least LBMIE was found to cause an issue).
While the PCIe specifications do not indicate that BW notifications could not be kept enabled during resets, the PCIe Link state during an intentional reset is not of much interest. Thus, disable the BW controller for the bridge while the reset is performed and re-enable it after the reset has completed, to work around the problems some devices encounter if BW notifications are left on throughout the reset sequence.
Keep a counter for the disable/enable because, for a multi-function device (MFD), pci_dev_save_and_disable() and pci_dev_restore() will be executed back to back for the sibling devices:
[ 50.139010] vfio-pci 0000:01:00.0: resetting
[ 50.139053] vfio-pci 0000:01:00.1: resetting
[ 50.141126] pcieport 0000:00:01.1: PME: Spurious native interrupt!
[ 50.141133] pcieport 0000:00:01.1: PME: Spurious native interrupt!
[ 50.441466] vfio-pci 0000:01:00.0: reset done
[ 50.501534] vfio-pci 0000:01:00.1: reset done
Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765
Tested-by: Joel Mathew Thomas <proxy0@tutamail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: stable@vger.kernel.org
---
I suspect the root cause is some kind of violation of specifications. Resets shouldn't cause devices to become unavailable just because BW notifications have been enabled.
Before somebody comments on those dual rwsems: I do have a yet-to-be-submitted patch to simplify the locking as per Lukas Wunner's earlier suggestion. I've just focused on solving the regressions first.
 drivers/pci/pci.c         |  8 +++++++
 drivers/pci/pci.h         |  4 ++++
 drivers/pci/pcie/bwctrl.c | 49 ++++++++++++++++++++++++++++++++-------
 3 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 869d204a70a3..7a53d7474175 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5148,6 +5148,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
 			dev->driver ? dev->driver->err_handler : NULL;
+	struct pci_dev *bridge = pci_upstream_bridge(dev);

 	/*
 	 * dev->driver->err_handler->reset_prepare() is protected against
@@ -5166,6 +5167,9 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 	 */
 	pci_set_power_state(dev, PCI_D0);

+	if (bridge)
+		pcie_bwnotif_disable(bridge);
+
 	pci_save_state(dev);
 	/*
 	 * Disable the device by clearing the Command register, except for
@@ -5181,9 +5185,13 @@ static void pci_dev_restore(struct pci_dev *dev)
 {
 	const struct pci_error_handlers *err_handler =
 			dev->driver ? dev->driver->err_handler : NULL;
+	struct pci_dev *bridge = pci_upstream_bridge(dev);

 	pci_restore_state(dev);

+	if (bridge)
+		pcie_bwnotif_enable(bridge);
+
 	/*
 	 * dev->driver->err_handler->reset_done() is protected against
 	 * races with ->remove() by the device lock, which must be held by
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 01e51db8d285..856546f1aad9 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -759,12 +759,16 @@ static inline void pcie_ecrc_get_policy(char *str) { }
 #ifdef CONFIG_PCIEPORTBUS
 void pcie_reset_lbms_count(struct pci_dev *port);
 int pcie_lbms_count(struct pci_dev *port, unsigned long *val);
+void pcie_bwnotif_enable(struct pci_dev *port);
+void pcie_bwnotif_disable(struct pci_dev *port);
 #else
 static inline void pcie_reset_lbms_count(struct pci_dev *port) {}
 static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
 {
 	return -EOPNOTSUPP;
 }
+static inline void pcie_bwnotif_enable(struct pci_dev *port) {}
+static inline void pcie_bwnotif_disable(struct pci_dev *port) {}
 #endif

 struct pci_dev_reset_methods {
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index 0a5e7efbce2c..a117f6f67c07 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -40,11 +40,13 @@
  * @set_speed_mutex: Serializes link speed changes
  * @lbms_count: Count for LBMS (since last reset)
  * @cdev: Thermal cooling device associated with the port
+ * @disable_count: BW notifications disabled/enabled counter
  */
 struct pcie_bwctrl_data {
 	struct mutex set_speed_mutex;
 	atomic_t lbms_count;
 	struct thermal_cooling_device *cdev;
+	int disable_count;
 };

 /*
@@ -200,10 +202,9 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
 	return ret;
 }

-static void pcie_bwnotif_enable(struct pcie_device *srv)
+static void __pcie_bwnotif_enable(struct pci_dev *port)
 {
-	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
-	struct pci_dev *port = srv->port;
+	struct pcie_bwctrl_data *data = port->link_bwctrl;
 	u16 link_status;
 	int ret;

@@ -224,12 +225,44 @@ static void pcie_bwnotif_enable(struct pcie_device *srv)
 	pcie_update_link_speed(port->subordinate);
 }

-static void pcie_bwnotif_disable(struct pci_dev *port)
+void pcie_bwnotif_enable(struct pci_dev *port)
+{
+	guard(rwsem_read)(&pcie_bwctrl_setspeed_rwsem);
+	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
+
+	if (!port->link_bwctrl)
+		return;
+
+	port->link_bwctrl->disable_count--;
+	if (!port->link_bwctrl->disable_count) {
+		__pcie_bwnotif_enable(port);
+		pci_dbg(port, "BW notifications enabled\n");
+	}
+	WARN_ON_ONCE(port->link_bwctrl->disable_count < 0);
+}
+
+static void __pcie_bwnotif_disable(struct pci_dev *port)
 {
 	pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
 				   PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
 }

+void pcie_bwnotif_disable(struct pci_dev *port)
+{
+	guard(rwsem_read)(&pcie_bwctrl_setspeed_rwsem);
+	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
+
+	if (!port->link_bwctrl)
+		return;
+
+	port->link_bwctrl->disable_count++;
+
+	if (port->link_bwctrl->disable_count == 1) {
+		__pcie_bwnotif_disable(port);
+		pci_dbg(port, "BW notifications disabled\n");
+	}
+}
+
 static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
 {
 	struct pcie_device *srv = context;
@@ -314,7 +347,7 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)
 				return ret;
 			}

-			pcie_bwnotif_enable(srv);
+			__pcie_bwnotif_enable(port);
 		}
 	}

@@ -336,7 +369,7 @@ static void pcie_bwnotif_remove(struct pcie_device *srv)

 	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
 		scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
-			pcie_bwnotif_disable(srv->port);
+			__pcie_bwnotif_disable(srv->port);

 			free_irq(srv->irq, srv);

@@ -347,13 +380,13 @@ static void pcie_bwnotif_remove(struct pcie_device *srv)

 static int pcie_bwnotif_suspend(struct pcie_device *srv)
 {
-	pcie_bwnotif_disable(srv->port);
+	__pcie_bwnotif_disable(srv->port);
 	return 0;
 }

 static int pcie_bwnotif_resume(struct pcie_device *srv)
 {
-	pcie_bwnotif_enable(srv);
+	__pcie_bwnotif_enable(srv->port);
 	return 0;
 }
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
On Mon, Feb 17, 2025 at 06:52:58PM +0200, Ilpo Järvinen wrote:
PCIe BW controller enables BW notifications for Downstream Ports by setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec. 7.5.3.7).
It was discovered that performing a reset can lead to the device underneath the Downstream Port becoming unavailable if BW notifications are left enabled throughout the reset sequence (at least LBMIE was found to cause an issue).
What kind of reset? FLR? SBR? This needs to be specified in the commit message so that the reader isn't forced to sift through a bugzilla with dozens of comments and attachments.
The commit message should also mention the type of affected device (Nvidia GPU AD107M [GeForce RTX 4050 Max-Q / Mobile]). The Root Port above it is an AMD one; that may be relevant as well.
While the PCIe specifications do not indicate that BW notifications could not be kept enabled during resets, the PCIe Link state during an intentional reset is not of much interest. Thus, disable the BW controller for the bridge while the reset is performed and re-enable it after the reset has completed, to work around the problems some devices encounter if BW notifications are left on throughout the reset sequence.
This approach won't work if the reset is performed without software intervention. E.g. if a DPC event occurs, the device likewise undergoes a reset but there is no prior system software involvement. Software only becomes involved *after* the reset has occurred.
I think it needs to be tested if that same issue occurs with DPC. It's easy to simulate DPC by setting the Software Trigger bit:
setpci -s 00:01.1 ECAP_DPC+6.w=40:40
If the issue does occur with DPC then this fix isn't sufficient.
Keep a counter for the disable/enable because, for a multi-function device (MFD), pci_dev_save_and_disable() and pci_dev_restore() will be executed back to back for the sibling devices:
[ 50.139010] vfio-pci 0000:01:00.0: resetting
[ 50.139053] vfio-pci 0000:01:00.1: resetting
[ 50.141126] pcieport 0000:00:01.1: PME: Spurious native interrupt!
[ 50.141133] pcieport 0000:00:01.1: PME: Spurious native interrupt!
[ 50.441466] vfio-pci 0000:01:00.0: reset done
[ 50.501534] vfio-pci 0000:01:00.1: reset done
So why are you citing the PME messages here? Are they relevant? Do they not occur when the bandwidth controller is disabled? If they do not, they may provide a clue what's going on. But that's not clear from the commit message.
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5166,6 +5167,9 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 	 */
 	pci_set_power_state(dev, PCI_D0);

+	if (bridge)
+		pcie_bwnotif_disable(bridge);
+
 	pci_save_state(dev);
Instead of putting this in the PCI core, amend pcie_portdrv_err_handler with ->reset_prepare and ->reset_done callbacks which call down to all the port service drivers, then amend bwctrl.c to disable/enable interrupts in these callbacks.
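Roughly like this (just a sketch of the idea; the pcie_port_device_reset_prepare()/_done() helpers are hypothetical and would need to iterate over the port services the way the existing suspend/resume helpers do):

	static void pcie_portdrv_reset_prepare(struct pci_dev *dev)
	{
		/* Hypothetical helper: quiesce each port service (bwctrl etc.) */
		pcie_port_device_reset_prepare(dev);
	}

	static void pcie_portdrv_reset_done(struct pci_dev *dev)
	{
		/* Hypothetical helper: re-arm each port service */
		pcie_port_device_reset_done(dev);
	}

and then wire .reset_prepare/.reset_done into pcie_portdrv_err_handler so the PCI core invokes them around the reset.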
+	port->link_bwctrl->disable_count--;
+	if (!port->link_bwctrl->disable_count) {
+		__pcie_bwnotif_enable(port);
+		pci_dbg(port, "BW notifications enabled\n");
+	}
+	WARN_ON_ONCE(port->link_bwctrl->disable_count < 0);
So why do you need to count this? IIUC you get two consecutive disable and two consecutive enable events.
If the interrupts are already disabled, just do nothing. Same for enablement. Any reason this simpler approach doesn't work?
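I.e. something like this (only a sketch to illustrate the question; "notif_disabled" is a made-up bool in struct pcie_bwctrl_data, and the rwsem guards plus the !port->link_bwctrl check are omitted for brevity):

	void pcie_bwnotif_disable(struct pci_dev *port)
	{
		if (port->link_bwctrl->notif_disabled)
			return;		/* already disabled, nothing to do */
		port->link_bwctrl->notif_disabled = true;
		__pcie_bwnotif_disable(port);
	}

	void pcie_bwnotif_enable(struct pci_dev *port)
	{
		if (!port->link_bwctrl->notif_disabled)
			return;		/* already enabled, nothing to do */
		port->link_bwctrl->notif_disabled = false;
		__pcie_bwnotif_enable(port);
	}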
Thanks,
Lukas
On Tue, 18 Feb 2025, Lukas Wunner wrote:
On Mon, Feb 17, 2025 at 06:52:58PM +0200, Ilpo Järvinen wrote:
PCIe BW controller enables BW notifications for Downstream Ports by setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec. 7.5.3.7).
It was discovered that performing a reset can lead to the device underneath the Downstream Port becoming unavailable if BW notifications are left enabled throughout the reset sequence (at least LBMIE was found to cause an issue).
What kind of reset? FLR? SBR? This needs to be specified in the commit message so that the reader isn't forced to sift through a bugzilla with dozens of comments and attachments.
Heh, I never really tried to figure it out because the reset-disable patch was just a stab-in-the-dark kind of patch. To my surprise, it ended up working (after the initial confusion was resolved), and I just started to prepare this patch from that knowledge.
Logs do mention this:
[ 21.560206] pcieport 0000:00:01.1: unlocked secondary bus reset via: pciehp_reset_slot+0x98/0x140
...so it seems to be SBR.
The commit message should also mention the type of affected device (Nvidia GPU AD107M [GeForce RTX 4050 Max-Q / Mobile]). The Root Port above is an AMD one, that may be relevant as well.
Okay.
While the PCIe specifications do not indicate that BW notifications could not be kept enabled during resets, the PCIe Link state during an intentional reset is not of much interest. Thus, disable the BW controller for the bridge while the reset is performed and re-enable it after the reset has completed, to work around the problems some devices encounter if BW notifications are left on throughout the reset sequence.
This approach won't work if the reset is performed without software intervention. E.g. if a DPC event occurs, the device likewise undergoes a reset but there is no prior system software involvement. Software only becomes involved *after* the reset has occurred.
I think it needs to be tested if that same issue occurs with DPC. It's easy to simulate DPC by setting the Software Trigger bit:
setpci -s 00:01.1 ECAP_DPC+6.w=40:40
If the issue does occur with DPC then this fix isn't sufficient.
Looking into lspci logs, I don't see DPC capability being there for 00:01.1?!
Keep a counter for the disable/enable because, for a multi-function device (MFD), pci_dev_save_and_disable() and pci_dev_restore() will be executed back to back for the sibling devices:
[ 50.139010] vfio-pci 0000:01:00.0: resetting
[ 50.139053] vfio-pci 0000:01:00.1: resetting
[ 50.141126] pcieport 0000:00:01.1: PME: Spurious native interrupt!
[ 50.141133] pcieport 0000:00:01.1: PME: Spurious native interrupt!
[ 50.441466] vfio-pci 0000:01:00.0: reset done
[ 50.501534] vfio-pci 0000:01:00.1: reset done
So why are you citing the PME messages here? Are they relevant? Do they not occur when the bandwidth controller is disabled? If they do not, they may provide a clue what's going on. But that's not clear from the commit message.
They do also occur when the BW controller is disabled for the duration of the reset. What I don't currently have at hand is a comparable log from the era prior to any BW controller commits (i.e. from 6.12). The one currently in the bugzilla has the out-of-tree module loaded.
PME and BW notifications do share the interrupt, so I'd not entirely discount their relevance, and one of my theories (never mentioned anywhere until now) was that the extra interrupts that come due to BW notifications somehow manage to confuse the PME driver. But I never found anything to that effect.
I can remove the PME lines though.
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5166,6 +5167,9 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 	 */
 	pci_set_power_state(dev, PCI_D0);

+	if (bridge)
+		pcie_bwnotif_disable(bridge);
+
 	pci_save_state(dev);
Instead of putting this in the PCI core, amend pcie_portdrv_err_handler with ->reset_prepare and ->reset_done callbacks which call down to all the port service drivers, then amend bwctrl.c to disable/enable interrupts in these callbacks.
Will it work? I mean if the port itself is not reset (0000:00:01.1 in this case), do these callbacks get called for it?
+	port->link_bwctrl->disable_count--;
+	if (!port->link_bwctrl->disable_count) {
+		__pcie_bwnotif_enable(port);
+		pci_dbg(port, "BW notifications enabled\n");
+	}
+	WARN_ON_ONCE(port->link_bwctrl->disable_count < 0);
So why do you need to count this? IIUC you get two consecutive disable and two consecutive enable events.
If the interrupts are already disabled, just do nothing. Same for enablement. Any reason this simpler approach doesn't work?
If we enable on restore of the first device, BW notifications would get enabled before the last of the devices has been restored. While it might work (the test patch, after all, did work without this complexity), IMO it seems unwise to create such a racy pattern.
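To spell out what the counter does for the two sibling functions in the log above (illustrative sequence):

	save_and_disable(01:00.0)   disable_count 0 -> 1, notifications disabled
	save_and_disable(01:00.1)   disable_count 1 -> 2
	restore(01:00.0)            disable_count 2 -> 1, still disabled
	restore(01:00.1)            disable_count 1 -> 0, notifications re-enabled

With a plain already-disabled/already-enabled check, the restore of 01:00.0 would re-enable notifications while 01:00.1 is still being restored.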
For disable side, I think it would be possible to always disable unconditionally.
On Mon, Feb 24, 2025 at 05:13:15PM +0200, Ilpo Järvinen wrote:
On Tue, 18 Feb 2025, Lukas Wunner wrote:
On Mon, Feb 17, 2025 at 06:52:58PM +0200, Ilpo Järvinen wrote:
PCIe BW controller enables BW notifications for Downstream Ports by setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec. 7.5.3.7).
It was discovered that performing a reset can lead to the device underneath the Downstream Port becoming unavailable if BW notifications are left enabled throughout the reset sequence (at least LBMIE was found to cause an issue).
What kind of reset? FLR? SBR? This needs to be specified in the commit message so that the reader isn't forced to sift through a bugzilla with dozens of comments and attachments.
Heh, I never really tried to figure it out because the reset-disable patch was just a stab-in-the-dark kind of patch. To my surprise, it ended up working (after the initial confusion was resolved), and I just started to prepare this patch from that knowledge.
If the present patch is of the type "changing this somehow makes the problem go away" instead of a complete root-cause analysis, it would have been appropriate to mark it as an RFC.
I've started to dig into the bugzilla and the very first attachment (dmesg for the non-working case) shows:
vfio-pci 0000:01:00.0: timed out waiting for pending transaction; performing function level reset anyway
That message is emitted by pcie_flr(). Perhaps the Nvidia GPU takes more time than usual to finish pending transactions, so the first thing I would have tried would be to raise the timeout significantly and see if that helps. Yet I'm not seeing any patch or comment in the bugzilla where this was attempted. Please provide a patch for the reporter to verify this hypothesis.
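For reference, the kind of test patch I mean would simply poll the Transaction Pending bit in the Device Status register for much longer before giving up, roughly along these lines (an illustrative sketch only, not the actual pci_wait_for_pending() code):

	/* Sketch: poll DEVSTA Transaction Pending for up to ~10 s. */
	static bool pci_wait_for_pending_long(struct pci_dev *dev)
	{
		u16 status;
		int i;

		for (i = 0; i < 100; i++) {
			pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &status);
			if (!(status & PCI_EXP_DEVSTA_TRPND))
				return true;
			msleep(100);
		}
		return false;
	}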
Logs do mention this:
[ 21.560206] pcieport 0000:00:01.1: unlocked secondary bus reset via: pciehp_reset_slot+0x98/0x140
...so it seems to be SBR.
Looking at the vfio code, vfio_pci_core_enable() (which is called on binding the vfio driver to the GPU) invokes pci_try_reset_function(). This will execute the reset method configured via sysfs. The same is done on unbind via vfio_pci_core_disable().
So you should have asked the reporter for the contents of:

  /sys/bus/pci/devices/0000:01:00.0/reset_method
  /sys/bus/pci/devices/0000:01:00.1/reset_method
In particular, I would like to know whether the contents differ across different kernel versions.
There's another way to perform a reset: Via an ioctl. This ends up calling vfio_pci_dev_set_hot_reset(), which invokes pci_reset_bus() to perform an SBR.
Looking at dmesg output in log_linux_6.13.2-arch1-1_pcie_port_pm_off.log it seems that vfio first performs a function reset of the GPU on bind...
[ 40.171564] vfio-pci 0000:01:00.0: resetting
[ 40.276485] vfio-pci 0000:01:00.0: reset done
...and then goes on to perform an SBR both of the GPU and its audio device...
[ 40.381082] vfio-pci 0000:01:00.0: resetting
[ 40.381180] vfio-pci 0000:01:00.1: resetting
[ 40.381228] pcieport 0000:00:01.1: unlocked secondary bus reset via: pciehp_reset_slot+0x98/0x140
[ 40.620442] vfio-pci 0000:01:00.0: reset done
[ 40.620479] vfio-pci 0000:01:00.1: reset done
...which is odd because the audio device apparently wasn't bound to vfio-pci, otherwise there would have been a function reset. So why does vfio think it can safely reset it?
Oddly, there is a third function reset of only the GPU:
[ 40.621894] vfio-pci 0000:01:00.0: resetting
[ 40.724430] vfio-pci 0000:01:00.0: reset done
The reporter writes that pcie_port_pm=off avoids the PME messages. If the reset_method is "pm", I could imagine that the Nvidia GPU signals a PME event during the D0 -> D3hot -> D0 transition.
I also note that the vfio-pci driver allows runtime PM. So both the GPU and its audio device may runtime suspend to D3hot. This in turn lets the Root Port runtime suspend to D3hot. It looks like the reporter is using a laptop with an integrated AMD GPU and a discrete Nvidia GPU. On such products the platform often allows powering down the discrete GPU and this is usually controlled through ACPI Power Resources attached to the Root Port. Those are powered off after the Root Port goes to D3hot. You should have asked the reporter for an acpidump.
pcie_bwnotif_irq() accesses the Link Status register without acquiring a runtime PM reference on the PCIe port. This feels wrong and may also contribute to the issue reported here. Acquiring a runtime PM ref may sleep, so I think you need to change the driver to use a threaded IRQ handler.
Nvidia GPUs are known to hide the audio device if no audio-capable display is attached (e.g. HDMI). quirk_nvidia_hda() unhides the audio device on boot and resume. It might be necessary to also run the quirk after resetting the GPU. Knowing which reset_method was used is important to decide if that's necessary, and also whether a display was attached.
Moreover Nvidia GPUs are known to change the link speed on idle to reduce power consumption. Perhaps resetting the GPU causes a change of link speed and thus execution of pcie_bwnotif_irq()?
This approach won't work if the reset is performed without software intervention. E.g. if a DPC event occurs, the device likewise undergoes a reset but there is no prior system software involvement. Software only becomes involved *after* the reset has occurred.
I think it needs to be tested if that same issue occurs with DPC. It's easy to simulate DPC by setting the Software Trigger bit:
setpci -s 00:01.1 ECAP_DPC+6.w=40:40
If the issue does occur with DPC then this fix isn't sufficient.
Looking into lspci logs, I don't see DPC capability being there for 00:01.1?!
Hm, so we can't verify whether your approach is safe for DPC.
Instead of putting this in the PCI core, amend pcie_portdrv_err_handler with ->reset_prepare and ->reset_done callbacks which call down to all the port service drivers, then amend bwctrl.c to disable/enable interrupts in these callbacks.
Will it work? I mean if the port itself is not reset (0000:00:01.1 in this case), do these callbacks get called for it?
Never mind, indeed this won't work.
Thanks,
Lukas
On Thu, Feb 27, 2025 at 06:31:08AM +0100, Lukas Wunner wrote:
pcie_bwnotif_irq() accesses the Link Status register without acquiring a runtime PM reference on the PCIe port. This feels wrong and may also contribute to the issue reported here. Acquiring a runtime PM ref may sleep, so I think you need to change the driver to use a threaded IRQ handler.
I've realized we've had a discussion before why a threaded IRQ handler doesn't make sense...
https://lore.kernel.org/all/Z35qJ3H_8u5LQDJ6@wunner.de/
...but I'm still worried that a Downstream Port in a nested-switch configuration may be runtime suspended while the hardirq handler is running. Is there anything preventing that from happening?
To access config space of a port, it's sufficient if its upstream bridge is runtime active (i.e. in PCI D0).
So basically the below is what I have in mind. This assumes that the upstream bridge is still in D0 when the interrupt handler runs because in atomic context we can't wait for it to be runtime resumed. Seems like a fair assumption to me but what do I know...
-- >8 --
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index 0a5e7efbce2c..fea8f7412266 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/pci-bwctrl.h>
+#include <linux/pm_runtime.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -235,9 +236,13 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
 	struct pcie_device *srv = context;
 	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
 	struct pci_dev *port = srv->port;
+	struct device *parent __free(pm_runtime_put) = port->dev.parent;
 	u16 link_status, events;
 	int ret;

+	if (parent)
+		pm_runtime_get_noresume(parent);
+
 	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
 	if (ret != PCIBIOS_SUCCESSFUL)
 		return IRQ_NONE;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d39dc863f612..038228de773d 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -448,6 +448,8 @@ static inline int pm_runtime_put(struct device *dev)
 	return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
 }

+DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T))
+
 /**
  * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
  * @dev: Target device.
On Fri, 28 Feb 2025, Lukas Wunner wrote:
On Thu, Feb 27, 2025 at 06:31:08AM +0100, Lukas Wunner wrote:
pcie_bwnotif_irq() accesses the Link Status register without acquiring a runtime PM reference on the PCIe port. This feels wrong and may also contribute to the issue reported here. Acquiring a runtime PM ref may sleep, so I think you need to change the driver to use a threaded IRQ handler.
I've realized we've had a discussion before why a threaded IRQ handler doesn't make sense...
Yes.
https://lore.kernel.org/all/Z35qJ3H_8u5LQDJ6@wunner.de/
...but I'm still worried that a Downstream Port in a nested-switch configuration may be runtime suspended while the hardirq handler is running. Is there anything preventing that from happening?
I don't think there is.
To access config space of a port, it's sufficient if its upstream bridge is runtime active (i.e. in PCI D0).
So basically the below is what I have in mind. This assumes that the upstream bridge is still in D0 when the interrupt handler runs because in atomic context we can't wait for it to be runtime resumed. Seems like a fair assumption to me but what do I know...
bwctrl doesn't even want to resume the port in the irq handler. If the port is suspended, why would it have LBMS/LABS set? And we disabled notifications in the suspend handler anyway, so we're not even expecting them to arrive during a period of suspend (which does not mean there couldn't be interrupts due to other sources).
So there should be no problem in not calling resume for it.
-- >8 --
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index 0a5e7efbce2c..fea8f7412266 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/pci-bwctrl.h>
+#include <linux/pm_runtime.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -235,9 +236,13 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
 	struct pcie_device *srv = context;
 	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
 	struct pci_dev *port = srv->port;
+	struct device *parent __free(pm_runtime_put) = port->dev.parent;
 	u16 link_status, events;
 	int ret;

+	if (parent)
+		pm_runtime_get_noresume(parent);
Should this then check whether it's suspended and return early if so?
pm_runtime_suspended() has some caveats in its kerneldoc though, so I'm a bit unsure whether it can be called safely here; probably not.
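Something like this is what I mean (illustrative only; as said, I'm not sure pm_runtime_suspended() is actually safe to rely on in this context):

	if (parent && pm_runtime_suspended(parent))
		return IRQ_NONE;	/* port config space may be inaccessible */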
 	ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
 	if (ret != PCIBIOS_SUCCESSFUL)
 		return IRQ_NONE;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index d39dc863f612..038228de773d 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -448,6 +448,8 @@ static inline int pm_runtime_put(struct device *dev)
 	return __pm_runtime_idle(dev, RPM_GET_PUT | RPM_ASYNC);
 }

+DEFINE_FREE(pm_runtime_put, struct device *, if (_T) pm_runtime_put(_T))
+
 /**
  * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
  * @dev: Target device.
On Thu, 27 Feb 2025, Lukas Wunner wrote:
On Mon, Feb 24, 2025 at 05:13:15PM +0200, Ilpo Järvinen wrote:
On Tue, 18 Feb 2025, Lukas Wunner wrote:
On Mon, Feb 17, 2025 at 06:52:58PM +0200, Ilpo Järvinen wrote:
PCIe BW controller enables BW notifications for Downstream Ports by setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec. 7.5.3.7).
It was discovered that performing a reset can lead to the device underneath the Downstream Port becoming unavailable if BW notifications are left enabled throughout the reset sequence (at least LBMIE was found to cause an issue).
What kind of reset? FLR? SBR? This needs to be specified in the commit message so that the reader isn't forced to sift through a bugzilla with dozens of comments and attachments.
Heh, I never really tried to figure it out because the reset-disable patch was just a stab-in-the-dark kind of patch. To my surprise, it ended up working (after the initial confusion was resolved), and I just started to prepare this patch from that knowledge.
If the present patch is of the type "changing this somehow makes the problem go away" instead of a complete root-cause analysis, it would have been appropriate to mark it as an RFC.
I'll keep that in mind in the future.
I don't think your depiction of the situation is entirely accurate, though. The reporter had confirmed that if the bwctrl change(s) are reverted, the problem is not observed.
So I set out to understand why bwctrl has any impact here at all, since it should only touch a different device (and even that in relatively limited ways). And that is how I found that if bwctrl is not enabled during the reset, the problem is also not observed.
I also noted that the patch just works around the problems, and there was also informal speculation about the suspected root cause in the patch (the only other theory I have about the root cause relates to extra interrupts causing a problem through the hotplug/PME interrupt handlers).
I've started to dig into the bugzilla and the very first attachment (dmesg for the non-working case) shows:
vfio-pci 0000:01:00.0: timed out waiting for pending transaction; performing function level reset anyway
That message is emitted by pcie_flr(). Perhaps the Nvidia GPU takes more time than usual to finish pending transactions, so the first thing I would have tried would be to raise the timeout significantly and see if that helps. Yet I'm not seeing any patch or comment in the bugzilla where this was attempted. Please provide a patch for the reporter to verify this hypothesis.
I have a problem understanding how reverting the bwctrl change "solves" this. Bwctrl is not even supposed to touch the Nvidia GPU at all, AFAIK.
Logs do mention this:
[ 21.560206] pcieport 0000:00:01.1: unlocked secondary bus reset via: pciehp_reset_slot+0x98/0x140
...so it seems to be SBR.
Looking at the vfio code, vfio_pci_core_enable() (which is called on binding the vfio driver to the GPU) invokes pci_try_reset_function(). This will execute the reset method configured via sysfs. The same is done on unbind via vfio_pci_core_disable().
So you should have asked the reporter for the contents of:

  /sys/bus/pci/devices/0000:01:00.0/reset_method
  /sys/bus/pci/devices/0000:01:00.1/reset_method
In particular, I would like to know whether the contents differ across different kernel versions.
There's another way to perform a reset: Via an ioctl. This ends up calling vfio_pci_dev_set_hot_reset(), which invokes pci_reset_bus() to perform an SBR.
Looking at dmesg output in log_linux_6.13.2-arch1-1_pcie_port_pm_off.log it seems that vfio first performs a function reset of the GPU on bind...
[ 40.171564] vfio-pci 0000:01:00.0: resetting
[ 40.276485] vfio-pci 0000:01:00.0: reset done
...and then goes on to perform an SBR both of the GPU and its audio device...
[ 40.381082] vfio-pci 0000:01:00.0: resetting
[ 40.381180] vfio-pci 0000:01:00.1: resetting
[ 40.381228] pcieport 0000:00:01.1: unlocked secondary bus reset via: pciehp_reset_slot+0x98/0x140
[ 40.620442] vfio-pci 0000:01:00.0: reset done
[ 40.620479] vfio-pci 0000:01:00.1: reset done
...which is odd because the audio device apparently wasn't bound to vfio-pci, otherwise there would have been a function reset. So why does vfio think it can safely reset it?
Oddly, there is a third function reset of only the GPU:
[ 40.621894] vfio-pci 0000:01:00.0: resetting
[ 40.724430] vfio-pci 0000:01:00.0: reset done
The reporter writes that pcie_port_pm=off avoids the PME messages. If the reset_method is "pm", I could imagine that the Nvidia GPU signals a PME event during the D0 -> D3hot -> D0 transition.
I also note that the vfio-pci driver allows runtime PM. So both the GPU and its audio device may runtime suspend to D3hot. This in turn lets the Root Port runtime suspend to D3hot. It looks like the reporter is using a laptop with an integrated AMD GPU and a discrete Nvidia GPU. On such products the platform often allows powering down the discrete GPU and this is usually controlled through ACPI Power Resources attached to the Root Port. Those are powered off after the Root Port goes to D3hot. You should have asked the reporter for an acpidump.
A lot of these suggestions do not make much sense to me given that reverting bwctrl alone makes the problem go away. E.g., the reset method should be exactly the same.
I can see there could be some path through either the hotplug or PME interrupt handlers, where an extra interrupt that comes due to bwctrl (LBMIE) being enabled triggers one such behavior. But none of the above description theorizes anything in that direction.
pcie_bwnotif_irq() accesses the Link Status register without acquiring a runtime PM reference on the PCIe port. This feels wrong and may also contribute to the issue reported here. Acquiring a runtime PM ref may sleep, so I think you need to change the driver to use a threaded IRQ handler.
Nvidia GPUs are known to hide the audio device if no audio-capable display is attached (e.g. HDMI). quirk_nvidia_hda() unhides the audio device on boot and resume. It might be necessary to also run the quirk after resetting the GPU. Knowing which reset_method was used is important to decide if that's necessary, and also whether a display was attached.
...You seem to have a lot of ideas on this. ;-)
Moreover Nvidia GPUs are known to change the link speed on idle to reduce power consumption. Perhaps resetting the GPU causes a change of link speed and thus execution of pcie_bwnotif_irq()?
Why would execution of pcie_bwnotif_irq() be a problem? It's not that complicated.
I'm more worried that having LBMIE enabled causes also the other interrupt handlers to execute due to the shared interrupt.
This approach won't work if the reset is performed without software intervention. E.g. if a DPC event occurs, the device likewise undergoes a reset but there is no prior system software involvement. Software only becomes involved *after* the reset has occurred.
I think it needs to be tested if that same issue occurs with DPC. It's easy to simulate DPC by setting the Software Trigger bit:
setpci -s 00:01.1 ECAP_DPC+6.w=40:40
If the issue does occur with DPC then this fix isn't sufficient.
Looking into lspci logs, I don't see DPC capability being there for 00:01.1?!
Hm, so we can't verify whether your approach is safe for DPC.
Instead of putting this in the PCI core, amend pcie_portdrv_err_handler with ->reset_prepare and ->reset_done callbacks which call down to all the port service drivers, then amend bwctrl.c to disable/enable interrupts in these callbacks.
Will it work? I mean if the port itself is not reset (0000:00:01.1 in this case), do these callbacks get called for it?
Never mind, indeed this won't work.
Thanks,
Lukas
I'm sorry for the delay; I've been sick for a while.