From: Daniel Stodden dns@arista.com
[ Upstream commit df25461119d987b8c81d232cfe4411e91dcabe66 ]
A PCI device hot removal may occur while stdev->cdev is held open. The call to stdev_release() then happens during close or exit, at a point way past switchtec_pci_remove(). Otherwise the last ref would vanish with the trailing put_device(), just before return.
At that later point in time, the devm cleanup has already removed the stdev->mmio_mrpc mapping. Also, the stdev->pdev reference was not a counted one. Therefore, in DMA mode, the iowrite32() in stdev_release() will cause a fatal page fault, and the subsequent dma_free_coherent(), if reached, would pass a stale &stdev->pdev->dev pointer.
Fix by moving MRPC DMA shutdown into switchtec_pci_remove(), after stdev_kill(). Counting the stdev->pdev ref is now optional, but may prevent future accidents.
Reproducible via the script at https://lore.kernel.org/r/20231113212150.96410-1-dns@arista.com
Link: https://lore.kernel.org/r/20231122042316.91208-2-dns@arista.com Signed-off-by: Daniel Stodden dns@arista.com Signed-off-by: Bjorn Helgaas bhelgaas@google.com Reviewed-by: Logan Gunthorpe logang@deltatee.com Reviewed-by: Dmitry Safonov dima@arista.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/switch/switchtec.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 5b921387eca6..1804794d0e68 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1308,13 +1308,6 @@ static void stdev_release(struct device *dev) { struct switchtec_dev *stdev = to_stdev(dev);
- if (stdev->dma_mrpc) { - iowrite32(0, &stdev->mmio_mrpc->dma_en); - flush_wc_buf(stdev); - writeq(0, &stdev->mmio_mrpc->dma_addr); - dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), - stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); - } kfree(stdev); }
@@ -1358,7 +1351,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return ERR_PTR(-ENOMEM);
stdev->alive = true; - stdev->pdev = pdev; + stdev->pdev = pci_dev_get(pdev); INIT_LIST_HEAD(&stdev->mrpc_queue); mutex_init(&stdev->mrpc_mutex); stdev->mrpc_busy = 0; @@ -1391,6 +1384,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) return stdev;
err_put: + pci_dev_put(stdev->pdev); put_device(&stdev->dev); return ERR_PTR(rc); } @@ -1644,6 +1638,18 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, return 0; }
+static void switchtec_exit_pci(struct switchtec_dev *stdev) +{ + if (stdev->dma_mrpc) { + iowrite32(0, &stdev->mmio_mrpc->dma_en); + flush_wc_buf(stdev); + writeq(0, &stdev->mmio_mrpc->dma_addr); + dma_free_coherent(&stdev->pdev->dev, sizeof(*stdev->dma_mrpc), + stdev->dma_mrpc, stdev->dma_mrpc_dma_addr); + stdev->dma_mrpc = NULL; + } +} + static int switchtec_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1703,6 +1709,9 @@ static void switchtec_pci_remove(struct pci_dev *pdev) ida_free(&switchtec_minor_ida, MINOR(stdev->dev.devt)); dev_info(&stdev->dev, "unregistered.\n"); stdev_kill(stdev); + switchtec_exit_pci(stdev); + pci_dev_put(stdev->pdev); + stdev->pdev = NULL; put_device(&stdev->dev); }