Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") enables qword access to the PCI bar resources. However certain devices (e.g. Intel X710) are observed with problem upon qword accesses to the rom bar, e.g. triggering PCI aer errors.
Instead of trying to identify all broken devices, universally disable qword access to the rom bar i.e. going back to the old way which worked reliably for years.
Reported-by: Farrah Chen farrah.chen@intel.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220740 Fixes: 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") Cc: stable@vger.kernel.org Signed-off-by: Kevin Tian kevin.tian@intel.com --- drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++-- drivers/vfio/pci/vfio_pci_rdwr.c | 19 +++++++++++++++---- include/linux/vfio_pci_core.h | 2 +- 3 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index e346392b72f6..9b39184f76b7 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -491,7 +491,7 @@ nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev, ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, nvdev->resmem.ioaddr, buf, offset, mem_count, - 0, 0, false); + 0, 0, false, true); }
return ret; @@ -609,7 +609,7 @@ nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev, ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, nvdev->resmem.ioaddr, (char __user *)buf, pos, mem_count, - 0, 0, true); + 0, 0, true, true); }
return ret; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 6192788c8ba3..95dc7e04cb08 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -135,7 +135,7 @@ VFIO_IORDWR(64) ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, - size_t x_end, bool iswrite) + size_t x_end, bool iswrite, bool allow_qword) { ssize_t done = 0; int ret; @@ -150,7 +150,7 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, else fillable = 0;
- if (fillable >= 8 && !(off % 8)) { + if (allow_qword && fillable >= 8 && !(off % 8)) { ret = vfio_pci_iordwr64(vdev, iswrite, test_mem, io, buf, off, &filled); if (ret) @@ -234,6 +234,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, void __iomem *io; struct resource *res = &vdev->pdev->resource[bar]; ssize_t done; + bool allow_qword = true;
if (pci_resource_start(pdev, bar)) end = pci_resource_len(pdev, bar); @@ -262,6 +263,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, if (!io) return -ENOMEM; x_end = end; + + /* + * Certain devices (e.g. Intel X710) don't support qword + * access to the ROM bar. Otherwise PCI AER errors might be + * triggered. + * + * Disable qword access to the ROM bar universally, which + * worked reliably for years before qword access is enabled. + */ + allow_qword = false; } else { int ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) { @@ -278,7 +289,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, }
done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos, - count, x_start, x_end, iswrite); + count, x_start, x_end, iswrite, allow_qword);
if (done >= 0) *ppos += done; @@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, * to the memory enable bit in the command register. */ done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count, - 0, 0, iswrite); + 0, 0, iswrite, true);
vga_put(vdev->pdev, rsrc);
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index f541044e42a2..3a75b76eaed3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -133,7 +133,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start, - size_t x_end, bool iswrite); + size_t x_end, bool iswrite, bool allow_qword); bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, loff_t reg_start, size_t reg_cnt, loff_t *buf_offset,
From: Alex Williamson alex@shazbot.org Sent: Thursday, December 18, 2025 6:08 AM
On Fri, 12 Dec 2025 02:09:41 +0000 Kevin Tian kevin.tian@intel.com wrote:
Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") enables qword access to the PCI bar resources. However certain devices (e.g. Intel X710) are observed with problem upon qword accesses to the rom bar, e.g. triggering PCI aer errors.
Instead of trying to identify all broken devices, universally disable qword access to the rom bar i.e. going back to the old way which worked reliably for years.
Thanks for finding the root cause of this, Kevin. I think it would add useful context in the commit log here to describe that the ROM is somewhat unique in this respect because it's cached by QEMU which simply does a pread() of the remaining size until it gets the full contents. The other BARs would only perform operations at the same access width as their guest drivers.
will do
ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool
test_mem,
void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start,
size_t x_end, bool iswrite)
size_t x_end, bool iswrite, bool allow_qword)I've been trying to think about how we avoid yet another bool arg here. What do you think about creating an enum:
enum vfio_pci_io_width { VFIO_PCI_IO_WIDTH_1 = 1, VFIO_PCI_IO_WIDTH_2 = 2, VFIO_PCI_IO_WIDTH_4 = 4, VFIO_PCI_IO_WIDTH_8 = 8, };
The arg here would then be enum vfio_pci_io_width max_width, and for each test we'd add a condition && max_width >= 8 (4, 2), where we can assume byte access as the minimum regardless of the arg. It's a little more than we need, but it follows a simple pattern and I think makes the call sites a bit more intuitive.
make sense
@@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device
*vdev, char __user *buf,
* to the memory enable bit in the command register. */done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
0, 0, iswrite);
0, 0, iswrite, true);I have no basis other than paranoia and "VGA is old and a 64-bit access to it seem wrong", but I'm tempted to restrict this to dword access as well. I don't want to take your fix off track from it's specific goal though. Thanks,
I'll add a new patch for it.
On Fri, 12 Dec 2025 02:09:41 +0000 Kevin Tian kevin.tian@intel.com wrote:
Commit 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") enables qword access to the PCI bar resources. However certain devices (e.g. Intel X710) are observed with problem upon qword accesses to the rom bar, e.g. triggering PCI aer errors.
Instead of trying to identify all broken devices, universally disable qword access to the rom bar i.e. going back to the old way which worked reliably for years.
Thanks for finding the root cause of this, Kevin. I think it would add useful context in the commit log here to describe that the ROM is somewhat unique in this respect because it's cached by QEMU which simply does a pread() of the remaining size until it gets the full contents. The other BARs would only perform operations at the same access width as their guest drivers.
Reported-by: Farrah Chen farrah.chen@intel.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220740 Fixes: 2b938e3db335 ("vfio/pci: Enable iowrite64 and ioread64 for vfio pci") Cc: stable@vger.kernel.org Signed-off-by: Kevin Tian kevin.tian@intel.com
drivers/vfio/pci/nvgrace-gpu/main.c | 4 ++-- drivers/vfio/pci/vfio_pci_rdwr.c | 19 +++++++++++++++---- include/linux/vfio_pci_core.h | 2 +- 3 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c index e346392b72f6..9b39184f76b7 100644 --- a/drivers/vfio/pci/nvgrace-gpu/main.c +++ b/drivers/vfio/pci/nvgrace-gpu/main.c @@ -491,7 +491,7 @@ nvgrace_gpu_map_and_read(struct nvgrace_gpu_pci_core_device *nvdev, ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, nvdev->resmem.ioaddr, buf, offset, mem_count,
0, 0, false);
}0, 0, false, true);return ret; @@ -609,7 +609,7 @@ nvgrace_gpu_map_and_write(struct nvgrace_gpu_pci_core_device *nvdev, ret = vfio_pci_core_do_io_rw(&nvdev->core_device, false, nvdev->resmem.ioaddr, (char __user *)buf, pos, mem_count,
0, 0, true);
}0, 0, true, true);return ret; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 6192788c8ba3..95dc7e04cb08 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -135,7 +135,7 @@ VFIO_IORDWR(64) ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start,
size_t x_end, bool iswrite)
size_t x_end, bool iswrite, bool allow_qword)
I've been trying to think about how we avoid yet another bool arg here. What do you think about creating an enum:
enum vfio_pci_io_width { VFIO_PCI_IO_WIDTH_1 = 1, VFIO_PCI_IO_WIDTH_2 = 2, VFIO_PCI_IO_WIDTH_4 = 4, VFIO_PCI_IO_WIDTH_8 = 8, };
The arg here would then be enum vfio_pci_io_width max_width, and for each test we'd add a condition && max_width >= 8 (4, 2), where we can assume byte access as the minimum regardless of the arg. It's a little more than we need, but it follows a simple pattern and I think makes the call sites a bit more intuitive.
{ ssize_t done = 0; int ret; @@ -150,7 +150,7 @@ ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, else fillable = 0;
if (fillable >= 8 && !(off % 8)) {
if (allow_qword && fillable >= 8 && !(off % 8)) { ret = vfio_pci_iordwr64(vdev, iswrite, test_mem, io, buf, off, &filled); if (ret)@@ -234,6 +234,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, void __iomem *io; struct resource *res = &vdev->pdev->resource[bar]; ssize_t done;
- bool allow_qword = true;
if (pci_resource_start(pdev, bar)) end = pci_resource_len(pdev, bar); @@ -262,6 +263,16 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, if (!io) return -ENOMEM; x_end = end;
/** Certain devices (e.g. Intel X710) don't support qword* access to the ROM bar. Otherwise PCI AER errors might be* triggered.** Disable qword access to the ROM bar universally, which* worked reliably for years before qword access is enabled.*/ } else { int ret = vfio_pci_core_setup_barmap(vdev, bar); if (ret) {allow_qword = false;@@ -278,7 +289,7 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf, } done = vfio_pci_core_do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
count, x_start, x_end, iswrite);
count, x_start, x_end, iswrite, allow_qword);if (done >= 0) *ppos += done; @@ -352,7 +363,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf, * to the memory enable bit in the command register. */ done = vfio_pci_core_do_io_rw(vdev, false, iomem, buf, off, count,
0, 0, iswrite);
0, 0, iswrite, true);
I have no basis other than paranoia and "VGA is old and a 64-bit access to it seem wrong", but I'm tempted to restrict this to dword access as well. I don't want to take your fix off track from it's specific goal though. Thanks,
Alex
vga_put(vdev->pdev, rsrc); diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index f541044e42a2..3a75b76eaed3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -133,7 +133,7 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev, ssize_t vfio_pci_core_do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem, void __iomem *io, char __user *buf, loff_t off, size_t count, size_t x_start,
size_t x_end, bool iswrite);
size_t x_end, bool iswrite, bool allow_qword);bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, loff_t reg_start, size_t reg_cnt, loff_t *buf_offset,
linux-stable-mirror@lists.linaro.org