This helper, vfio_pci_core_mmap_prep_dmabuf(), creates a single-range DMABUF for the purpose of mapping a PCI BAR. This is used in a future commit by VFIO's ordinary mmap() path.
This function transfers ownership of the reference on the VFIO device file (the original vma->vm_file) to the DMABUF, which fput()s it when the DMABUF is released.
Refactor the existing vfio_pci_core_feature_dma_buf() to split out the export code common to the two paths: VFIO_DEVICE_FEATURE_DMA_BUF and the new VFIO BAR mmap() path.
Signed-off-by: Matt Evans <mattev@meta.com> --- drivers/vfio/pci/vfio_pci_dmabuf.c | 143 +++++++++++++++++++++++------ drivers/vfio/pci/vfio_pci_priv.h | 5 + 2 files changed, 118 insertions(+), 30 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c index 8b6bae56bbf2..3554afbc8ebc 100644 --- a/drivers/vfio/pci/vfio_pci_dmabuf.c +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c @@ -82,6 +82,8 @@ static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf) up_write(&priv->vdev->memory_lock); vfio_device_put_registration(&priv->vdev->vdev); } + if (priv->vfile) + fput(priv->vfile); kfree(priv->phys_vec); kfree(priv); } @@ -204,6 +206,45 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf, return -EFAULT; }
+/* + * Create a DMABUF corresponding to priv, add it to vdev->dmabufs list + * for tracking (meaning cleanup or revocation will zap it), and take + * a vfio_device registration. + */ +static int vfio_pci_dmabuf_export(struct vfio_pci_core_device *vdev, + struct vfio_pci_dma_buf *priv, uint32_t flags) +{ + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + + if (!vfio_device_try_get_registration(&vdev->vdev)) + return -ENODEV; + + exp_info.ops = &vfio_pci_dmabuf_ops; + exp_info.size = priv->size; + exp_info.flags = flags; + exp_info.priv = priv; + + priv->dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(priv->dmabuf)) { + vfio_device_put_registration(&vdev->vdev); + return PTR_ERR(priv->dmabuf); + } + + kref_init(&priv->kref); + init_completion(&priv->comp); + + /* dma_buf_put() now frees priv */ + INIT_LIST_HEAD(&priv->dmabufs_elm); + down_write(&vdev->memory_lock); + dma_resv_lock(priv->dmabuf->resv, NULL); + priv->revoked = !__vfio_pci_memory_enabled(vdev); + list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs); + dma_resv_unlock(priv->dmabuf->resv); + up_write(&vdev->memory_lock); + + return 0; +} + /* * This is a temporary "private interconnect" between VFIO DMABUF and iommufd. * It allows the two co-operating drivers to exchange the physical address of @@ -322,7 +363,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, { struct vfio_device_feature_dma_buf get_dma_buf = {}; struct vfio_region_dma_range *dma_ranges; - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); struct vfio_pci_dma_buf *priv; size_t length; int ret; @@ -392,34 +432,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, kfree(dma_ranges); dma_ranges = NULL;
- if (!vfio_device_try_get_registration(&vdev->vdev)) { - ret = -ENODEV; + ret = vfio_pci_dmabuf_export(vdev, priv, get_dma_buf.open_flags); + if (ret) goto err_free_phys; - } - - exp_info.ops = &vfio_pci_dmabuf_ops; - exp_info.size = priv->size; - exp_info.flags = get_dma_buf.open_flags; - exp_info.priv = priv; - - priv->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(priv->dmabuf)) { - ret = PTR_ERR(priv->dmabuf); - goto err_dev_put; - } - - kref_init(&priv->kref); - init_completion(&priv->comp); - - /* dma_buf_put() now frees priv */ - INIT_LIST_HEAD(&priv->dmabufs_elm); - down_write(&vdev->memory_lock); - dma_resv_lock(priv->dmabuf->resv, NULL); - priv->revoked = !__vfio_pci_memory_enabled(vdev); - list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs); - dma_resv_unlock(priv->dmabuf->resv); - up_write(&vdev->memory_lock); - /* * dma_buf_fd() consumes the reference, when the file closes the dmabuf * will be released. @@ -430,8 +445,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
return ret;
-err_dev_put: - vfio_device_put_registration(&vdev->vdev); err_free_phys: kfree(priv->phys_vec); err_free_priv: @@ -441,6 +454,76 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, return ret; }
+int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev, + struct vm_area_struct *vma, + u64 phys_start, u64 req_len, + unsigned int res_index) +{ + struct vfio_pci_dma_buf *priv; + const unsigned int nr_ranges = 1; + int ret; + + priv = kzalloc_obj(*priv); + if (!priv) + return -ENOMEM; + + priv->phys_vec = kzalloc_obj(*priv->phys_vec); + if (!priv->phys_vec) { + ret = -ENOMEM; + goto err_free_priv; + } + + /* + * The mmap() request's vma->vm_pgoff might be non-zero, but + * the DMABUF is created from _offset zero_ of the BAR. The + * portion between zero and the vm_pgoff offset is inaccessible + * through this VMA, but this approach keeps the + * /proc/<pid>/maps offset somewhat consistent with the + * pre-DMABUF code. Size includes the offset portion. + * + * This differs from an mmap() of an explicitly-exported + * DMABUF which is an arbitrary slice of the BAR, would be + * created with the desired offset+size, and would usually be + * mmap()ed with pgoff = 0. + * + * Both are equivalent and vfio_pci_dma_buf_find_pfn() finds + * the same PFNs. + */ + priv->vdev = vdev; + priv->nr_ranges = nr_ranges; + priv->size = (vma->vm_pgoff << PAGE_SHIFT) + req_len; + priv->provider = pcim_p2pdma_provider(vdev->pdev, res_index); + if (!priv->provider) { + ret = -EINVAL; + goto err_free_phys; + } + + priv->phys_vec[0].paddr = phys_start; + priv->phys_vec[0].len = priv->size; + + ret = vfio_pci_dmabuf_export(vdev, priv, O_CLOEXEC | O_RDWR); + if (ret) + goto err_free_phys; + + /* + * The VMA gets the DMABUF file so that other users can locate + * the DMABUF via a VA. Ownership of the original VFIO device + * file being mmap()ed transfers to priv, and is put when the + * DMABUF is released.
+ */ + priv->vfile = vma->vm_file; + vma->vm_file = priv->dmabuf->file; + vma->vm_private_data = priv; + + return 0; + +err_free_phys: + kfree(priv->phys_vec); +err_free_priv: + kfree(priv); + return ret; +} + void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked) { struct vfio_pci_dma_buf *priv; diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h index 317170a5b407..3cff1b7eb47b 100644 --- a/drivers/vfio/pci/vfio_pci_priv.h +++ b/drivers/vfio/pci/vfio_pci_priv.h @@ -30,6 +30,7 @@ struct vfio_pci_dma_buf { size_t size; struct phys_vec *phys_vec; struct p2pdma_provider *provider; + struct file *vfile; u32 nr_ranges; struct kref kref; struct completion comp; @@ -132,6 +133,10 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf, unsigned long address, unsigned int order, unsigned long *out_pfn); +int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev, + struct vm_area_struct *vma, + u64 phys_start, u64 req_len, + unsigned int res_index);
#ifdef CONFIG_VFIO_PCI_DMABUF int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,