On 2025-07-23 2:00 pm, Leon Romanovsky wrote: [...]
> +static struct sg_table *
> +vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> +		     enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct p2pdma_provider *provider = priv->vdev->provider;
> +	struct dma_iova_state *state = attachment->priv;
> +	struct phys_vec *phys_vec = &priv->phys_vec;
> +	struct scatterlist *sgl;
> +	struct sg_table *sgt;
> +	dma_addr_t addr;
> +	int ret;
> +
> +	dma_resv_assert_held(priv->dmabuf->resv);
> +
> +	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
> +	if (!sgt)
> +		return ERR_PTR(-ENOMEM);
> +
> +	ret = sg_alloc_table(sgt, 1, GFP_KERNEL | __GFP_ZERO);
> +	if (ret)
> +		goto err_kfree_sgt;
> +
> +	sgl = sgt->sgl;
> +
> +	if (!state) {
> +		addr = pci_p2pdma_bus_addr_map(provider, phys_vec->paddr);
> +	} else if (dma_use_iova(state)) {
> +		ret = dma_iova_link(attachment->dev, state, phys_vec->paddr, 0,
> +				    phys_vec->len, dir, DMA_ATTR_SKIP_CPU_SYNC);
The supposed benefits of this API are only for replacing scatterlists where multiple disjoint pages are being mapped. In this case, with just one single contiguous mapping, it is objectively worse to have to bounce in and out of the IOMMU layer three separate times and store a dma_iova_state, to achieve the exact same operations that a single call to iommu_dma_map_resource() would perform more efficiently and with no external state required.
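Concretely, the whole branch could collapse to one mapping call. A rough sketch using the driver-facing dma_map_resource() wrapper (which lands in iommu_dma_map_resource() when translation is on; untested against this series, and the error code is illustrative):

	addr = dma_map_resource(attachment->dev, phys_vec->paddr,
				phys_vec->len, dir, DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(attachment->dev, addr)) {
		ret = -ENOMEM;	/* illustrative; pick whatever fits */
		goto err_free_table;
	}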
Oh yeah, and mapping MMIO with regular memory attributes (IOMMU_CACHE) rather than the appropriate ones (IOMMU_MMIO), as this will end up doing, isn't guaranteed not to end badly either (e.g. if the system interconnect ends up merging consecutive write bursts and exceeding the target root port's MPS (Max Payload Size)).
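For reference, the resource path already gets this right. This is roughly how it picks its protection flags today (paraphrased from drivers/iommu/dma-iommu.c, so modulo upstream churn):

dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	/* coherent=false, IOMMU_MMIO forced - no IOMMU_CACHE for MMIO */
	return __iommu_dma_map(dev, phys, size,
			dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
			dma_get_mask(dev));
}

i.e. it never applies the cacheable attributes that the page-oriented dma_iova_link() path will.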
> +		if (ret)
> +			goto err_free_table;
> +
> +		ret = dma_iova_sync(attachment->dev, state, 0, phys_vec->len);
> +		if (ret)
> +			goto err_unmap_dma;
> +
> +		addr = state->addr;
> +	} else {
> +		addr = dma_map_phys(attachment->dev, phys_vec->paddr,
> +				    phys_vec->len, dir, DMA_ATTR_SKIP_CPU_SYNC);
And again, if the IOMMU is in bypass (the idea of P2P with vfio-noiommu simply isn't worth entertaining), what purpose do you imagine this call serves at all, other than to hilariously crash under "swiotlb=force"? Even in the case that phys_to_dma(phys_vec->paddr) != phys_vec->paddr, in almost all circumstances (both hardware offsets and CoCo environments with address-based aliasing) it is more likely than not that the latter is still the address you want and the former is wrong, and liable to lead to corruption or fatal system errors, because MMIO and memory remain fundamentally different things.
AFAICS you're *depending* on this call being an effective no-op, and thus only demonstrating that the dma_map_phys() idea is still entirely unnecessary.
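To spell the hazard out, here's a deliberately simplified sketch of what the non-IOMMU leg boils down to (not the exact mainline code; the function name is hypothetical):

static dma_addr_t sketch_dma_direct_map(struct device *dev, phys_addr_t paddr,
					size_t size, enum dma_data_direction dir)
{
	if (is_swiotlb_force_bounce(dev))
		/*
		 * Bounce buffering memcpy()s through the kernel linear map;
		 * MMIO has no linear-map alias (and often no struct page),
		 * so "swiotlb=force" dereferences garbage right here.
		 */
		return swiotlb_map(dev, paddr, size, dir, 0);

	/*
	 * Otherwise this applies a RAM-oriented translation; for MMIO the
	 * untranslated paddr is almost always the address you actually want.
	 */
	return phys_to_dma(dev, paddr);
}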
> +		ret = dma_mapping_error(attachment->dev, addr);
> +		if (ret)
> +			goto err_free_table;
> +	}
> +
> +	fill_sg_entry(sgl, phys_vec->len, addr);
> +	return sgt;
> +
> +err_unmap_dma:
> +	dma_iova_destroy(attachment->dev, state, phys_vec->len, dir,
> +			 DMA_ATTR_SKIP_CPU_SYNC);
> +err_free_table:
> +	sg_free_table(sgt);
> +err_kfree_sgt:
> +	kfree(sgt);
> +	return ERR_PTR(ret);
> +}
> +static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> +				   struct sg_table *sgt,
> +				   enum dma_data_direction dir)
> +{
> +	struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> +	struct dma_iova_state *state = attachment->priv;
> +	struct scatterlist *sgl;
> +	int i;
> +
> +	if (!state)
> +		; /* Do nothing */
> +	else if (dma_use_iova(state))
> +		dma_iova_destroy(attachment->dev, state, priv->phys_vec.len,
> +				 dir, DMA_ATTR_SKIP_CPU_SYNC);
> +	else
> +		for_each_sgtable_dma_sg(sgt, sgl, i)
The table always has exactly one entry...
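...so the loop is just a roundabout way of writing a single call on sgt->sgl; a sketch:

		dma_unmap_phys(attachment->dev, sg_dma_address(sgt->sgl),
			       sg_dma_len(sgt->sgl), dir,
			       DMA_ATTR_SKIP_CPU_SYNC);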
Thanks,
Robin.
> +			dma_unmap_phys(attachment->dev, sg_dma_address(sgl),
> +				       sg_dma_len(sgl), dir,
> +				       DMA_ATTR_SKIP_CPU_SYNC);
> +
> +	sg_free_table(sgt);
> +	kfree(sgt);
> +}