Similar to xe, amdgpu wants to check pci_p2pdma_distance(), and only needs to do so if the GPU can support peer2peer at all. It can also migrate the buffer to system memory and support non-P2P DMA.

Further, it supports a private non-PCI XGMI path. For now, hack this on top of an SGT mapping type; eventually it is likely better off as its own mapping type.

Add two exporter SGT mapping types: one that matches P2P-capable importers and one that matches everything else. The pin and map code will force a migration to system memory if the non-P2P one is matched.
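To make the shape of the change easier to see, the exporter-side pattern is roughly the sketch below. It is an illustration only: dma_buf_match_mapping(), DMA_BUF_EMAPPING_SGT_P2P()/DMA_BUF_EMAPPING_SGT(), struct dma_buf_mapping_match and struct dma_buf_match_args are the helpers added earlier in this series, while the example_*() helpers and example_sgt_ops are placeholders standing in for the exporter's own code.

static int example_match_mapping(struct dma_buf_match_args *args)
{
	/* example_*() and example_sgt_ops are placeholders, not real symbols */
	struct pci_dev *pdev = example_pci_dev(args->dmabuf);
	bool peer2peer = example_supports_p2p(args->dmabuf);
	struct dma_buf_mapping_match sgt_match[2];
	unsigned int num_match = 0;

	/* Offer the P2P SGT type only when P2P is possible at all ... */
	if (peer2peer)
		sgt_match[num_match++] =
			DMA_BUF_EMAPPING_SGT_P2P(&example_sgt_ops, pdev);
	/* ... and always offer the plain SGT type as the migration fallback */
	sgt_match[num_match++] = DMA_BUF_EMAPPING_SGT(&example_sgt_ops);

	return dma_buf_match_mapping(args, sgt_match, num_match);
}

amdgpu layers the XGMI special case on top of this by overriding attach->map_type.sgt_data.exporter_requires_p2p when only the non-P2P type matched, and the pin/map paths then key off dma_buf_sgt_p2p_allowed() instead of attach->peer2peer.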
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 94 +++++++++++++++------
 drivers/gpu/drm/xe/xe_dma_buf.c             |  2 +-
 2 files changed, 69 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index c1461317eb2987..bb9c602c061dc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -40,6 +40,7 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/ttm/ttm_tt.h>
 #include <linux/dma-buf.h>
+#include <linux/dma-buf-mapping.h>
 #include <linux/dma-fence-array.h>
 #include <linux/pci-p2pdma.h>
@@ -77,28 +78,10 @@ static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *atta
 static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
				  struct dma_buf_attachment *attach)
 {
-	struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
 	struct drm_gem_object *obj = dmabuf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	int r;
 
-	/*
-	 * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
-	 * Such buffers cannot be safely accessed over P2P due to device-local
-	 * compression metadata. Fallback to system-memory path instead.
-	 * Device supports GFX12 (GC 12.x or newer)
-	 * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
-	 *
-	 */
-	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
-	    bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
-		attach->peer2peer = false;
-
-	if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) &&
-	    pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
-		attach->peer2peer = false;
-
 	r = dma_resv_lock(bo->tbo.base.resv, NULL);
 	if (r)
 		return r;
@@ -137,7 +120,7 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
 		domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
 	} else {
 		list_for_each_entry(attach, &dmabuf->attachments, node)
-			if (!attach->peer2peer)
+			if (!dma_buf_sgt_p2p_allowed(attach))
 				domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
 	}
@@ -181,6 +164,7 @@ static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
 static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
					    enum dma_data_direction dir)
 {
+	struct device *dma_dev = dma_buf_sgt_dma_device(attach);
 	struct dma_buf *dma_buf = attach->dmabuf;
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -194,7 +178,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
 		unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
 
 		if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
-		    attach->peer2peer) {
+		    dma_buf_sgt_p2p_allowed(attach)) {
 			bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 			domains |= AMDGPU_GEM_DOMAIN_VRAM;
 		}
@@ -212,7 +196,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
 		if (IS_ERR(sgt))
 			return sgt;
 
-		if (dma_map_sgtable(attach->dev, sgt, dir,
+		if (dma_map_sgtable(dma_dev, sgt, dir,
				     DMA_ATTR_SKIP_CPU_SYNC))
 			goto error_free;
 		break;
@@ -224,7 +208,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
 			return ERR_PTR(-EINVAL);
 
 		r = amdgpu_vram_mgr_alloc_sgt(adev, bo->tbo.resource, 0,
-					      bo->tbo.base.size, attach->dev,
+					      bo->tbo.base.size, dma_dev,
					       dir, &sgt);
 		if (r)
 			return ERR_PTR(r);
@@ -254,12 +238,14 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
				  struct sg_table *sgt,
				  enum dma_data_direction dir)
 {
+	struct device *dma_dev = dma_buf_sgt_dma_device(attach);
+
 	if (sg_page(sgt->sgl)) {
-		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
+		dma_unmap_sgtable(dma_dev, sgt, dir, 0);
 		sg_free_table(sgt);
 		kfree(sgt);
 	} else {
-		amdgpu_vram_mgr_free_sgt(attach->dev, dir, sgt);
+		amdgpu_vram_mgr_free_sgt(dma_dev, dir, sgt);
 	}
 }
 
@@ -334,17 +320,61 @@ static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map
 	amdgpu_bo_unpin(bo);
 }
 
+static const struct dma_buf_mapping_sgt_exp_ops amdgpu_dma_buf_sgt_ops = {
+	.map_dma_buf = amdgpu_dma_buf_map,
+	.unmap_dma_buf = amdgpu_dma_buf_unmap,
+};
+
+static int amdgpu_dma_buf_match_mapping(struct dma_buf_match_args *args)
+{
+	struct dma_buf_attachment *attach = args->attach;
+	struct drm_gem_object *obj = args->dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct dma_buf_mapping_match sgt_match[2];
+	unsigned int num_match = 0;
+	bool peer2peer = true;
+	int ret;
+
+	/*
+	 * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
+	 * Such buffers cannot be safely accessed over P2P due to device-local
+	 * compression metadata. Fallback to system-memory path instead.
+	 * Device supports GFX12 (GC 12.x or newer)
+	 * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
+	 *
+	 */
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) &&
+	    bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
+		peer2peer = false;
+
+	if (peer2peer)
+		sgt_match[num_match++] = DMA_BUF_EMAPPING_SGT_P2P(
+			&amdgpu_dma_buf_sgt_ops, adev->pdev);
+	sgt_match[num_match++] = DMA_BUF_EMAPPING_SGT(&amdgpu_dma_buf_sgt_ops);
+
+	ret = dma_buf_match_mapping(args, sgt_match, num_match);
+	if (ret)
+		return ret;
+
+	/* If the transfer will use XGMI then force a P2P match. */
+	if (peer2peer && !dma_buf_sgt_p2p_allowed(attach) &&
+	    amdgpu_dmabuf_is_xgmi_accessible(dma_buf_attach_adev(attach), bo))
+		return attach->map_type.sgt_data.exporter_requires_p2p =
+			DMA_SGT_EXPORTER_REQUIRES_P2P_DISTANCE;
+	return 0;
+}
+
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.attach = amdgpu_dma_buf_attach,
 	.pin = amdgpu_dma_buf_pin,
 	.unpin = amdgpu_dma_buf_unpin,
-	.map_dma_buf = amdgpu_dma_buf_map,
-	.unmap_dma_buf = amdgpu_dma_buf_unmap,
 	.release = drm_gem_dmabuf_release,
 	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
 	.vmap = amdgpu_dma_buf_vmap,
 	.vunmap = amdgpu_dma_buf_vunmap,
+	.match_mapping = amdgpu_dma_buf_match_mapping,
 };
 
 /**
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 9968f37657d57d..848532aca432db 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -211,7 +211,7 @@ static int xe_dma_buf_match_mapping(struct dma_buf_match_args *args)
 	sgt_match[num_match++] = DMA_BUF_EMAPPING_SGT(&xe_dma_buf_sgt_ops);
 
-	return dma_buf_match_mapping(args, sgt_match, ARRAY_SIZE(sgt_match));
+	return dma_buf_match_mapping(args, sgt_match, num_match);
 }
 
 static const struct dma_buf_ops xe_dmabuf_ops = {