On 12-12-2025 15:45, Thomas Hellström wrote:
Hi, Himal,
On Fri, 2025-12-12 at 14:54 +0530, Ghimiray, Himal Prasad wrote:
On 11-12-2025 22:28, Thomas Hellström wrote:
In situations where no system memory is migrated to devmem, and in upcoming patches where another GPU is performing the migration to the newly allocated devmem buffer, there is nothing to ensure any ongoing clear to the devmem allocation or async eviction from the devmem allocation is complete.
Address that by passing a struct dma_fence down to the copy functions, and ensure it is waited for before migration is marked complete.
v3:
- New patch.
v4:
- Update the logic used for determining when to wait for the
pre_migrate_fence.
- Update the logic used for determining when to warn for the
pre_migrate_fence since the scheduler fences apparently can signal out-of-order.
Fixes: c5b3eb5a906c ("drm/xe: Add GPUSVM device memory copy vfunc functions") Cc: Matthew Brost matthew.brost@intel.com Cc: stable@vger.kernel.org # v6.15+ Signed-off-by: Thomas Hellström thomas.hellstrom@linux.intel.com
drivers/gpu/drm/drm_pagemap.c | 13 ++++--- drivers/gpu/drm/xe/xe_svm.c | 67 ++++++++++++++++++++++++++++++----- include/drm/drm_pagemap.h | 17 +++++++-- 3 files changed, 81 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 22c44807e3fe..864a73d019ed 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -408,7 +408,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, drm_pagemap_get_devmem_page(page, zdd); }
- err = ops->copy_to_devmem(pages, pagemap_addr, npages);
- err = ops->copy_to_devmem(pages, pagemap_addr, npages,
devmem_allocation-pre_migrate_fence);
if (err) goto err_finalize; @@ -596,7 +597,7 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation) for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(src[i]);
- err = ops->copy_to_ram(pages, pagemap_addr, npages);
- err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
if (err) goto err_finalize; @@ -732,7 +733,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, for (i = 0; i < npages; ++i) pages[i] = migrate_pfn_to_page(migrate.src[i]);
- err = ops->copy_to_ram(pages, pagemap_addr, npages);
- err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
if (err) goto err_finalize; @@ -813,11 +814,14 @@ EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get); * @ops: Pointer to the operations structure for GPU SVM device memory * @dpagemap: The struct drm_pagemap we're allocating from. * @size: Size of device memory allocation
- @pre_migrate_fence: Fence to wait for or pipeline behind before
migration starts.
- (May be NULL).
*/ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops,
struct drm_pagemap *dpagemap, size_tsize)
struct drm_pagemap *dpagemap, size_tsize,
struct dma_fence *pre_migrate_fence){ init_completion(&devmem_allocation->detached); devmem_allocation->dev = dev; @@ -825,6 +829,7 @@ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, devmem_allocation->ops = ops; devmem_allocation->dpagemap = dpagemap; devmem_allocation->size = size;
- devmem_allocation->pre_migrate_fence = pre_migrate_fence;
} EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 36634c84d148..2152d20049e4 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -483,11 +483,12 @@ static void xe_svm_copy_us_stats_incr(struct xe_gt *gt, static int xe_svm_copy(struct page **pages, struct drm_pagemap_addr *pagemap_addr,
unsigned long npages, const enumxe_svm_copy_dir dir)
unsigned long npages, const enumxe_svm_copy_dir dir,
struct dma_fence *pre_migrate_fence){ struct xe_vram_region *vr = NULL; struct xe_gt *gt = NULL;
- struct xe_device *xe;
- struct xe_device *xe = NULL;
struct dma_fence *fence = NULL; unsigned long i; #define XE_VRAM_ADDR_INVALID ~0x0ull @@ -496,6 +497,18 @@ static int xe_svm_copy(struct page **pages, bool sram = dir == XE_SVM_COPY_TO_SRAM; ktime_t start = xe_svm_stats_ktime_get();
- if (pre_migrate_fence && (sram ||
dma_fence_is_container(pre_migrate_fence))) {
Patch LGTM. Nit, Moving sram check for p2p migration from source here makes better sense with [Patch 22] drm/pagemap: Support source migration over interconnect
Just to make sure I get this right, You're suggesting moving the sram check above and the comment below about source migration to the source migration patch (22), right? If so, Yeah that makes sense.
Thomas
Thats right.
Thanks
/** This would typically be a p2p migration fromsource, or
* a composite fence operation on the destinationmemory.
* Ensure that any other GPU operation on thedestination
* is complete.*/err = dma_fence_wait(pre_migrate_fence, true);if (err)return err;- }
/* * This flow is complex: it locates physically contiguous device pages, * derives the starting physical address, and performs a single GPU copy @@ -632,10 +645,28 @@ static int xe_svm_copy(struct page **pages, err_out: /* Wait for all copies to complete */
- if (fence) {
- if (fence)
dma_fence_wait(fence, false);
dma_fence_put(fence);
- /*
* If migrating to devmem, we should have pipelined themigration behind
* the pre_migrate_fence. Verify that this is indeedlikely. If we
* didn't perform any copying, just wait for thepre_migrate_fence.
*/- if (!sram && pre_migrate_fence &&
!dma_fence_is_signaled(pre_migrate_fence)) {
if (xe && fence &&(pre_migrate_fence->context != fence->context||
dma_fence_is_later(pre_migrate_fence,fence))) {
drm_WARN(&xe->drm, true, "Unsignaled pre-migrate fence");
drm_warn(&xe->drm, "fence contexts: %llu%llu. container %d\n",
(unsigned long long)fence-context,
(unsigned longlong)pre_migrate_fence->context,
dma_fence_is_container(pre_migrate_fence));
}dma_fence_wait(pre_migrate_fence, false);}
- dma_fence_put(fence);
/* * XXX: We can't derive the GT here (or anywhere in this functions, but @@ -652,16 +683,20 @@ static int xe_svm_copy(struct page **pages, static int xe_svm_copy_to_devmem(struct page **pages, struct drm_pagemap_addr *pagemap_addr,
unsigned long npages)
unsigned long npages,struct dma_fence*pre_migrate_fence) {
- return xe_svm_copy(pages, pagemap_addr, npages,
XE_SVM_COPY_TO_VRAM);
- return xe_svm_copy(pages, pagemap_addr, npages,
XE_SVM_COPY_TO_VRAM,
pre_migrate_fence);} static int xe_svm_copy_to_ram(struct page **pages, struct drm_pagemap_addr *pagemap_addr,
unsigned long npages)
unsigned long npages,struct dma_fence *pre_migrate_fence){
- return xe_svm_copy(pages, pagemap_addr, npages,
XE_SVM_COPY_TO_SRAM);
- return xe_svm_copy(pages, pagemap_addr, npages,
XE_SVM_COPY_TO_SRAM,
pre_migrate_fence);} static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) @@ -676,6 +711,7 @@ static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) xe_bo_put_async(bo); xe_pm_runtime_put(xe);
- dma_fence_put(devmem_allocation->pre_migrate_fence);
} static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -868,6 +904,7 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long timeslice_ms) { struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap);
- struct dma_fence *pre_migrate_fence = NULL;
struct xe_device *xe = vr->xe; struct device *dev = xe->drm.dev; struct drm_buddy_block *block; @@ -894,8 +931,20 @@ static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, break; }
/* Ensure that any clearing or async eviction willcomplete before migration. */
if (!dma_resv_test_signaled(bo->ttm.base.resv,DMA_RESV_USAGE_KERNEL)) {
err = dma_resv_get_singleton(bo-ttm.base.resv, DMA_RESV_USAGE_KERNEL,
&pre_migrate_fence);
if (err)dma_resv_wait_timeout(bo-ttm.base.resv, DMA_RESV_USAGE_KERNEL,
false,MAX_SCHEDULE_TIMEOUT);
else if (pre_migrate_fence)dma_fence_enable_sw_signaling(pre_migrate_fence);
}drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
&dpagemap_devmem_ops,dpagemap, end - start);
&dpagemap_devmem_ops,dpagemap, end - start,
pre_migrate_fence);blocks = &to_xe_ttm_vram_mgr_resource(bo-
ttm.resource)->blocks;
list_for_each_entry(block, blocks, link) diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index f6e7e234c089..70a7991f784f 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -8,6 +8,7 @@ #define NR_PAGES(order) (1U << (order)) +struct dma_fence; struct drm_pagemap; struct drm_pagemap_zdd; struct device; @@ -174,6 +175,8 @@ struct drm_pagemap_devmem_ops { * @pages: Pointer to array of device memory pages (destination) * @pagemap_addr: Pointer to array of DMA information (source) * @npages: Number of pages to copy
* @pre_migrate_fence: dma-fence to wait for beforemigration start.
* May be NULL.* * Copy pages to device memory. If the order of a @pagemap_addr entry * is greater than 0, the entry is populated but subsequent entries @@ -183,13 +186,16 @@ struct drm_pagemap_devmem_ops { */ int (*copy_to_devmem)(struct page **pages, struct drm_pagemap_addr *pagemap_addr,
unsigned long npages);
unsigned long npages,struct dma_fence*pre_migrate_fence); /** * @copy_to_ram: Copy to system RAM (required for migration) * @pages: Pointer to array of device memory pages (source) * @pagemap_addr: Pointer to array of DMA information (destination) * @npages: Number of pages to copy
* @pre_migrate_fence: dma-fence to wait for beforemigration start.
* May be NULL.* * Copy pages to system RAM. If the order of a @pagemap_addr entry * is greater than 0, the entry is populated but subsequent entries @@ -199,7 +205,8 @@ struct drm_pagemap_devmem_ops { */ int (*copy_to_ram)(struct page **pages, struct drm_pagemap_addr *pagemap_addr,
unsigned long npages);
unsigned long npages,struct dma_fence *pre_migrate_fence);}; /** @@ -212,6 +219,8 @@ struct drm_pagemap_devmem_ops { * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. * @size: Size of device memory allocation * @timeslice_expiration: Timeslice expiration in jiffies
- @pre_migrate_fence: Fence to wait for or pipeline behind before
migration starts.
- (May be NULL).
*/ struct drm_pagemap_devmem { struct device *dev; @@ -221,6 +230,7 @@ struct drm_pagemap_devmem { struct drm_pagemap *dpagemap; size_t size; u64 timeslice_expiration;
- struct dma_fence *pre_migrate_fence;
}; int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, @@ -238,7 +248,8 @@ struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, struct device *dev, struct mm_struct *mm, const struct drm_pagemap_devmem_ops *ops,
struct drm_pagemap *dpagemap, size_tsize);
struct drm_pagemap *dpagemap, size_tsize,
struct dma_fence *pre_migrate_fence);int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, unsigned long start, unsigned long end,