This series is a subset of the "use all SDMA instances" series. It starts at the first modified patch and ends at the last patch before the drm/ttm patch that got merged through drm-misc-next.
v3 of the full series: https://lists.freedesktop.org/archives/dri-devel/2025-November/537830.html
Pierre-Eric Pelloux-Prayer (13): drm/amdgpu: introduce amdgpu_ttm_buffer_entity drm/amdgpu: add amdgpu_ttm_job_submit helper drm/amdgpu: fix error handling in amdgpu_copy_buffer drm/amdgpu: pass the entity to use to amdgpu_ttm_map_buffer drm/amdgpu: pass the entity to use to ttm public functions drm/amdgpu: statically assign gart windows to ttm entities drm/amdgpu: remove AMDGPU_GTT_NUM_TRANSFER_WINDOWS drm/amdgpu: add missing lock in amdgpu_benchmark_do_move drm/amdgpu: check entity lock is held in amdgpu_ttm_job_submit drm/amdgpu: double AMDGPU_GTT_MAX_TRANSFER_SIZE drm/amdgpu: use larger gart window when possible drm/amdgpu: introduce amdgpu_sdma_set_vm_pte_scheds drm/amdgpu: move sched status check inside amdgpu_ttm_set_buffer_funcs_status
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 8 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 295 ++++++++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 54 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 + drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 31 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 31 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 31 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 35 +-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 35 +-- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 31 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 31 +- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 29 +- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 29 +- drivers/gpu/drm/amd/amdgpu/si_dma.c | 31 +- drivers/gpu/drm/amd/amdgpu/vce_v1_0.c | 12 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 25 +- 23 files changed, 381 insertions(+), 385 deletions(-)
Deduplicate the IB padding code; the new helper will also be used later to check locking.
Signed-off-by: Pierre-Eric Pelloux-Prayer pierre-eric.pelloux-prayer@amd.com Reviewed-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 34 ++++++++++++------------- 1 file changed, 16 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 843a0e483268..ae3ad19667df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -162,6 +162,18 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, *placement = abo->placement; }
+static struct dma_fence * +amdgpu_ttm_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job, u32 num_dw) +{ + struct amdgpu_ring *ring; + + ring = adev->mman.buffer_funcs_ring; + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + return amdgpu_job_submit(job); +} + /** * amdgpu_ttm_map_buffer - Map memory into the GART windows * @bo: buffer object to map @@ -184,7 +196,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); unsigned int offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; - struct amdgpu_ring *ring; struct amdgpu_job *job; void *cpu_addr; uint64_t flags; @@ -239,10 +250,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, num_bytes, 0);
- ring = adev->mman.buffer_funcs_ring; - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); if (tmz) flags |= AMDGPU_PTE_TMZ; @@ -260,7 +267,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, amdgpu_gart_map_vram_range(adev, pa, 0, num_pages, flags, cpu_addr); }
- dma_fence_put(amdgpu_job_submit(job)); + dma_fence_put(amdgpu_ttm_job_submit(adev, job, num_dw)); return 0; }
@@ -1497,10 +1504,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, 0);
- amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - - fence = amdgpu_job_submit(job); + fence = amdgpu_ttm_job_submit(adev, job, num_dw); mutex_unlock(&adev->mman.gtt_window_lock);
if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) @@ -2286,11 +2290,9 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev, uint64_t src_offset, byte_count -= cur_size_in_bytes; }
- amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - *fence = amdgpu_job_submit(job); if (r) goto error_free; + *fence = amdgpu_ttm_job_submit(adev, job, num_dw);
return r;
@@ -2308,7 +2310,6 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev, uint32_t src_data, u64 k_job_id) { unsigned int num_loops, num_dw; - struct amdgpu_ring *ring; struct amdgpu_job *job; uint32_t max_bytes; unsigned int i; @@ -2332,10 +2333,7 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_device *adev, uint32_t src_data, byte_count -= cur_size; }
- ring = adev->mman.buffer_funcs_ring; - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - *fence = amdgpu_job_submit(job); + *fence = amdgpu_ttm_job_submit(adev, job, num_dw); return 0; }
This way the caller can select the entity it wants to use.
Signed-off-by: Pierre-Eric Pelloux-Prayer pierre-eric.pelloux-prayer@amd.com Reviewed-by: Christian König christian.koenig@amd.com Acked-by: Felix Kuehling felix.kuehling@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 35 +++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 16 +++++---- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 3 +- 5 files changed, 32 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 3636b757c974..a050167e76a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -37,7 +37,8 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
stime = ktime_get(); for (i = 0; i < n; i++) { - r = amdgpu_copy_buffer(adev, saddr, daddr, size, NULL, &fence, + r = amdgpu_copy_buffer(adev, &adev->mman.default_entity, + saddr, daddr, size, NULL, &fence, false, 0); if (r) goto exit_do_move; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 926a3f09a776..858eb9fa061b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1322,8 +1322,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (r) goto out;
- r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true, - AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); + r = amdgpu_fill_buffer(&adev->mman.clear_entity, abo, 0, &bo->base._resv, + &fence, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); if (WARN_ON(r)) goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index cc4e0aaa5ffa..a5048cd8e10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -356,7 +356,7 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, write_compress_disable)); }
- r = amdgpu_copy_buffer(adev, from, to, cur_size, resv, + r = amdgpu_copy_buffer(adev, entity, from, to, cur_size, resv, &next, true, copy_flags); if (r) goto error; @@ -411,8 +411,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, - false, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); + r = amdgpu_fill_buffer(&adev->mman.move_entity, + abo, 0, NULL, &wipe_fence, + AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); if (r) { goto error; } else if (wipe_fence) { @@ -2255,7 +2256,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, DMA_RESV_USAGE_BOOKKEEP); }
-int amdgpu_copy_buffer(struct amdgpu_device *adev, uint64_t src_offset, +int amdgpu_copy_buffer(struct amdgpu_device *adev, + struct amdgpu_ttm_buffer_entity *entity, + uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, @@ -2279,7 +2282,7 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_ttm_prepare_job(adev, &adev->mman.move_entity, num_dw, + r = amdgpu_ttm_prepare_job(adev, entity, num_dw, resv, vm_needs_flush, &job, AMDGPU_KERNEL_JOB_ID_TTM_COPY_BUFFER); if (r) @@ -2408,22 +2411,18 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, return r; }
-int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct dma_resv *resv, - struct dma_fence **f, - bool delayed, - u64 k_job_id) +int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, + struct amdgpu_bo *bo, + uint32_t src_data, + struct dma_resv *resv, + struct dma_fence **f, + u64 k_job_id) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_ttm_buffer_entity *entity; struct dma_fence *fence = NULL; struct amdgpu_res_cursor dst; int r;
- entity = delayed ? &adev->mman.clear_entity : - &adev->mman.move_entity; - if (!adev->mman.buffer_funcs_enabled) { dev_err(adev->dev, "Trying to clear memory with ring turned off.\n"); @@ -2440,13 +2439,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, /* Never fill more than 256MiB at once to avoid timeouts */ cur_size = min(dst.size, 256ULL << 20);
- r = amdgpu_ttm_map_buffer(&adev->mman.default_entity, - &bo->tbo, bo->tbo.resource, &dst, + r = amdgpu_ttm_map_buffer(entity, &bo->tbo, bo->tbo.resource, &dst, 1, false, &cur_size, &to); if (r) goto error;
- r = amdgpu_ttm_fill_mem(adev, entity, src_data, to, cur_size, resv, + r = amdgpu_ttm_fill_mem(adev, entity, + src_data, to, cur_size, resv, &next, true, k_job_id); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 41bbc25680a2..9288599c9c46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -167,7 +167,9 @@ int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); -int amdgpu_copy_buffer(struct amdgpu_device *adev, uint64_t src_offset, +int amdgpu_copy_buffer(struct amdgpu_device *adev, + struct amdgpu_ttm_buffer_entity *entity, + uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, @@ -175,12 +177,12 @@ int amdgpu_copy_buffer(struct amdgpu_device *adev, uint64_t src_offset, int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct dma_resv *resv, struct dma_fence **fence); -int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct dma_resv *resv, - struct dma_fence **fence, - bool delayed, - u64 k_job_id); +int amdgpu_fill_buffer(struct amdgpu_ttm_buffer_entity *entity, + struct amdgpu_bo *bo, + uint32_t src_data, + struct dma_resv *resv, + struct dma_fence **f, + u64 k_job_id);
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index ade1d4068d29..9c76f1ba0e55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -157,7 +157,8 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, goto out_unlock; }
- r = amdgpu_copy_buffer(adev, gart_s, gart_d, size * PAGE_SIZE, + r = amdgpu_copy_buffer(adev, entity, + gart_s, gart_d, size * PAGE_SIZE, NULL, &next, true, 0); if (r) { dev_err(adev->dev, "fail %d to copy memory\n", r);
linaro-mm-sig@lists.linaro.org