Hi Daniel,
only patch #2 had some significant changes. The rest ist pretty much the same except for the dropped exynos change and the added cleanup for the seqlock.
Thanks, Christian.
This change adds the dma_resv_usage enum and allows us to specify why a dma_resv object is queried for its containing fences.
Additional to that a dma_resv_usage_rw() helper function is added to aid retrieving the fences for a read or write userspace submission.
This is then deployed to the different query functions of the dma_resv object and all of their users. When the write paratermer was previously true we now use DMA_RESV_USAGE_WRITE and DMA_RESV_USAGE_READ otherwise.
v2: add KERNEL/OTHER in separate patch v3: some kerneldoc suggestions by Daniel v4: some more kerneldoc suggestions by Daniel, fix missing cases lost in the rebase pointed out by Bas.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-buf.c | 6 +- drivers/dma-buf/dma-resv.c | 35 +++++---- drivers/dma-buf/st-dma-resv.c | 48 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +- drivers/gpu/drm/drm_gem.c | 3 +- drivers/gpu/drm/drm_gem_atomic_helper.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 6 +- .../gpu/drm/i915/display/intel_atomic_plane.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 4 +- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 6 +- .../drm/i915/gem/selftests/i915_gem_dmabuf.c | 3 +- drivers/gpu/drm/i915/i915_request.c | 3 +- drivers/gpu/drm/i915/i915_sw_fence.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 3 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 3 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 8 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 8 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 3 +- drivers/gpu/drm/panfrost/panfrost_drv.c | 3 +- drivers/gpu/drm/qxl/qxl_debugfs.c | 3 +- drivers/gpu/drm/radeon/radeon_display.c | 3 +- drivers/gpu/drm/radeon/radeon_gem.c | 9 ++- drivers/gpu/drm/radeon/radeon_mn.c | 4 +- drivers/gpu/drm/radeon/radeon_sync.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 4 +- drivers/gpu/drm/scheduler/sched_main.c | 3 +- drivers/gpu/drm/ttm/ttm_bo.c | 18 +++-- drivers/gpu/drm/vgem/vgem_fence.c | 4 +- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 3 +- drivers/infiniband/core/umem_dmabuf.c | 3 +- include/linux/dma-buf.h | 8 +- include/linux/dma-resv.h | 73 +++++++++++++++---- 46 files changed, 215 insertions(+), 126 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 775d3afb4169..1cddb65eafda 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -216,7 +216,8 @@ static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write, struct dma_fence *fence; int r;
- dma_resv_for_each_fence(&cursor, resv, write, fence) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write), + fence) { dma_fence_get(fence); r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); if (!r) @@ -1124,7 +1125,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, long ret;
/* Wait on any implicit rendering fences */ - ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT); + ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write), + true, MAX_SCHEDULE_TIMEOUT); if (ret < 0) return ret;
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 8c650b96357a..17237e6ee30c 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -384,7 +384,7 @@ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) cursor->seq = read_seqcount_begin(&cursor->obj->seq); cursor->index = -1; cursor->shared_count = 0; - if (cursor->all_fences) { + if (cursor->usage >= DMA_RESV_USAGE_READ) { cursor->fences = dma_resv_shared_list(cursor->obj); if (cursor->fences) cursor->shared_count = cursor->fences->shared_count; @@ -496,7 +496,7 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor) dma_resv_assert_held(cursor->obj);
cursor->index = 0; - if (cursor->all_fences) + if (cursor->usage >= DMA_RESV_USAGE_READ) cursor->fences = dma_resv_shared_list(cursor->obj); else cursor->fences = NULL; @@ -551,7 +551,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) list = NULL; excl = NULL;
- dma_resv_iter_begin(&cursor, src, true); + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, f) {
if (dma_resv_iter_is_restarted(&cursor)) { @@ -597,7 +597,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @num_fences: the number of fences returned * @fences: the array of fence ptrs returned (array is krealloc'd to the * required size, and must be freed by caller) @@ -605,7 +605,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * Retrieve all fences from the reservation object. * Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences) { struct dma_resv_iter cursor; @@ -614,7 +614,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, *num_fences = 0; *fences = NULL;
- dma_resv_iter_begin(&cursor, obj, write); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) {
if (dma_resv_iter_is_restarted(&cursor)) { @@ -646,7 +646,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); /** * dma_resv_get_singleton - Get a single fence for all the fences * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @fence: the resulting fence * * Get a single fence representing all the fences inside the resv object. @@ -658,7 +658,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); * * Returns 0 on success and negative error values on failure. */ -int dma_resv_get_singleton(struct dma_resv *obj, bool write, +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence) { struct dma_fence_array *array; @@ -666,7 +666,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, bool write, unsigned count; int r;
- r = dma_resv_get_fences(obj, write, &count, &fences); + r = dma_resv_get_fences(obj, usage, &count, &fences); if (r) return r;
@@ -700,7 +700,7 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton); * dma_resv_wait_timeout - Wait on reservation's objects * shared and/or exclusive fences. * @obj: the reservation object - * @wait_all: if true, wait on all fences, else wait on just exclusive fence + * @usage: controls which fences to include, see enum dma_resv_usage. * @intr: if true, do interruptible wait * @timeout: timeout value in jiffies or zero to return immediately * @@ -710,14 +710,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_singleton); * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than zer on success. */ -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout) +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_iter_begin(&cursor, obj, wait_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) {
ret = dma_fence_wait_timeout(fence, intr, ret); @@ -737,8 +737,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. * @obj: the reservation object - * @test_all: if true, test all fences, otherwise only test the exclusive - * fence + * @usage: controls which fences to include, see enum dma_resv_usage. * * Callers are not required to hold specific locks, but maybe hold * dma_resv_lock() already. @@ -747,12 +746,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * * True if all fences signaled, else false. */ -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_iter_begin(&cursor, obj, test_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { dma_resv_iter_end(&cursor); return false; @@ -775,7 +774,7 @@ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_for_each_fence(&cursor, obj, true, fence) { + dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:", dma_resv_iter_is_exclusive(&cursor) ? "Exclusive" : "Shared"); diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d2e61f6ae989..d097981061b1 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,7 +58,7 @@ static int sanitycheck(void *arg) return r; }
-static int test_signaling(void *arg, bool shared) +static int test_signaling(void *arg, enum dma_resv_usage usage) { struct dma_resv resv; struct dma_fence *f; @@ -81,18 +81,18 @@ static int test_signaling(void *arg, bool shared) goto err_unlock; }
- if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f);
- if (dma_resv_test_signaled(&resv, shared)) { + if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL; goto err_unlock; } dma_fence_signal(f); - if (!dma_resv_test_signaled(&resv, shared)) { + if (!dma_resv_test_signaled(&resv, usage)) { pr_err("Resv not reporting signaled\n"); r = -EINVAL; goto err_unlock; @@ -107,15 +107,15 @@ static int test_signaling(void *arg, bool shared)
static int test_excl_signaling(void *arg) { - return test_signaling(arg, false); + return test_signaling(arg, DMA_RESV_USAGE_WRITE); }
static int test_shared_signaling(void *arg) { - return test_signaling(arg, true); + return test_signaling(arg, DMA_RESV_USAGE_READ); }
-static int test_for_each(void *arg, bool shared) +static int test_for_each(void *arg, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *f, *fence; @@ -139,13 +139,13 @@ static int test_for_each(void *arg, bool shared) goto err_unlock; }
- if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f);
r = -ENOENT; - dma_resv_for_each_fence(&cursor, &resv, shared, fence) { + dma_resv_for_each_fence(&cursor, &resv, usage, fence) { if (!r) { pr_err("More than one fence found\n"); r = -EINVAL; @@ -156,7 +156,8 @@ static int test_for_each(void *arg, bool shared) r = -EINVAL; goto err_unlock; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_is_exclusive(&cursor) != + (usage >= DMA_RESV_USAGE_READ)) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock; @@ -178,15 +179,15 @@ static int test_for_each(void *arg, bool shared)
static int test_excl_for_each(void *arg) { - return test_for_each(arg, false); + return test_for_each(arg, DMA_RESV_USAGE_WRITE); }
static int test_shared_for_each(void *arg) { - return test_for_each(arg, true); + return test_for_each(arg, DMA_RESV_USAGE_READ); }
-static int test_for_each_unlocked(void *arg, bool shared) +static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *f, *fence; @@ -211,14 +212,14 @@ static int test_for_each_unlocked(void *arg, bool shared) goto err_free; }
- if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); dma_resv_unlock(&resv);
r = -ENOENT; - dma_resv_iter_begin(&cursor, &resv, shared); + dma_resv_iter_begin(&cursor, &resv, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!r) { pr_err("More than one fence found\n"); @@ -234,7 +235,8 @@ static int test_for_each_unlocked(void *arg, bool shared) r = -EINVAL; goto err_iter_end; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_is_exclusive(&cursor) != + (usage >= DMA_RESV_USAGE_READ)) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end; @@ -262,15 +264,15 @@ static int test_for_each_unlocked(void *arg, bool shared)
static int test_excl_for_each_unlocked(void *arg) { - return test_for_each_unlocked(arg, false); + return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE); }
static int test_shared_for_each_unlocked(void *arg) { - return test_for_each_unlocked(arg, true); + return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ); }
-static int test_get_fences(void *arg, bool shared) +static int test_get_fences(void *arg, enum dma_resv_usage usage) { struct dma_fence *f, **fences = NULL; struct dma_resv resv; @@ -294,13 +296,13 @@ static int test_get_fences(void *arg, bool shared) goto err_resv; }
- if (shared) + if (usage >= DMA_RESV_USAGE_READ) dma_resv_add_shared_fence(&resv, f); else dma_resv_add_excl_fence(&resv, f); dma_resv_unlock(&resv);
- r = dma_resv_get_fences(&resv, shared, &i, &fences); + r = dma_resv_get_fences(&resv, usage, &i, &fences); if (r) { pr_err("get_fences failed\n"); goto err_free; @@ -324,12 +326,12 @@ static int test_get_fences(void *arg, bool shared)
static int test_excl_get_fences(void *arg) { - return test_get_fences(arg, false); + return test_get_fences(arg, DMA_RESV_USAGE_WRITE); }
static int test_shared_get_fences(void *arg) { - return test_get_fences(arg, true); + return test_get_fences(arg, DMA_RESV_USAGE_READ); }
int dma_resv(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e85e347eb670..413f32c3fd63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1288,7 +1288,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, * * TODO: Remove together with dma_resv rework. */ - dma_resv_for_each_fence(&cursor, resv, false, fence) { + dma_resv_for_each_fence(&cursor, resv, + DMA_RESV_USAGE_WRITE, + fence) { break; } dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index fae5c1debfad..7a6908d71820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; }
- /* TODO: Unify this with other drivers */ - r = dma_resv_get_fences(new_abo->tbo.base.resv, true, + r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, &work->shared_count, &work->shared); if (unlikely(r != 0)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 57b74d35052f..84a53758e18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, timeout);
/* ret == 0 means not signaled, * ret > 0 means signaled diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 81207737c716..65998cbcd7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, struct dma_fence *fence; int r;
- r = dma_resv_get_singleton(resv, true, &fence); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence); if (r) goto fallback;
@@ -139,7 +139,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4b153daf283d..86f5248676b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(mni, cur_seq);
- r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6f57a2fd5fe3..a7f39f8ab7be 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -768,8 +768,8 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; }
- r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 40e06745fae9..744e144e5fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL;
- dma_resv_for_each_fence(&cursor, resv, true, f) { + /* TODO: Use DMA_RESV_USAGE_READ here */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f7f149588432..5db5066e74b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1344,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + DMA_RESV_USAGE_READ, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 39c74d9fa7cc..3654326219e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1163,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16;
if (direct) { - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(bo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b13451255e8b..a0376fd36a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2059,7 +2059,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_for_each_fence(&cursor, resv, true, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2665,7 +2665,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true;
/* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ)) return false;
/* Try to block ongoing updates */ @@ -2845,7 +2845,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + DMA_RESV_USAGE_READ, true, timeout); if (timeout <= 0) return timeout; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b30656959fd8..9e24b1e616af 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9236,7 +9236,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * deadlock during GPU reset when this fence will not signal * but we hold reservation lock for the BO. */ - r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(abo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(5000)); if (unlikely(r <= 0)) DRM_ERROR("Waiting for fences timed out!"); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 133dfae06fab..eb0c2d041f13 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -771,7 +771,8 @@ long drm_gem_dma_resv_wait(struct drm_file *filep, u32 handle, return -EINVAL; }
- ret = dma_resv_wait_timeout(obj->resv, wait_all, true, timeout); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(wait_all), + true, timeout); if (ret == 0) ret = -ETIME; else if (ret > 0) diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index 9338ddb7edff..a6d89aed0bda 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -151,7 +151,7 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0;
obj = drm_gem_fb_get_obj(state->fb, 0); - ret = dma_resv_get_singleton(obj->resv, false, &fence); + ret = dma_resv_get_singleton(obj->resv, DMA_RESV_USAGE_WRITE, &fence); if (ret) return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index d5314aa28ff7..507172e2780b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -380,12 +380,14 @@ int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op, }
if (op & ETNA_PREP_NOSYNC) { - if (!dma_resv_test_signaled(obj->resv, write)) + if (!dma_resv_test_signaled(obj->resv, + dma_resv_usage_rw(write))) return -EBUSY; } else { unsigned long remain = etnaviv_timeout_to_jiffies(timeout);
- ret = dma_resv_wait_timeout(obj->resv, write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write), + true, remain); if (ret <= 0) return ret == 0 ? -ETIMEDOUT : ret; } diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c index 5712688232fb..03e86e836a17 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c @@ -997,7 +997,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb;
- dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_iter_begin(&cursor, obj->base.resv, + DMA_RESV_USAGE_WRITE); dma_resv_for_each_fence_unlocked(&cursor, fence) { add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 470fdfd61a0f..14a1c0ad8c3c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -138,12 +138,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * Alternatively, we can trade that extra information on read/write * activity with * args->busy = - * !dma_resv_test_signaled(obj->resv, true); + * !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ); * to report the overall busyness. This is what the wait-ioctl does. * */ args->busy = 0; - dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) args->busy = 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index 444f8268b9c5..a200d3e66573 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
#ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, true) && + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) && i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 6d1a71d6404c..644fe237601c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -86,7 +86,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true;
/* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout(obj->base.resv, true, false, + r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index dab3d30c09a0..319936f91ac5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -40,7 +40,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1;
- dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { ret = i915_gem_object_wait_fence(fence, flags, timeout); if (ret <= 0) @@ -117,7 +118,8 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL); + dma_resv_iter_begin(&cursor, obj->base.resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) i915_gem_fence_wait_priority(fence, attr); dma_resv_iter_end(&cursor); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index b071a58dd6da..b4275b55e5b8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -219,7 +219,8 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_detach; }
- timeout = dma_resv_wait_timeout(dmabuf->resv, false, true, 5 * HZ); + timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE, + true, 5 * HZ); if (!timeout) { pr_err("dmabuf wait for exclusive fence timed out.\n"); timeout = -ETIME; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 582770360ad1..73d5195146b0 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1598,7 +1598,8 @@ i915_request_await_object(struct i915_request *to, struct dma_fence *fence; int ret = 0;
- dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + dma_resv_for_each_fence(&cursor, obj->base.resv, + dma_resv_usage_rw(write), fence) { ret = i915_request_await_dma_fence(to, fence); if (ret) break; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 2a74a9a1cafe..ae984c66c48a 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -585,7 +585,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp));
- dma_resv_iter_begin(&cursor, resv, write); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(write)); dma_resv_for_each_fence_unlocked(&cursor, f) { pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 02b9ae65a96a..01bbb5f2d462 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -848,7 +848,8 @@ int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout) op & MSM_PREP_NOSYNC ? 0 : timeout_to_jiffies(timeout); long ret;
- ret = dma_resv_wait_timeout(obj->resv, write, true, remain); + ret = dma_resv_wait_timeout(obj->resv, dma_resv_usage_rw(write), + true, remain); if (ret == 0) return remain == 0 ? -EBUSY : -ETIMEDOUT; else if (ret < 0) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index e2faf92e4831..8642b84ea20c 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -558,7 +558,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; }
- ret = dma_resv_get_singleton(nvbo->bo.base.resv, false, + ret = dma_resv_get_singleton(nvbo->bo.base.resv, + DMA_RESV_USAGE_WRITE, &asyw->state.fence); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 74f8652d2bd3..c6bb4dbcd735 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -962,11 +962,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, struct dma_fence *fence; int ret;
- /* TODO: This is actually a memory management dependency */ - ret = dma_resv_get_singleton(bo->base.resv, false, &fence); + ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE, + &fence); if (ret) - dma_resv_wait_timeout(bo->base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT);
nv10_bo_put_tile_region(dev, *old_tile, fence); *old_tile = new_tile; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 0268259e97eb..d5e81ccee01c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -350,14 +350,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, if (ret) return ret;
- /* Waiting for the exclusive fence first causes performance regressions - * under some circumstances. So manually wait for the shared ones first. + /* Waiting for the writes first causes performance regressions + * under some circumstances. So manually wait for the reads first. */ for (i = 0; i < 2; ++i) { struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_for_each_fence(&cursor, resv, exclusive, fence) { + dma_resv_for_each_fence(&cursor, resv, + dma_resv_usage_rw(exclusive), + fence) { struct nouveau_fence *f;
if (i == 0 && dma_resv_iter_is_exclusive(&cursor)) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 9416bee92141..fab542a758ff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -962,7 +962,8 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, return -ENOENT; nvbo = nouveau_gem_object(gem);
- lret = dma_resv_wait_timeout(nvbo->bo.base.resv, write, true, + lret = dma_resv_wait_timeout(nvbo->bo.base.resv, + dma_resv_usage_rw(write), true, no_wait ? 0 : 30 * HZ); if (!lret) ret = -EBUSY; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 94b6f0a19c83..7fcbc2a5b6cd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -316,7 +316,8 @@ panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, if (!gem_obj) return -ENOENT;
- ret = dma_resv_wait_timeout(gem_obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(gem_obj->resv, DMA_RESV_USAGE_READ, + true, timeout); if (!ret) ret = timeout ? -ETIMEDOUT : -EBUSY;
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 6a36b0fd845c..33e5889d6608 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -61,7 +61,8 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) struct dma_fence *fence; int rel = 0;
- dma_resv_iter_begin(&cursor, bo->tbo.base.resv, true); + dma_resv_iter_begin(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) rel = 0; diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index f60e826cd292..57ff2b723c87 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -533,7 +533,8 @@ static int radeon_crtc_page_flip_target(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } - r = dma_resv_get_singleton(new_rbo->tbo.base.resv, false, &work->fence); + r = dma_resv_get_singleton(new_rbo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + &work->fence); if (r) { radeon_bo_unreserve(new_rbo); DRM_ERROR("failed to get new rbo buffer fences\n"); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index f563284a7fac..6616a828f40b 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -162,7 +162,9 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, } if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ - r = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); + r = dma_resv_wait_timeout(robj->tbo.base.resv, + DMA_RESV_USAGE_READ, + true, 30 * HZ); if (!r) r = -EBUSY;
@@ -524,7 +526,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj);
- r = dma_resv_test_signaled(robj->tbo.base.resv, true); + r = dma_resv_test_signaled(robj->tbo.base.resv, DMA_RESV_USAGE_READ); if (r == 0) r = -EBUSY; else @@ -553,7 +555,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, } robj = gem_to_radeon_bo(gobj);
- ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, 30 * HZ); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, 30 * HZ); if (ret == 0) r = -EBUSY; else if (ret < 0) diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 9fa88549c89e..68ebeb1bdfff 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -66,8 +66,8 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, return true; }
- r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index b991ba1bcd51..49bbb2266c0f 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -96,7 +96,7 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_fence *f; int r = 0;
- dma_resv_for_each_fence(&cursor, resv, shared, f) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(shared), f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index bc0f44299bb9..a50750740ab0 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -478,8 +478,8 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, return -EINVAL; }
- r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) { DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); return r ? r : -ETIME; diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index c5660b066554..76fd2904c7c6 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -705,7 +705,8 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
dma_resv_assert_held(obj->resv);
- dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write), + fence) { /* Make sure to grab an additional ref on the added fence */ dma_fence_get(fence); ret = drm_sched_job_add_dependency(job, fence); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c49996cf25d0..cff05b62f3f7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,7 +223,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_iter_begin(&cursor, resv, true); + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); @@ -252,7 +252,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, struct dma_resv *resv = &bo->base._resv; int ret;
- if (dma_resv_test_signaled(resv, true)) + if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ)) ret = 0; else ret = -EBUSY; @@ -264,7 +264,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, dma_resv_unlock(bo->base.resv); spin_unlock(&bo->bdev->lru_lock);
- lret = dma_resv_wait_timeout(resv, true, interruptible, + lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + interruptible, 30 * HZ);
if (lret < 0) @@ -367,7 +368,8 @@ static void ttm_bo_release(struct kref *kref) /* Last resort, if we fail to allocate memory for the * fences block for the BO to become idle */ - dma_resv_wait_timeout(bo->base.resv, true, false, + dma_resv_wait_timeout(bo->base.resv, + DMA_RESV_USAGE_READ, false, 30 * HZ); }
@@ -378,7 +380,7 @@ static void ttm_bo_release(struct kref *kref) ttm_mem_io_free(bdev, bo->resource); }
- if (!dma_resv_test_signaled(bo->base.resv, true) || + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); @@ -1044,14 +1046,14 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, long timeout = 15 * HZ;
if (no_wait) { - if (dma_resv_test_signaled(bo->base.resv, true)) + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) return 0; else return -EBUSY; }
- timeout = dma_resv_wait_timeout(bo->base.resv, true, interruptible, - timeout); + timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + interruptible, timeout); if (timeout < 0) return timeout;
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 2ddbebca87d9..91fc4940c65a 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -130,6 +130,7 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, struct vgem_file *vfile = file->driver_priv; struct dma_resv *resv; struct drm_gem_object *obj; + enum dma_resv_usage usage; struct dma_fence *fence; int ret;
@@ -151,7 +152,8 @@ int vgem_fence_attach_ioctl(struct drm_device *dev,
/* Check for a conflicting fence */ resv = obj->resv; - if (!dma_resv_test_signaled(resv, arg->flags & VGEM_FENCE_WRITE)) { + usage = dma_resv_usage_rw(arg->flags & VGEM_FENCE_WRITE); + if (!dma_resv_test_signaled(resv, usage)) { ret = -EBUSY; goto err_fence; } diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 77743fd2c61a..f8d83358d2a0 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -518,9 +518,10 @@ static int virtio_gpu_wait_ioctl(struct drm_device *dev, void *data, return -ENOENT;
if (args->flags & VIRTGPU_WAIT_NOWAIT) { - ret = dma_resv_test_signaled(obj->resv, true); + ret = dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ); } else { - ret = dma_resv_wait_timeout(obj->resv, true, true, timeout); + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, + true, timeout); } if (ret == 0) ret = -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index fe13aa8b4a64..b96884f7d03d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -528,8 +528,8 @@ static int vmw_user_bo_synccpu_grab(struct vmw_buffer_object *vmw_bo, if (flags & drm_vmw_synccpu_allow_cs) { long lret;
- lret = dma_resv_wait_timeout(bo->base.resv, true, true, - nonblock ? 0 : + lret = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + true, nonblock ? 0 : MAX_SCHEDULE_TIMEOUT); if (!lret) return -EBUSY; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 626067104751..a84d1d5628d0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1164,7 +1164,8 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, if (bo->moving) dma_fence_put(bo->moving);
- return dma_resv_get_singleton(bo->base.resv, false, + return dma_resv_get_singleton(bo->base.resv, + DMA_RESV_USAGE_WRITE, &bo->moving); }
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index d32cd7538835..f9901d273b8e 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -67,7 +67,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) * may be not up-to-date. Wait for the exporter to finish * the migration. */ - return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, false, + return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, + DMA_RESV_USAGE_WRITE, false, MAX_SCHEDULE_TIMEOUT); } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 6fb91956ab8d..a297397743a2 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -408,6 +408,9 @@ struct dma_buf { * pipelining across drivers. These do not set any fences for their * access. An example here is v4l. * + * - Driver should use dma_resv_usage_rw() when retrieving fences as + * dependency for implicit synchronization. + * * DYNAMIC IMPORTER RULES: * * Dynamic importers, see dma_buf_attachment_is_dynamic(), have @@ -423,8 +426,9 @@ struct dma_buf { * * IMPORTANT: * - * All drivers must obey the struct dma_resv rules, specifically the - * rules for updating and obeying fences. + * All drivers and memory management related functions must obey the + * struct dma_resv rules, specifically the rules for updating and + * obeying fences. See enum dma_resv_usage for further descriptions. */ struct dma_resv *resv;
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 5fa04d0fccad..92cd8023980f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -49,6 +49,53 @@ extern struct ww_class reservation_ww_class;
struct dma_resv_list;
+/** + * enum dma_resv_usage - how the fences from a dma_resv obj are used + * + * This enum describes the different use cases for a dma_resv object and + * controls which fences are returned when queried. + * + * An important fact is that there is the order WRITE<READ and when the + * dma_resv object is asked for fences for one use case the fences for the + * lower use case are returned as well. + */ +enum dma_resv_usage { + /** + * @DMA_RESV_USAGE_WRITE: Implicit write synchronization. + * + * This should only be used for userspace command submissions which add + * an implicit write dependency. + */ + DMA_RESV_USAGE_WRITE, + + /** + * @DMA_RESV_USAGE_READ: Implicit read synchronization. + * + * This should only be used for userspace command submissions which add + * an implicit read dependency. + */ + DMA_RESV_USAGE_READ, +}; + +/** + * dma_resv_usage_rw - helper for implicit sync + * @write: true if we create a new implicit sync write + * + * This returns the implicit synchronization usage for write or read accesses, + * see enum dma_resv_usage and &dma_buf.resv. + */ +static inline enum dma_resv_usage dma_resv_usage_rw(bool write) +{ + /* This looks confusing at first sight, but is indeed correct. + * + * The rational is that new write operations needs to wait for the + * existing read and write operations to finish. + * But a new read operation only needs to wait for the existing write + * operations to finish. + */ + return write ? DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE; +} + /** * struct dma_resv - a reservation object manages fences for a buffer * @@ -142,8 +189,8 @@ struct dma_resv_iter { /** @obj: The dma_resv object we iterate over */ struct dma_resv *obj;
- /** @all_fences: If all fences should be returned */ - bool all_fences; + /** @usage: Return fences with this usage or lower. */ + enum dma_resv_usage usage;
/** @fence: the currently handled fence */ struct dma_fence *fence; @@ -173,14 +220,14 @@ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor); * dma_resv_iter_begin - initialize a dma_resv_iter object * @cursor: The dma_resv_iter object to initialize * @obj: The dma_resv object which we want to iterate over - * @all_fences: If all fences should be returned or just the exclusive one + * @usage: controls which fences to include, see enum dma_resv_usage. */ static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor, struct dma_resv *obj, - bool all_fences) + enum dma_resv_usage usage) { cursor->obj = obj; - cursor->all_fences = all_fences; + cursor->usage = usage; cursor->fence = NULL; }
@@ -241,7 +288,7 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * dma_resv_for_each_fence - fence iterator * @cursor: a struct dma_resv_iter pointer * @obj: a dma_resv object pointer - * @all_fences: true if all fences should be returned + * @usage: controls which fences to return * @fence: the current fence * * Iterate over the fences in a struct dma_resv object while holding the @@ -250,8 +297,8 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) * valid as long as the lock is held and so no extra reference to the fence is * taken. */ -#define dma_resv_for_each_fence(cursor, obj, all_fences, fence) \ - for (dma_resv_iter_begin(cursor, obj, all_fences), \ +#define dma_resv_for_each_fence(cursor, obj, usage, fence) \ + for (dma_resv_iter_begin(cursor, obj, usage), \ fence = dma_resv_iter_first(cursor); fence; \ fence = dma_resv_iter_next(cursor))
@@ -418,14 +465,14 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); -int dma_resv_get_singleton(struct dma_resv *obj, bool write, +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, struct dma_fence **fence); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout); -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all); +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout); +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage); void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
#endif /* _LINUX_RESERVATION_H */
Instead of distingting between shared and exclusive fences specify the fence usage while adding fences.
Rework all drivers to use this interface instead and deprecate the old one.
v2: some kerneldoc comments suggested by Daniel v3: fix a missing case in radeon v4: rebase on nouveau changes, fix lockdep and temporary disable warning v5: more documentation updates v6: separate internal dma_resv changes from this patch, avoids to disable warning temporary, rebase on upstream changes v7: fix missed case in lima driver, minimize changes to i915_gem_busy_ioctl
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 48 +++++++-- drivers/dma-buf/st-dma-resv.c | 101 +++++------------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 6 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 5 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 4 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 8 +- .../drm/i915/selftests/intel_memory_region.c | 3 +- drivers/gpu/drm/lima/lima_gem.c | 7 +- drivers/gpu/drm/msm/msm_gem_submit.c | 6 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 4 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/qxl/qxl_release.c | 3 +- drivers/gpu/drm/radeon/radeon_object.c | 6 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 5 +- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 6 +- drivers/gpu/drm/v3d/v3d_gem.c | 4 +- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 9 +- drivers/gpu/drm/virtio/virtgpu_gem.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 3 +- include/linux/dma-buf.h | 16 +-- include/linux/dma-resv.h | 25 +++-- 30 files changed, 149 insertions(+), 166 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 17237e6ee30c..543dae6566d2 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -234,14 +234,14 @@ EXPORT_SYMBOL(dma_resv_reserve_fences);
#ifdef CONFIG_DEBUG_MUTEXES /** - * dma_resv_reset_shared_max - reset shared fences for debugging + * dma_resv_reset_max_fences - reset shared fences for debugging * @obj: the dma_resv object to reset * * Reset the number of pre-reserved shared slots to test that drivers do * correct slot allocation using dma_resv_reserve_fences(). See also * &dma_resv_list.shared_max. */ -void dma_resv_reset_shared_max(struct dma_resv *obj) +void dma_resv_reset_max_fences(struct dma_resv *obj) { struct dma_resv_list *fences = dma_resv_shared_list(obj);
@@ -251,7 +251,7 @@ void dma_resv_reset_shared_max(struct dma_resv *obj) if (fences) fences->shared_max = fences->shared_count; } -EXPORT_SYMBOL(dma_resv_reset_shared_max); +EXPORT_SYMBOL(dma_resv_reset_max_fences); #endif
/** @@ -264,7 +264,8 @@ EXPORT_SYMBOL(dma_resv_reset_shared_max); * * See also &dma_resv.fence for a discussion of the semantics. */ -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) +static void dma_resv_add_shared_fence(struct dma_resv *obj, + struct dma_fence *fence) { struct dma_resv_list *fobj; struct dma_fence *old; @@ -305,13 +306,13 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) write_seqcount_end(&obj->seq); dma_fence_put(old); } -EXPORT_SYMBOL(dma_resv_add_shared_fence);
/** * dma_resv_replace_fences - replace fences in the dma_resv obj * @obj: the reservation object * @context: the context of the fences to replace * @replacement: the new fence to use instead + * @usage: how the new fence is used, see enum dma_resv_usage * * Replace fences with a specified context with a new fence. Only valid if the * operation represented by the original fence has no longer access to the @@ -321,12 +322,16 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence); * update fence which makes the resource inaccessible. */ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, - struct dma_fence *replacement) + struct dma_fence *replacement, + enum dma_resv_usage usage) { struct dma_resv_list *list; struct dma_fence *old; unsigned int i;
+ /* Only readers supported for now */ + WARN_ON(usage != DMA_RESV_USAGE_READ); + dma_resv_assert_held(obj);
write_seqcount_begin(&obj->seq); @@ -360,7 +365,8 @@ EXPORT_SYMBOL(dma_resv_replace_fences); * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock(). * See also &dma_resv.fence_excl for a discussion of the semantics. */ -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) +static void dma_resv_add_excl_fence(struct dma_resv *obj, + struct dma_fence *fence) { struct dma_fence *old_fence = dma_resv_excl_fence(obj);
@@ -375,7 +381,27 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
dma_fence_put(old_fence); } -EXPORT_SYMBOL(dma_resv_add_excl_fence); + +/** + * dma_resv_add_fence - Add a fence to the dma_resv obj + * @obj: the reservation object + * @fence: the fence to add + * @usage: how the fence is used, see enum dma_resv_usage + * + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and + * dma_resv_reserve_fences() has been called. + * + * See also &dma_resv.fence for a discussion of the semantics. + */ +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage) +{ + if (usage == DMA_RESV_USAGE_WRITE) + dma_resv_add_excl_fence(obj, fence); + else + dma_resv_add_shared_fence(obj, fence); +} +EXPORT_SYMBOL(dma_resv_add_fence);
/* Restart the iterator by initializing all the necessary fields, but not the * relation to the dma_resv object. */ @@ -574,7 +600,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) }
dma_fence_get(f); - if (dma_resv_iter_is_exclusive(&cursor)) + if (dma_resv_iter_usage(&cursor) == DMA_RESV_USAGE_WRITE) excl = f; else RCU_INIT_POINTER(list->shared[list->shared_count++], f); @@ -771,13 +797,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { + static const char *usage[] = { "write", "read" }; struct dma_resv_iter cursor; struct dma_fence *fence;
dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:", - dma_resv_iter_is_exclusive(&cursor) ? - "Exclusive" : "Shared"); + usage[dma_resv_iter_usage(&cursor)]); dma_fence_describe(fence, seq); } } diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d097981061b1..d0f7c2bfd4f0 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,8 +58,9 @@ static int sanitycheck(void *arg) return r; }
-static int test_signaling(void *arg, enum dma_resv_usage usage) +static int test_signaling(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv resv; struct dma_fence *f; int r; @@ -81,11 +82,7 @@ static int test_signaling(void *arg, enum dma_resv_usage usage) goto err_unlock; }
- if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); - + dma_resv_add_fence(&resv, f, usage); if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL; @@ -105,18 +102,9 @@ static int test_signaling(void *arg, enum dma_resv_usage usage) return r; }
-static int test_excl_signaling(void *arg) -{ - return test_signaling(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_signaling(void *arg) -{ - return test_signaling(arg, DMA_RESV_USAGE_READ); -} - -static int test_for_each(void *arg, enum dma_resv_usage usage) +static int test_for_each(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv; @@ -139,10 +127,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) goto err_unlock; }
- if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); + dma_resv_add_fence(&resv, f, usage);
r = -ENOENT; dma_resv_for_each_fence(&cursor, &resv, usage, fence) { @@ -156,8 +141,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) r = -EINVAL; goto err_unlock; } - if (dma_resv_iter_is_exclusive(&cursor) != - (usage >= DMA_RESV_USAGE_READ)) { + if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock; @@ -177,18 +161,9 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) return r; }
-static int test_excl_for_each(void *arg) -{ - return test_for_each(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_for_each(void *arg) -{ - return test_for_each(arg, DMA_RESV_USAGE_READ); -} - -static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) +static int test_for_each_unlocked(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv; @@ -212,10 +187,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) goto err_free; }
- if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); + dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv);
r = -ENOENT; @@ -235,8 +207,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) r = -EINVAL; goto err_iter_end; } - if (dma_resv_iter_is_exclusive(&cursor) != - (usage >= DMA_RESV_USAGE_READ)) { + if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end; @@ -262,18 +233,9 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) return r; }
-static int test_excl_for_each_unlocked(void *arg) -{ - return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_for_each_unlocked(void *arg) -{ - return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ); -} - -static int test_get_fences(void *arg, enum dma_resv_usage usage) +static int test_get_fences(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_fence *f, **fences = NULL; struct dma_resv resv; int r, i; @@ -296,10 +258,7 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage) goto err_resv; }
- if (usage >= DMA_RESV_USAGE_READ) - dma_resv_add_shared_fence(&resv, f); - else - dma_resv_add_excl_fence(&resv, f); + dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv);
r = dma_resv_get_fences(&resv, usage, &i, &fences); @@ -324,30 +283,24 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage) return r; }
-static int test_excl_get_fences(void *arg) -{ - return test_get_fences(arg, DMA_RESV_USAGE_WRITE); -} - -static int test_shared_get_fences(void *arg) -{ - return test_get_fences(arg, DMA_RESV_USAGE_READ); -} - int dma_resv(void) { static const struct subtest tests[] = { SUBTEST(sanitycheck), - SUBTEST(test_excl_signaling), - SUBTEST(test_shared_signaling), - SUBTEST(test_excl_for_each), - SUBTEST(test_shared_for_each), - SUBTEST(test_excl_for_each_unlocked), - SUBTEST(test_shared_for_each_unlocked), - SUBTEST(test_excl_get_fences), - SUBTEST(test_shared_get_fences), + SUBTEST(test_signaling), + SUBTEST(test_for_each), + SUBTEST(test_for_each_unlocked), + SUBTEST(test_get_fences), }; + enum dma_resv_usage usage; + int r;
spin_lock_init(&fence_lock); - return subtests(tests, NULL); + for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ; + ++usage) { + r = subtests(tests, (void *)(unsigned long)usage); + if (r) + return r; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 98b1736bb221..5031e26e6716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement); + replacement, DMA_RESV_USAGE_READ); dma_fence_put(replacement); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 413f32c3fd63..76fd916424d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -55,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and one for the CS job */ - p->uf_entry.tv.num_shared = 2; + /* One for TTM and two for the CS job */ + p->uf_entry.tv.num_shared = 3;
drm_gem_object_put(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a7f39f8ab7be..a3cdf8a24377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1397,10 +1397,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, return; }
- if (shared) - dma_resv_add_shared_fence(resv, fence); - else - dma_resv_add_excl_fence(resv, fence); + dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ : + DMA_RESV_USAGE_WRITE); }
/** diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 53f7c78628a4..98bb5c9239de 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -202,14 +202,10 @@ static void submit_attach_object_fences(struct etnaviv_gem_submit *submit)
for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = &submit->bos[i].obj->base; + bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
- if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(obj->resv, - submit->out_fence); - else - dma_resv_add_shared_fence(obj->resv, - submit->out_fence); - + dma_resv_add_fence(obj->resv, submit->out_fence, write ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); submit_unlock_object(submit, i); } } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 14a1c0ad8c3c..ddda468241ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -148,11 +148,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, if (dma_resv_iter_is_restarted(&cursor)) args->busy = 0;
- if (dma_resv_iter_is_exclusive(&cursor)) - /* Translate the exclusive fence to the READ *and* WRITE engine */ + if (dma_resv_iter_usage(&cursor) <= DMA_RESV_USAGE_WRITE) + /* Translate the write fences to the READ *and* WRITE engine */ args->busy |= busy_check_writer(fence); else - /* Translate shared fences to READ set of engines */ + /* Translate read fences to READ set of engines */ args->busy |= busy_check_reader(fence); } dma_resv_iter_end(&cursor); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 1fd0cc9ca213..f5f2b8b115ea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -116,7 +116,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, obj->base.resv, NULL, true, i915_fence_timeout(i915), I915_FENCE_GFP); - dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); + dma_resv_add_fence(obj->base.resv, &clflush->base.dma, + DMA_RESV_USAGE_WRITE); dma_fence_work_commit(&clflush->base); /* * We must have successfully populated the pages(since we are diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 432ac74ff225..438b8a95b3d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -637,9 +637,8 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, if (IS_ERR_OR_NULL(copy_fence)) return PTR_ERR_OR_ZERO(copy_fence);
- dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence); - dma_resv_add_shared_fence(src_bo->base.resv, copy_fence); - + dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ); dma_fence_put(copy_fence);
return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 0e52eb87cd55..4997ed18b6e4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -218,8 +218,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, if (rq) { err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) - dma_resv_add_excl_fence(obj->base.resv, - &rq->fence); + dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_WRITE); i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index a132e241c3ee..3a6e3f6d239f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1220,7 +1220,8 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); if (rq) { - dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_WRITE); i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bae3423f58e8..524477d8939e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1826,7 +1826,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma, }
if (fence) { - dma_resv_add_excl_fence(vma->obj->base.resv, fence); + dma_resv_add_fence(vma->obj->base.resv, fence, + DMA_RESV_USAGE_WRITE); obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0; } @@ -1838,7 +1839,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma, }
if (fence) { - dma_resv_add_shared_fence(vma->obj->base.resv, fence); + dma_resv_add_fence(vma->obj->base.resv, fence, + DMA_RESV_USAGE_READ); obj->write_domain = 0; } } @@ -2078,7 +2080,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) goto out_rpm; }
- dma_resv_add_shared_fence(obj->base.resv, fence); + dma_resv_add_fence(obj->base.resv, fence, DMA_RESV_USAGE_READ); dma_fence_put(fence);
out_rpm: diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 6114e013092b..73eb53edb8de 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -1056,7 +1056,8 @@ static int igt_lmem_write_cpu(void *arg) obj->mm.pages->sgl, I915_CACHE_NONE, true, 0xdeadbeaf, &rq); if (rq) { - dma_resv_add_excl_fence(obj->base.resv, &rq->fence); + dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_WRITE); i915_request_put(rq); }
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index e0a11ee0e86d..0f1ca0b0db49 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -364,10 +364,9 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit) fence = lima_sched_context_queue_task(submit->task);
for (i = 0; i < submit->nr_bos; i++) { - if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence); - else - dma_resv_add_shared_fence(lima_bo_resv(bos[i]), fence); + dma_resv_add_fence(lima_bo_resv(bos[i]), fence, + submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); }
drm_gem_unlock_reservations((struct drm_gem_object **)bos, diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3164db8be893..8d1eef914ba8 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -395,9 +395,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) struct drm_gem_object *obj = &submit->bos[i].obj->base;
if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) - dma_resv_add_excl_fence(obj->resv, submit->user_fence); + dma_resv_add_fence(obj->resv, submit->user_fence, + DMA_RESV_USAGE_WRITE); else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) - dma_resv_add_shared_fence(obj->resv, submit->user_fence); + dma_resv_add_fence(obj->resv, submit->user_fence, + DMA_RESV_USAGE_READ); } }
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index c6bb4dbcd735..05076e530e7d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1308,10 +1308,11 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool excl { struct dma_resv *resv = nvbo->bo.base.resv;
- if (exclusive) - dma_resv_add_excl_fence(resv, &fence->base); - else if (fence) - dma_resv_add_shared_fence(resv, &fence->base); + if (!fence) + return; + + dma_resv_add_fence(resv, &fence->base, exclusive ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); }
static void diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index d5e81ccee01c..7f01dcf81fab 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -360,9 +360,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(exclusive), fence) { + enum dma_resv_usage usage; struct nouveau_fence *f;
- if (i == 0 && dma_resv_iter_is_exclusive(&cursor)) + usage = dma_resv_iter_usage(&cursor); + if (i == 0 && usage == DMA_RESV_USAGE_WRITE) continue;
f = nouveau_local_fence(fence, chan->drm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index c34114560e49..fda5871aebe3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -268,7 +268,7 @@ static void panfrost_attach_object_fences(struct drm_gem_object **bos, int i;
for (i = 0; i < bo_count; i++) - dma_resv_add_excl_fence(bos[i]->resv, fence); + dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE); }
int panfrost_job_push(struct panfrost_job *job) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index cde1e8ddaeaa..368d26da0d6a 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -429,7 +429,8 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
- dma_resv_add_shared_fence(bo->base.resv, &release->base); + dma_resv_add_fence(bo->base.resv, &release->base, + DMA_RESV_USAGE_READ); ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 7ffd2e90f325..cb5c4aa45cef 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -791,8 +791,6 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, return; }
- if (shared) - dma_resv_add_shared_fence(resv, &fence->base); - else - dma_resv_add_excl_fence(resv, &fence->base); + dma_resv_add_fence(resv, &fence->base, shared ? + DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index cff05b62f3f7..d74f9eea855e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; }
- dma_resv_add_shared_fence(bo->base.resv, fence); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1b96b91bf81b..7a96a1db13a7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -507,7 +507,8 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, if (ret) return ret;
- dma_resv_add_excl_fence(&ghost_obj->base._resv, fence); + dma_resv_add_fence(&ghost_obj->base._resv, fence, + DMA_RESV_USAGE_WRITE);
/** * If we're not moving to fixed memory, the TTM object @@ -561,7 +562,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); int ret = 0;
- dma_resv_add_excl_fence(bo->base.resv, fence); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); if (!evict) ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); else if (!from->use_tt && pipeline) diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 789c645f004e..0eb995d25df1 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -154,10 +154,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
- if (entry->num_shared) - dma_resv_add_shared_fence(bo->base.resv, fence); - else - dma_resv_add_excl_fence(bo->base.resv, fence); + dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ? + DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE); ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); } diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 961812d33827..2352e9640922 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -550,8 +550,8 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
for (i = 0; i < job->bo_count; i++) { /* XXX: Use shared fences for read-only objects. */ - dma_resv_add_excl_fence(job->bo[i]->resv, - job->done_fence); + dma_resv_add_fence(job->bo[i]->resv, job->done_fence, + DMA_RESV_USAGE_WRITE); }
drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 594bd6bb00d2..38550317e025 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -546,7 +546,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno;
- dma_resv_add_shared_fence(bo->base.base.resv, exec->fence); + dma_resv_add_fence(bo->base.base.resv, exec->fence); }
list_for_each_entry(bo, &exec->unref_list, unref_head) { diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 91fc4940c65a..c2a879734d40 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -161,12 +161,9 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, /* Expose the fence via the dma-buf */ dma_resv_lock(resv, NULL); ret = dma_resv_reserve_fences(resv, 1); - if (!ret) { - if (arg->flags & VGEM_FENCE_WRITE) - dma_resv_add_excl_fence(resv, fence); - else - dma_resv_add_shared_fence(resv, fence); - } + if (!ret) + dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ? + DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ); dma_resv_unlock(resv);
/* Record the fence in our idr for later signaling */ diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 1820ca6cf673..580a78809836 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -250,7 +250,8 @@ void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs, int i;
for (i = 0; i < objs->nents; i++) - dma_resv_add_excl_fence(objs->objs[i]->resv, fence); + dma_resv_add_fence(objs->objs[i]->resv, fence, + DMA_RESV_USAGE_WRITE); }
void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index b96884f7d03d..bec50223efe5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -758,7 +758,8 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo,
ret = dma_resv_reserve_fences(bo->base.resv, 1); if (!ret) - dma_resv_add_excl_fence(bo->base.resv, &fence->base); + dma_resv_add_fence(bo->base.resv, &fence->base, + DMA_RESV_USAGE_WRITE); else /* Last resort fallback when we are OOM */ dma_fence_wait(&fence->base, false); diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index a297397743a2..71731796c8c3 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -393,15 +393,15 @@ struct dma_buf { * e.g. exposed in `Implicit Fence Poll Support`_ must follow the * below rules. * - * - Drivers must add a shared fence through dma_resv_add_shared_fence() - * for anything the userspace API considers a read access. This highly - * depends upon the API and window system. + * - Drivers must add a read fence through dma_resv_add_fence() with the + * DMA_RESV_USAGE_READ flag for anything the userspace API considers a + * read access. This highly depends upon the API and window system. * - * - Similarly drivers must set the exclusive fence through - * dma_resv_add_excl_fence() for anything the userspace API considers - * write access. + * - Similarly drivers must add a write fence through + * dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for + * anything the userspace API considers write access. * - * - Drivers may just always set the exclusive fence, since that only + * - Drivers may just always add a write fence, since that only * causes unecessarily synchronization, but no correctness issues. * * - Some drivers only expose a synchronous userspace API with no @@ -416,7 +416,7 @@ struct dma_buf { * Dynamic importers, see dma_buf_attachment_is_dynamic(), have * additional constraints on how they set up fences: * - * - Dynamic importers must obey the exclusive fence and wait for it to + * - Dynamic importers must obey the write fences and wait for them to * signal before allowing access to the buffer's underlying storage * through the device. * diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 92cd8023980f..98dc5234b487 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -195,6 +195,9 @@ struct dma_resv_iter { /** @fence: the currently handled fence */ struct dma_fence *fence;
+ /** @fence_usage: the usage of the current fence */ + enum dma_resv_usage fence_usage; + /** @seq: sequence number to check for modifications */ unsigned int seq;
@@ -244,14 +247,15 @@ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) }
/** - * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * dma_resv_iter_usage - Return the usage of the current fence * @cursor: the cursor of the current position * - * Returns true if the currently returned fence is the exclusive one. + * Returns the usage of the currently processed fence. */ -static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +static inline enum dma_resv_usage +dma_resv_iter_usage(struct dma_resv_iter *cursor) { - return cursor->index == 0; + return cursor->fence_usage; }
/** @@ -306,9 +310,9 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
#ifdef CONFIG_DEBUG_MUTEXES -void dma_resv_reset_shared_max(struct dma_resv *obj); +void dma_resv_reset_max_fences(struct dma_resv *obj); #else -static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {} +static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {} #endif
/** @@ -454,17 +458,18 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj) */ static inline void dma_resv_unlock(struct dma_resv *obj) { - dma_resv_reset_shared_max(obj); + dma_resv_reset_max_fences(obj); ww_mutex_unlock(&obj->lock); }
void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences); -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage); void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, - struct dma_fence *fence); -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); + struct dma_fence *fence, + enum dma_resv_usage usage); int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage,
On Thu, Apr 07, 2022 at 10:59:33AM +0200, Christian König wrote:
Instead of distingting between shared and exclusive fences specify the fence usage while adding fences.
Rework all drivers to use this interface instead and deprecate the old one.
v2: some kerneldoc comments suggested by Daniel v3: fix a missing case in radeon v4: rebase on nouveau changes, fix lockdep and temporary disable warning v5: more documentation updates v6: separate internal dma_resv changes from this patch, avoids to disable warning temporary, rebase on upstream changes v7: fix missed case in lima driver, minimize changes to i915_gem_busy_ioctl
I've only done an incremental review of these two things, lgtm.
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 48 +++++++-- drivers/dma-buf/st-dma-resv.c | 101 +++++------------- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 +- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 6 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 3 +- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 5 +- .../drm/i915/gem/selftests/i915_gem_migrate.c | 4 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 8 +- .../drm/i915/selftests/intel_memory_region.c | 3 +- drivers/gpu/drm/lima/lima_gem.c | 7 +- drivers/gpu/drm/msm/msm_gem_submit.c | 6 +- drivers/gpu/drm/nouveau/nouveau_bo.c | 9 +- drivers/gpu/drm/nouveau/nouveau_fence.c | 4 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/qxl/qxl_release.c | 3 +- drivers/gpu/drm/radeon/radeon_object.c | 6 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 5 +- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 6 +- drivers/gpu/drm/v3d/v3d_gem.c | 4 +- drivers/gpu/drm/vc4/vc4_gem.c | 2 +- drivers/gpu/drm/vgem/vgem_fence.c | 9 +- drivers/gpu/drm/virtio/virtgpu_gem.c | 3 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 3 +- include/linux/dma-buf.h | 16 +-- include/linux/dma-resv.h | 25 +++-- 30 files changed, 149 insertions(+), 166 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 17237e6ee30c..543dae6566d2 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -234,14 +234,14 @@ EXPORT_SYMBOL(dma_resv_reserve_fences); #ifdef CONFIG_DEBUG_MUTEXES /**
- dma_resv_reset_shared_max - reset shared fences for debugging
*/
- dma_resv_reset_max_fences - reset shared fences for debugging
- @obj: the dma_resv object to reset
- Reset the number of pre-reserved shared slots to test that drivers do
- correct slot allocation using dma_resv_reserve_fences(). See also
- &dma_resv_list.shared_max.
-void dma_resv_reset_shared_max(struct dma_resv *obj) +void dma_resv_reset_max_fences(struct dma_resv *obj) { struct dma_resv_list *fences = dma_resv_shared_list(obj); @@ -251,7 +251,7 @@ void dma_resv_reset_shared_max(struct dma_resv *obj) if (fences) fences->shared_max = fences->shared_count; } -EXPORT_SYMBOL(dma_resv_reset_shared_max); +EXPORT_SYMBOL(dma_resv_reset_max_fences); #endif /** @@ -264,7 +264,8 @@ EXPORT_SYMBOL(dma_resv_reset_shared_max);
- See also &dma_resv.fence for a discussion of the semantics.
*/ -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) +static void dma_resv_add_shared_fence(struct dma_resv *obj,
struct dma_fence *fence)
{ struct dma_resv_list *fobj; struct dma_fence *old; @@ -305,13 +306,13 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) write_seqcount_end(&obj->seq); dma_fence_put(old); } -EXPORT_SYMBOL(dma_resv_add_shared_fence); /**
- dma_resv_replace_fences - replace fences in the dma_resv obj
- @obj: the reservation object
- @context: the context of the fences to replace
- @replacement: the new fence to use instead
- @usage: how the new fence is used, see enum dma_resv_usage
- Replace fences with a specified context with a new fence. Only valid if the
- operation represented by the original fence has no longer access to the
@@ -321,12 +322,16 @@ EXPORT_SYMBOL(dma_resv_add_shared_fence);
- update fence which makes the resource inaccessible.
*/ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
struct dma_fence *replacement)
struct dma_fence *replacement,
enum dma_resv_usage usage)
{ struct dma_resv_list *list; struct dma_fence *old; unsigned int i;
- /* Only readers supported for now */
- WARN_ON(usage != DMA_RESV_USAGE_READ);
- dma_resv_assert_held(obj);
write_seqcount_begin(&obj->seq); @@ -360,7 +365,8 @@ EXPORT_SYMBOL(dma_resv_replace_fences);
- Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock().
- See also &dma_resv.fence_excl for a discussion of the semantics.
*/ -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) +static void dma_resv_add_excl_fence(struct dma_resv *obj,
struct dma_fence *fence)
{ struct dma_fence *old_fence = dma_resv_excl_fence(obj); @@ -375,7 +381,27 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) dma_fence_put(old_fence); } -EXPORT_SYMBOL(dma_resv_add_excl_fence);
+/**
- dma_resv_add_fence - Add a fence to the dma_resv obj
- @obj: the reservation object
- @fence: the fence to add
- @usage: how the fence is used, see enum dma_resv_usage
- Add a fence to a slot, @obj must be locked with dma_resv_lock(), and
- dma_resv_reserve_fences() has been called.
- See also &dma_resv.fence for a discussion of the semantics.
- */
+void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
enum dma_resv_usage usage)
+{
- if (usage == DMA_RESV_USAGE_WRITE)
dma_resv_add_excl_fence(obj, fence);
- else
dma_resv_add_shared_fence(obj, fence);
+} +EXPORT_SYMBOL(dma_resv_add_fence); /* Restart the iterator by initializing all the necessary fields, but not the
- relation to the dma_resv object. */
@@ -574,7 +600,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) } dma_fence_get(f);
if (dma_resv_iter_is_exclusive(&cursor))
else RCU_INIT_POINTER(list->shared[list->shared_count++], f);if (dma_resv_iter_usage(&cursor) == DMA_RESV_USAGE_WRITE) excl = f;
@@ -771,13 +797,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) {
- static const char *usage[] = { "write", "read" }; struct dma_resv_iter cursor; struct dma_fence *fence;
dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:",
dma_resv_iter_is_exclusive(&cursor) ?
"Exclusive" : "Shared");
dma_fence_describe(fence, seq); }usage[dma_resv_iter_usage(&cursor)]);
} diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d097981061b1..d0f7c2bfd4f0 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,8 +58,9 @@ static int sanitycheck(void *arg) return r; } -static int test_signaling(void *arg, enum dma_resv_usage usage) +static int test_signaling(void *arg) {
- enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv resv; struct dma_fence *f; int r;
@@ -81,11 +82,7 @@ static int test_signaling(void *arg, enum dma_resv_usage usage) goto err_unlock; }
- if (usage >= DMA_RESV_USAGE_READ)
dma_resv_add_shared_fence(&resv, f);
- else
dma_resv_add_excl_fence(&resv, f);
- dma_resv_add_fence(&resv, f, usage); if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL;
@@ -105,18 +102,9 @@ static int test_signaling(void *arg, enum dma_resv_usage usage) return r; } -static int test_excl_signaling(void *arg) -{
- return test_signaling(arg, DMA_RESV_USAGE_WRITE);
-}
-static int test_shared_signaling(void *arg) -{
- return test_signaling(arg, DMA_RESV_USAGE_READ);
-}
-static int test_for_each(void *arg, enum dma_resv_usage usage) +static int test_for_each(void *arg) {
- enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv;
@@ -139,10 +127,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) goto err_unlock; }
- if (usage >= DMA_RESV_USAGE_READ)
dma_resv_add_shared_fence(&resv, f);
- else
dma_resv_add_excl_fence(&resv, f);
- dma_resv_add_fence(&resv, f, usage);
r = -ENOENT; dma_resv_for_each_fence(&cursor, &resv, usage, fence) { @@ -156,8 +141,7 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) r = -EINVAL; goto err_unlock; }
if (dma_resv_iter_is_exclusive(&cursor) !=
(usage >= DMA_RESV_USAGE_READ)) {
if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock;
@@ -177,18 +161,9 @@ static int test_for_each(void *arg, enum dma_resv_usage usage) return r; } -static int test_excl_for_each(void *arg) -{
- return test_for_each(arg, DMA_RESV_USAGE_WRITE);
-}
-static int test_shared_for_each(void *arg) -{
- return test_for_each(arg, DMA_RESV_USAGE_READ);
-}
-static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) +static int test_for_each_unlocked(void *arg) {
- enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv;
@@ -212,10 +187,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) goto err_free; }
- if (usage >= DMA_RESV_USAGE_READ)
dma_resv_add_shared_fence(&resv, f);
- else
dma_resv_add_excl_fence(&resv, f);
- dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv);
r = -ENOENT; @@ -235,8 +207,7 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) r = -EINVAL; goto err_iter_end; }
if (dma_resv_iter_is_exclusive(&cursor) !=
(usage >= DMA_RESV_USAGE_READ)) {
if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end;
@@ -262,18 +233,9 @@ static int test_for_each_unlocked(void *arg, enum dma_resv_usage usage) return r; } -static int test_excl_for_each_unlocked(void *arg) -{
- return test_for_each_unlocked(arg, DMA_RESV_USAGE_WRITE);
-}
-static int test_shared_for_each_unlocked(void *arg) -{
- return test_for_each_unlocked(arg, DMA_RESV_USAGE_READ);
-}
-static int test_get_fences(void *arg, enum dma_resv_usage usage) +static int test_get_fences(void *arg) {
- enum dma_resv_usage usage = (unsigned long)arg; struct dma_fence *f, **fences = NULL; struct dma_resv resv; int r, i;
@@ -296,10 +258,7 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage) goto err_resv; }
- if (usage >= DMA_RESV_USAGE_READ)
dma_resv_add_shared_fence(&resv, f);
- else
dma_resv_add_excl_fence(&resv, f);
- dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv);
r = dma_resv_get_fences(&resv, usage, &i, &fences); @@ -324,30 +283,24 @@ static int test_get_fences(void *arg, enum dma_resv_usage usage) return r; } -static int test_excl_get_fences(void *arg) -{
- return test_get_fences(arg, DMA_RESV_USAGE_WRITE);
-}
-static int test_shared_get_fences(void *arg) -{
- return test_get_fences(arg, DMA_RESV_USAGE_READ);
-}
int dma_resv(void) { static const struct subtest tests[] = { SUBTEST(sanitycheck),
SUBTEST(test_excl_signaling),
SUBTEST(test_shared_signaling),
SUBTEST(test_excl_for_each),
SUBTEST(test_shared_for_each),
SUBTEST(test_excl_for_each_unlocked),
SUBTEST(test_shared_for_each_unlocked),
SUBTEST(test_excl_get_fences),
SUBTEST(test_shared_get_fences),
SUBTEST(test_signaling),
SUBTEST(test_for_each),
SUBTEST(test_for_each_unlocked),
};SUBTEST(test_get_fences),
- enum dma_resv_usage usage;
- int r;
spin_lock_init(&fence_lock);
- return subtests(tests, NULL);
- for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ;
++usage) {
r = subtests(tests, (void *)(unsigned long)usage);
if (r)
return r;
- }
- return 0;
} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 98b1736bb221..5031e26e6716 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
replacement);
dma_fence_put(replacement); return 0;replacement, DMA_RESV_USAGE_READ);
} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 413f32c3fd63..76fd916424d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -55,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo;
- /* One for TTM and one for the CS job */
- p->uf_entry.tv.num_shared = 2;
- /* One for TTM and two for the CS job */
- p->uf_entry.tv.num_shared = 3;
drm_gem_object_put(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a7f39f8ab7be..a3cdf8a24377 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1397,10 +1397,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, return; }
- if (shared)
dma_resv_add_shared_fence(resv, fence);
- else
dma_resv_add_excl_fence(resv, fence);
- dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
DMA_RESV_USAGE_WRITE);
} /** diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 53f7c78628a4..98bb5c9239de 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -202,14 +202,10 @@ static void submit_attach_object_fences(struct etnaviv_gem_submit *submit) for (i = 0; i < submit->nr_bos; i++) { struct drm_gem_object *obj = &submit->bos[i].obj->base;
bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
if (submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE)
dma_resv_add_excl_fence(obj->resv,
submit->out_fence);
else
dma_resv_add_shared_fence(obj->resv,
submit->out_fence);
dma_resv_add_fence(obj->resv, submit->out_fence, write ?
submit_unlock_object(submit, i); }DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
} diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 14a1c0ad8c3c..ddda468241ef 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -148,11 +148,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, if (dma_resv_iter_is_restarted(&cursor)) args->busy = 0;
if (dma_resv_iter_is_exclusive(&cursor))
/* Translate the exclusive fence to the READ *and* WRITE engine */
if (dma_resv_iter_usage(&cursor) <= DMA_RESV_USAGE_WRITE)
else/* Translate the write fences to the READ *and* WRITE engine */ args->busy |= busy_check_writer(fence);
/* Translate shared fences to READ set of engines */
} dma_resv_iter_end(&cursor);/* Translate read fences to READ set of engines */ args->busy |= busy_check_reader(fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 1fd0cc9ca213..f5f2b8b115ea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -116,7 +116,8 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, obj->base.resv, NULL, true, i915_fence_timeout(i915), I915_FENCE_GFP);
dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
dma_fence_work_commit(&clflush->base); /*DMA_RESV_USAGE_WRITE);
- We must have successfully populated the pages(since we are
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 432ac74ff225..438b8a95b3d1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -637,9 +637,8 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, if (IS_ERR_OR_NULL(copy_fence)) return PTR_ERR_OR_ZERO(copy_fence);
- dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
- dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
- dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
- dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ); dma_fence_put(copy_fence);
return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 0e52eb87cd55..4997ed18b6e4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -218,8 +218,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, if (rq) { err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err)
dma_resv_add_excl_fence(obj->base.resv,
&rq->fence);
dma_resv_add_fence(obj->base.resv, &rq->fence,
}DMA_RESV_USAGE_WRITE); i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index a132e241c3ee..3a6e3f6d239f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1220,7 +1220,8 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); if (rq) {
dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
dma_resv_add_fence(obj->base.resv, &rq->fence,
i915_gem_object_set_moving_fence(obj, &rq->fence); i915_request_put(rq); }DMA_RESV_USAGE_WRITE);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bae3423f58e8..524477d8939e 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1826,7 +1826,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (fence) {
dma_resv_add_excl_fence(vma->obj->base.resv, fence);
dma_resv_add_fence(vma->obj->base.resv, fence,
}DMA_RESV_USAGE_WRITE); obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->read_domains = 0;
@@ -1838,7 +1839,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma, } if (fence) {
dma_resv_add_shared_fence(vma->obj->base.resv, fence);
dma_resv_add_fence(vma->obj->base.resv, fence,
} }DMA_RESV_USAGE_READ); obj->write_domain = 0;
@@ -2078,7 +2080,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) goto out_rpm; }
- dma_resv_add_shared_fence(obj->base.resv, fence);
- dma_resv_add_fence(obj->base.resv, fence, DMA_RESV_USAGE_READ); dma_fence_put(fence);
out_rpm: diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 6114e013092b..73eb53edb8de 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -1056,7 +1056,8 @@ static int igt_lmem_write_cpu(void *arg) obj->mm.pages->sgl, I915_CACHE_NONE, true, 0xdeadbeaf, &rq); if (rq) {
dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
dma_resv_add_fence(obj->base.resv, &rq->fence,
i915_request_put(rq); }DMA_RESV_USAGE_WRITE);
diff --git a/drivers/gpu/drm/lima/lima_gem.c b/drivers/gpu/drm/lima/lima_gem.c index e0a11ee0e86d..0f1ca0b0db49 100644 --- a/drivers/gpu/drm/lima/lima_gem.c +++ b/drivers/gpu/drm/lima/lima_gem.c @@ -364,10 +364,9 @@ int lima_gem_submit(struct drm_file *file, struct lima_submit *submit) fence = lima_sched_context_queue_task(submit->task); for (i = 0; i < submit->nr_bos; i++) {
if (submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE)
dma_resv_add_excl_fence(lima_bo_resv(bos[i]), fence);
else
dma_resv_add_shared_fence(lima_bo_resv(bos[i]), fence);
dma_resv_add_fence(lima_bo_resv(bos[i]), fence,
submit->bos[i].flags & LIMA_SUBMIT_BO_WRITE ?
}DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
drm_gem_unlock_reservations((struct drm_gem_object **)bos, diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3164db8be893..8d1eef914ba8 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -395,9 +395,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) struct drm_gem_object *obj = &submit->bos[i].obj->base; if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
dma_resv_add_excl_fence(obj->resv, submit->user_fence);
dma_resv_add_fence(obj->resv, submit->user_fence,
else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ)DMA_RESV_USAGE_WRITE);
dma_resv_add_shared_fence(obj->resv, submit->user_fence);
dma_resv_add_fence(obj->resv, submit->user_fence,
}DMA_RESV_USAGE_READ);
} diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index c6bb4dbcd735..05076e530e7d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1308,10 +1308,11 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool excl { struct dma_resv *resv = nvbo->bo.base.resv;
- if (exclusive)
dma_resv_add_excl_fence(resv, &fence->base);
- else if (fence)
dma_resv_add_shared_fence(resv, &fence->base);
- if (!fence)
return;
- dma_resv_add_fence(resv, &fence->base, exclusive ?
DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
} static void diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index d5e81ccee01c..7f01dcf81fab 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -360,9 +360,11 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(exclusive), fence) {
enum dma_resv_usage usage; struct nouveau_fence *f;
if (i == 0 && dma_resv_iter_is_exclusive(&cursor))
usage = dma_resv_iter_usage(&cursor);
if (i == 0 && usage == DMA_RESV_USAGE_WRITE) continue;
f = nouveau_local_fence(fence, chan->drm); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index c34114560e49..fda5871aebe3 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -268,7 +268,7 @@ static void panfrost_attach_object_fences(struct drm_gem_object **bos, int i; for (i = 0; i < bo_count; i++)
dma_resv_add_excl_fence(bos[i]->resv, fence);
dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
} int panfrost_job_push(struct panfrost_job *job) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index cde1e8ddaeaa..368d26da0d6a 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -429,7 +429,8 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) list_for_each_entry(entry, &release->bos, head) { bo = entry->bo;
dma_resv_add_shared_fence(bo->base.resv, &release->base);
dma_resv_add_fence(bo->base.resv, &release->base,
ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); }DMA_RESV_USAGE_READ);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 7ffd2e90f325..cb5c4aa45cef 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -791,8 +791,6 @@ void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, return; }
- if (shared)
dma_resv_add_shared_fence(resv, &fence->base);
- else
dma_resv_add_excl_fence(resv, &fence->base);
- dma_resv_add_fence(resv, &fence->base, shared ?
DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
} diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index cff05b62f3f7..d74f9eea855e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; }
- dma_resv_add_shared_fence(bo->base.resv, fence);
- dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE);
ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1b96b91bf81b..7a96a1db13a7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -507,7 +507,8 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, if (ret) return ret;
- dma_resv_add_excl_fence(&ghost_obj->base._resv, fence);
- dma_resv_add_fence(&ghost_obj->base._resv, fence,
DMA_RESV_USAGE_WRITE);
/** * If we're not moving to fixed memory, the TTM object @@ -561,7 +562,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); int ret = 0;
- dma_resv_add_excl_fence(bo->base.resv, fence);
- dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); if (!evict) ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); else if (!from->use_tt && pipeline)
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 789c645f004e..0eb995d25df1 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -154,10 +154,8 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo;
if (entry->num_shared)
dma_resv_add_shared_fence(bo->base.resv, fence);
else
dma_resv_add_excl_fence(bo->base.resv, fence);
dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ?
ttm_bo_move_to_lru_tail_unlocked(bo); dma_resv_unlock(bo->base.resv); }DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 961812d33827..2352e9640922 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -550,8 +550,8 @@ v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, for (i = 0; i < job->bo_count; i++) { /* XXX: Use shared fences for read-only objects. */
dma_resv_add_excl_fence(job->bo[i]->resv,
job->done_fence);
dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
}DMA_RESV_USAGE_WRITE);
drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 594bd6bb00d2..38550317e025 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -546,7 +546,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno;
dma_resv_add_shared_fence(bo->base.base.resv, exec->fence);
}dma_resv_add_fence(bo->base.base.resv, exec->fence);
list_for_each_entry(bo, &exec->unref_list, unref_head) { diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c index 91fc4940c65a..c2a879734d40 100644 --- a/drivers/gpu/drm/vgem/vgem_fence.c +++ b/drivers/gpu/drm/vgem/vgem_fence.c @@ -161,12 +161,9 @@ int vgem_fence_attach_ioctl(struct drm_device *dev, /* Expose the fence via the dma-buf */ dma_resv_lock(resv, NULL); ret = dma_resv_reserve_fences(resv, 1);
- if (!ret) {
if (arg->flags & VGEM_FENCE_WRITE)
dma_resv_add_excl_fence(resv, fence);
else
dma_resv_add_shared_fence(resv, fence);
- }
- if (!ret)
dma_resv_add_fence(resv, fence, arg->flags & VGEM_FENCE_WRITE ?
dma_resv_unlock(resv);DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_READ);
/* Record the fence in our idr for later signaling */ diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 1820ca6cf673..580a78809836 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -250,7 +250,8 @@ void virtio_gpu_array_add_fence(struct virtio_gpu_object_array *objs, int i; for (i = 0; i < objs->nents; i++)
dma_resv_add_excl_fence(objs->objs[i]->resv, fence);
dma_resv_add_fence(objs->objs[i]->resv, fence,
DMA_RESV_USAGE_WRITE);
} void virtio_gpu_array_put_free(struct virtio_gpu_object_array *objs) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index b96884f7d03d..bec50223efe5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -758,7 +758,8 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, ret = dma_resv_reserve_fences(bo->base.resv, 1); if (!ret)
dma_resv_add_excl_fence(bo->base.resv, &fence->base);
dma_resv_add_fence(bo->base.resv, &fence->base,
else /* Last resort fallback when we are OOM */ dma_fence_wait(&fence->base, false);DMA_RESV_USAGE_WRITE);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index a297397743a2..71731796c8c3 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -393,15 +393,15 @@ struct dma_buf { * e.g. exposed in `Implicit Fence Poll Support`_ must follow the * below rules. *
* - Drivers must add a shared fence through dma_resv_add_shared_fence()
* for anything the userspace API considers a read access. This highly
* depends upon the API and window system.
* - Drivers must add a read fence through dma_resv_add_fence() with the
* DMA_RESV_USAGE_READ flag for anything the userspace API considers a
* read access. This highly depends upon the API and window system.
* - Similarly drivers must set the exclusive fence through
* dma_resv_add_excl_fence() for anything the userspace API considers
* write access.
* - Similarly drivers must add a write fence through
* dma_resv_add_fence() with the DMA_RESV_USAGE_WRITE flag for
* anything the userspace API considers write access.
* - Drivers may just always set the exclusive fence, since that only
* - Drivers may just always add a write fence, since that only
- causes unecessarily synchronization, but no correctness issues.
- Some drivers only expose a synchronous userspace API with no
@@ -416,7 +416,7 @@ struct dma_buf { * Dynamic importers, see dma_buf_attachment_is_dynamic(), have * additional constraints on how they set up fences: *
* - Dynamic importers must obey the exclusive fence and wait for it to
* - Dynamic importers must obey the write fences and wait for them to
- signal before allowing access to the buffer's underlying storage
- through the device.
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 92cd8023980f..98dc5234b487 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -195,6 +195,9 @@ struct dma_resv_iter { /** @fence: the currently handled fence */ struct dma_fence *fence;
- /** @fence_usage: the usage of the current fence */
- enum dma_resv_usage fence_usage;
- /** @seq: sequence number to check for modifications */ unsigned int seq;
@@ -244,14 +247,15 @@ static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) } /**
- dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
- dma_resv_iter_usage - Return the usage of the current fence
- @cursor: the cursor of the current position
- Returns true if the currently returned fence is the exclusive one.
*/
- Returns the usage of the currently processed fence.
-static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +static inline enum dma_resv_usage +dma_resv_iter_usage(struct dma_resv_iter *cursor) {
- return cursor->index == 0;
- return cursor->fence_usage;
} /** @@ -306,9 +310,9 @@ static inline bool dma_resv_iter_is_restarted(struct dma_resv_iter *cursor) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) #ifdef CONFIG_DEBUG_MUTEXES -void dma_resv_reset_shared_max(struct dma_resv *obj); +void dma_resv_reset_max_fences(struct dma_resv *obj); #else -static inline void dma_resv_reset_shared_max(struct dma_resv *obj) {} +static inline void dma_resv_reset_max_fences(struct dma_resv *obj) {} #endif /** @@ -454,17 +458,18 @@ static inline struct ww_acquire_ctx *dma_resv_locking_ctx(struct dma_resv *obj) */ static inline void dma_resv_unlock(struct dma_resv *obj) {
- dma_resv_reset_shared_max(obj);
- dma_resv_reset_max_fences(obj); ww_mutex_unlock(&obj->lock);
} void dma_resv_init(struct dma_resv *obj); void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences); -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
enum dma_resv_usage usage);
void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context,
struct dma_fence *fence);
-void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence);
struct dma_fence *fence,
enum dma_resv_usage usage);
int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences); int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, -- 2.25.1
Hello Christian,
On 4/7/22 10:59, Christian König wrote:
Instead of distingting between shared and exclusive fences specify the fence usage while adding fences.
Rework all drivers to use this interface instead and deprecate the old one.
This patch broke compilation for the vc4 DRM driver. I've this patch locally which seems to work but I don't know enough about the fence API to know if is correct.
If you think is the proper fix then I can post it as a patch.
From 3e96db4827ef69b38927476659cbb4469a0246e6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas javierm@redhat.com Date: Thu, 7 Apr 2022 14:54:07 +0200 Subject: [PATCH] drm/vc4: Use newer fence API to fix build error
The commit 73511edf8b19 ("dma-buf: specify usage while adding fences to dma_resv obj v7") ported all the DRM drivers to use the newer fence API that specifies the usage with the enum dma_resv_usage rather than doing an explicit shared / exclusive distinction.
But the commit didn't do it properly in two callers of the vc4 driver, leading to build errors.
Fixes: 73511edf8b19 ("dma-buf: specify usage while adding fences to dma_resv obj v7") Signed-off-by: Javier Martinez Canillas javierm@redhat.com --- drivers/gpu/drm/vc4/vc4_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 38550317e025..9eaf304fc20d 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -546,7 +546,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno;
- dma_resv_add_fence(bo->base.base.resv, exec->fence); + dma_resv_add_fence(bo->base.base.resv, exec->fence, + DMA_RESV_USAGE_READ); }
list_for_each_entry(bo, &exec->unref_list, unref_head) { @@ -557,7 +558,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno;
- dma_resv_add_excl_fence(bo->base.base.resv, exec->fence); + dma_resv_add_fence(bo->base.base.resv, exec->fence, + DMA_RESV_USAGE_WRITE); } }
Am 07.04.22 um 15:08 schrieb Javier Martinez Canillas:
Hello Christian,
On 4/7/22 10:59, Christian König wrote:
Instead of distingting between shared and exclusive fences specify the fence usage while adding fences.
Rework all drivers to use this interface instead and deprecate the old one.
This patch broke compilation for the vc4 DRM driver.
My apologies for that. I've tried really hard to catch all cases, but looks like I missed some.
I've this patch locally which seems to work but I don't know enough about the fence API to know if is correct.
If you think is the proper fix then I can post it as a patch.
Yes, that patch looks absolutely correct to me.
Feel free to add an Reviewed-by: Christian König christian.koenig@amd.com and CC me so that I can push it to drm-misc-next ASAP.
Thanks, Christian.
From 3e96db4827ef69b38927476659cbb4469a0246e6 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas javierm@redhat.com Date: Thu, 7 Apr 2022 14:54:07 +0200 Subject: [PATCH] drm/vc4: Use newer fence API to fix build error
The commit 73511edf8b19 ("dma-buf: specify usage while adding fences to dma_resv obj v7") ported all the DRM drivers to use the newer fence API that specifies the usage with the enum dma_resv_usage rather than doing an explicit shared / exclusive distinction.
But the commit didn't do it properly in two callers of the vc4 driver, leading to build errors.
Fixes: 73511edf8b19 ("dma-buf: specify usage while adding fences to dma_resv obj v7") Signed-off-by: Javier Martinez Canillas javierm@redhat.com
drivers/gpu/drm/vc4/vc4_gem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 38550317e025..9eaf304fc20d 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -546,7 +546,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno;
dma_resv_add_fence(bo->base.base.resv, exec->fence);
dma_resv_add_fence(bo->base.base.resv, exec->fence,
}DMA_RESV_USAGE_READ);
list_for_each_entry(bo, &exec->unref_list, unref_head) { @@ -557,7 +558,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno;
dma_resv_add_excl_fence(bo->base.base.resv, exec->fence);
dma_resv_add_fence(bo->base.base.resv, exec->fence,
} }DMA_RESV_USAGE_WRITE);
On 4/7/22 15:13, Christian König wrote:
Am 07.04.22 um 15:08 schrieb Javier Martinez Canillas:
Hello Christian,
On 4/7/22 10:59, Christian König wrote:
Instead of distingting between shared and exclusive fences specify the fence usage while adding fences.
Rework all drivers to use this interface instead and deprecate the old one.
This patch broke compilation for the vc4 DRM driver.
My apologies for that. I've tried really hard to catch all cases, but looks like I missed some.
No worries, I know that's not easy to get all callers when doing these subsystem wide changes.
I've this patch locally which seems to work but I don't know enough about the fence API to know if is correct.
If you think is the proper fix then I can post it as a patch.
Yes, that patch looks absolutely correct to me.
Thanks for looking at it.
Feel free to add an Reviewed-by: Christian König christian.koenig@amd.com and CC me so that I can push it to drm-misc-next ASAP.
I can also do it after posting (just to get a proper Link: tag with dim)
Already have another set that wanted to push but found this issue after doing a build test before pushing.
Thanks, Christian.
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org --- drivers/dma-buf/dma-resv.c | 353 ++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 53 +-- include/linux/dma-resv.h | 47 +-- 4 files changed, 157 insertions(+), 297 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 543dae6566d2..378d47e1cfea 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -44,12 +44,12 @@ /** * DOC: Reservation Object Overview * - * The reservation object provides a mechanism to manage shared and - * exclusive fences associated with a buffer. A reservation object - * can have attached one exclusive fence (normally associated with - * write operations) or N shared fences (read operations). The RCU - * mechanism is used to protect read access to fences from locked - * write-side updates. + * The reservation object provides a mechanism to manage a container of + * dma_fence object associated with a resource. A reservation object + * can have any number of fences attaches to it. Each fence carries an usage + * parameter determining how the operation represented by the fence is using the + * resource. The RCU mechanism is used to protect read access to fences from + * locked write-side updates. * * See struct dma_resv for more details. */ @@ -57,39 +57,59 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class);
+/* Mask for the lower fence pointer bits */ +#define DMA_RESV_LIST_MASK 0x3 + struct dma_resv_list { struct rcu_head rcu; - u32 shared_count, shared_max; - struct dma_fence __rcu *shared[]; + u32 num_fences, max_fences; + struct dma_fence __rcu *table[]; };
-/** - * dma_resv_list_alloc - allocate fence list - * @shared_max: number of fences we need space for - * +/* Extract the fence and usage flags from an RCU protected entry in the list. */ +static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index, + struct dma_resv *resv, struct dma_fence **fence, + enum dma_resv_usage *usage) +{ + long tmp; + + tmp = (long)rcu_dereference_check(list->table[index], + resv ? dma_resv_held(resv) : true); + *fence = (struct dma_fence *)(tmp & ~DMA_RESV_LIST_MASK); + if (usage) + *usage = tmp & DMA_RESV_LIST_MASK; +} + +/* Set the fence and usage flags at the specific index in the list. */ +static void dma_resv_list_set(struct dma_resv_list *list, + unsigned int index, + struct dma_fence *fence, + enum dma_resv_usage usage) +{ + long tmp = ((long)fence) | usage; + + RCU_INIT_POINTER(list->table[index], (struct dma_fence *)tmp); +} + +/* * Allocate a new dma_resv_list and make sure to correctly initialize - * shared_max. + * max_fences. */ -static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max) +static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences) { struct dma_resv_list *list;
- list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL); + list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL); if (!list) return NULL;
- list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) / - sizeof(*list->shared); + list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) / + sizeof(*list->table);
return list; }
-/** - * dma_resv_list_free - free fence list - * @list: list to free - * - * Free a dma_resv_list and make sure to drop all references. - */ +/* Free a dma_resv_list and make sure to drop all references. */ static void dma_resv_list_free(struct dma_resv_list *list) { unsigned int i; @@ -97,9 +117,12 @@ static void dma_resv_list_free(struct dma_resv_list *list) if (!list) return;
- for (i = 0; i < list->shared_count; ++i) - dma_fence_put(rcu_dereference_protected(list->shared[i], true)); + for (i = 0; i < list->num_fences; ++i) { + struct dma_fence *fence;
+ dma_resv_list_entry(list, i, NULL, &fence, NULL); + dma_fence_put(fence); + } kfree_rcu(list, rcu); }
@@ -112,8 +135,7 @@ void dma_resv_init(struct dma_resv *obj) ww_mutex_init(&obj->lock, &reservation_ww_class); seqcount_ww_mutex_init(&obj->seq, &obj->lock);
- RCU_INIT_POINTER(obj->fence, NULL); - RCU_INIT_POINTER(obj->fence_excl, NULL); + RCU_INIT_POINTER(obj->fences, NULL); } EXPORT_SYMBOL(dma_resv_init);
@@ -123,46 +145,32 @@ EXPORT_SYMBOL(dma_resv_init); */ void dma_resv_fini(struct dma_resv *obj) { - struct dma_resv_list *fobj; - struct dma_fence *excl; - /* * This object should be dead and all references must have * been released to it, so no need to be protected with rcu. */ - excl = rcu_dereference_protected(obj->fence_excl, 1); - if (excl) - dma_fence_put(excl); - - fobj = rcu_dereference_protected(obj->fence, 1); - dma_resv_list_free(fobj); + dma_resv_list_free(rcu_dereference_protected(obj->fences, true)); ww_mutex_destroy(&obj->lock); } EXPORT_SYMBOL(dma_resv_fini);
-static inline struct dma_fence * -dma_resv_excl_fence(struct dma_resv *obj) -{ - return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); -} - -static inline struct dma_resv_list *dma_resv_shared_list(struct dma_resv *obj) +/* Dereference the fences while ensuring RCU rules */ +static inline struct dma_resv_list *dma_resv_fences_list(struct dma_resv *obj) { - return rcu_dereference_check(obj->fence, dma_resv_held(obj)); + return rcu_dereference_check(obj->fences, dma_resv_held(obj)); }
/** - * dma_resv_reserve_fences - Reserve space to add shared fences to - * a dma_resv. + * dma_resv_reserve_fences - Reserve space to add fences to a dma_resv object. * @obj: reservation object * @num_fences: number of fences we want to add * - * Should be called before dma_resv_add_shared_fence(). Must - * be called with @obj locked through dma_resv_lock(). + * Should be called before dma_resv_add_fence(). Must be called with @obj + * locked through dma_resv_lock(). * * Note that the preallocated slots need to be re-reserved if @obj is unlocked - * at any time before calling dma_resv_add_shared_fence(). This is validated - * when CONFIG_DEBUG_MUTEXES is enabled. + * at any time before calling dma_resv_add_fence(). This is validated when + * CONFIG_DEBUG_MUTEXES is enabled. * * RETURNS * Zero for success, or -errno @@ -174,11 +182,11 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences)
dma_resv_assert_held(obj);
- old = dma_resv_shared_list(obj); - if (old && old->shared_max) { - if ((old->shared_count + num_fences) <= old->shared_max) + old = dma_resv_fences_list(obj); + if (old && old->max_fences) { + if ((old->num_fences + num_fences) <= old->max_fences) return 0; - max = max(old->shared_count + num_fences, old->shared_max * 2); + max = max(old->num_fences + num_fences, old->max_fences * 2); } else { max = max(4ul, roundup_pow_of_two(num_fences)); } @@ -193,27 +201,27 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) * references from the old struct are carried over to * the new. */ - for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) { + for (i = 0, j = 0, k = max; i < (old ? old->num_fences : 0); ++i) { + enum dma_resv_usage usage; struct dma_fence *fence;
- fence = rcu_dereference_protected(old->shared[i], - dma_resv_held(obj)); + dma_resv_list_entry(old, i, obj, &fence, &usage); if (dma_fence_is_signaled(fence)) - RCU_INIT_POINTER(new->shared[--k], fence); + RCU_INIT_POINTER(new->table[--k], fence); else - RCU_INIT_POINTER(new->shared[j++], fence); + dma_resv_list_set(new, j++, fence, usage); } - new->shared_count = j; + new->num_fences = j;
/* * We are not changing the effective set of fences here so can * merely update the pointer to the new array; both existing * readers and new readers will see exactly the same set of - * active (unsignaled) shared fences. Individual fences and the + * active (unsignaled) fences. Individual fences and the * old array are protected by RCU and so will not vanish under * the gaze of the rcu_read_lock() readers. */ - rcu_assign_pointer(obj->fence, new); + rcu_assign_pointer(obj->fences, new);
if (!old) return 0; @@ -222,7 +230,7 @@ int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) for (i = k; i < max; ++i) { struct dma_fence *fence;
- fence = rcu_dereference_protected(new->shared[i], + fence = rcu_dereference_protected(new->table[i], dma_resv_held(obj)); dma_fence_put(fence); } @@ -234,38 +242,39 @@ EXPORT_SYMBOL(dma_resv_reserve_fences);
#ifdef CONFIG_DEBUG_MUTEXES /** - * dma_resv_reset_max_fences - reset shared fences for debugging + * dma_resv_reset_max_fences - reset fences for debugging * @obj: the dma_resv object to reset * - * Reset the number of pre-reserved shared slots to test that drivers do + * Reset the number of pre-reserved fence slots to test that drivers do * correct slot allocation using dma_resv_reserve_fences(). See also - * &dma_resv_list.shared_max. + * &dma_resv_list.max_fences. */ void dma_resv_reset_max_fences(struct dma_resv *obj) { - struct dma_resv_list *fences = dma_resv_shared_list(obj); + struct dma_resv_list *fences = dma_resv_fences_list(obj);
dma_resv_assert_held(obj);
- /* Test shared fence slot reservation */ + /* Test fence slot reservation */ if (fences) - fences->shared_max = fences->shared_count; + fences->max_fences = fences->num_fences; } EXPORT_SYMBOL(dma_resv_reset_max_fences); #endif
/** - * dma_resv_add_shared_fence - Add a fence to a shared slot + * dma_resv_add_fence - Add a fence to the dma_resv obj * @obj: the reservation object - * @fence: the shared fence to add + * @fence: the fence to add + * @usage: how the fence is used, see enum dma_resv_usage * - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and * dma_resv_reserve_fences() has been called. * * See also &dma_resv.fence for a discussion of the semantics. */ -static void dma_resv_add_shared_fence(struct dma_resv *obj, - struct dma_fence *fence) +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage) { struct dma_resv_list *fobj; struct dma_fence *old; @@ -280,32 +289,33 @@ static void dma_resv_add_shared_fence(struct dma_resv *obj, */ WARN_ON(dma_fence_is_container(fence));
- fobj = dma_resv_shared_list(obj); - count = fobj->shared_count; + fobj = dma_resv_fences_list(obj); + count = fobj->num_fences;
write_seqcount_begin(&obj->seq);
for (i = 0; i < count; ++i) { + enum dma_resv_usage old_usage;
- old = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(obj)); - if (old->context == fence->context || + dma_resv_list_entry(fobj, i, obj, &old, &old_usage); + if ((old->context == fence->context && old_usage >= usage) || dma_fence_is_signaled(old)) goto replace; }
- BUG_ON(fobj->shared_count >= fobj->shared_max); + BUG_ON(fobj->num_fences >= fobj->max_fences); old = NULL; count++;
replace: - RCU_INIT_POINTER(fobj->shared[i], fence); - /* pointer update must be visible before we extend the shared_count */ - smp_store_mb(fobj->shared_count, count); + dma_resv_list_set(fobj, i, fence, usage); + /* pointer update must be visible before we extend the num_fences */ + smp_store_mb(fobj->num_fences, count);
write_seqcount_end(&obj->seq); dma_fence_put(old); } +EXPORT_SYMBOL(dma_resv_add_fence);
/** * dma_resv_replace_fences - replace fences in the dma_resv obj @@ -326,128 +336,63 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, enum dma_resv_usage usage) { struct dma_resv_list *list; - struct dma_fence *old; unsigned int i;
- /* Only readers supported for now */ - WARN_ON(usage != DMA_RESV_USAGE_READ); - dma_resv_assert_held(obj);
+ list = dma_resv_fences_list(obj); write_seqcount_begin(&obj->seq); + for (i = 0; list && i < list->num_fences; ++i) { + struct dma_fence *old;
- old = dma_resv_excl_fence(obj); - if (old->context == context) { - RCU_INIT_POINTER(obj->fence_excl, dma_fence_get(replacement)); - dma_fence_put(old); - } - - list = dma_resv_shared_list(obj); - for (i = 0; list && i < list->shared_count; ++i) { - old = rcu_dereference_protected(list->shared[i], - dma_resv_held(obj)); + dma_resv_list_entry(list, i, obj, &old, NULL); if (old->context != context) continue;
- rcu_assign_pointer(list->shared[i], dma_fence_get(replacement)); + dma_resv_list_set(list, i, replacement, usage); dma_fence_put(old); } - write_seqcount_end(&obj->seq); } EXPORT_SYMBOL(dma_resv_replace_fences);
-/** - * dma_resv_add_excl_fence - Add an exclusive fence. - * @obj: the reservation object - * @fence: the exclusive fence to add - * - * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock(). - * See also &dma_resv.fence_excl for a discussion of the semantics. - */ -static void dma_resv_add_excl_fence(struct dma_resv *obj, - struct dma_fence *fence) -{ - struct dma_fence *old_fence = dma_resv_excl_fence(obj); - - dma_resv_assert_held(obj); - - dma_fence_get(fence); - - write_seqcount_begin(&obj->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(obj->fence_excl, fence); - write_seqcount_end(&obj->seq); - - dma_fence_put(old_fence); -} - -/** - * dma_resv_add_fence - Add a fence to the dma_resv obj - * @obj: the reservation object - * @fence: the fence to add - * @usage: how the fence is used, see enum dma_resv_usage - * - * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and - * dma_resv_reserve_fences() has been called. - * - * See also &dma_resv.fence for a discussion of the semantics. - */ -void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, - enum dma_resv_usage usage) -{ - if (usage == DMA_RESV_USAGE_WRITE) - dma_resv_add_excl_fence(obj, fence); - else - dma_resv_add_shared_fence(obj, fence); -} -EXPORT_SYMBOL(dma_resv_add_fence); - -/* Restart the iterator by initializing all the necessary fields, but not the - * relation to the dma_resv object. */ +/* Restart the unlocked iteration by initializing the cursor object. */ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) { cursor->seq = read_seqcount_begin(&cursor->obj->seq); - cursor->index = -1; - cursor->shared_count = 0; - if (cursor->usage >= DMA_RESV_USAGE_READ) { - cursor->fences = dma_resv_shared_list(cursor->obj); - if (cursor->fences) - cursor->shared_count = cursor->fences->shared_count; - } else { - cursor->fences = NULL; - } + cursor->index = 0; + cursor->num_fences = 0; + cursor->fences = dma_resv_fences_list(cursor->obj); + if (cursor->fences) + cursor->num_fences = cursor->fences->num_fences; cursor->is_restarted = true; }
/* Walk to the next not signaled fence and grab a reference to it */ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) { - struct dma_resv *obj = cursor->obj; + if (!cursor->fences) + return;
do { /* Drop the reference from the previous round */ dma_fence_put(cursor->fence);
- if (cursor->index == -1) { - cursor->fence = dma_resv_excl_fence(obj); - cursor->index++; - if (!cursor->fence) - continue; - - } else if (!cursor->fences || - cursor->index >= cursor->shared_count) { + if (cursor->index >= cursor->num_fences) { cursor->fence = NULL; break;
- } else { - struct dma_resv_list *fences = cursor->fences; - unsigned int idx = cursor->index++; - - cursor->fence = rcu_dereference(fences->shared[idx]); } + + dma_resv_list_entry(cursor->fences, cursor->index++, + cursor->obj, &cursor->fence, + &cursor->fence_usage); cursor->fence = dma_fence_get_rcu(cursor->fence); - if (!cursor->fence || !dma_fence_is_signaled(cursor->fence)) + if (!cursor->fence) + break; + + if (!dma_fence_is_signaled(cursor->fence) && + cursor->usage >= cursor->fence_usage) break; } while (true); } @@ -522,15 +467,9 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor) dma_resv_assert_held(cursor->obj);
cursor->index = 0; - if (cursor->usage >= DMA_RESV_USAGE_READ) - cursor->fences = dma_resv_shared_list(cursor->obj); - else - cursor->fences = NULL; - - fence = dma_resv_excl_fence(cursor->obj); - if (!fence) - fence = dma_resv_iter_next(cursor); + cursor->fences = dma_resv_fences_list(cursor->obj);
+ fence = dma_resv_iter_next(cursor); cursor->is_restarted = true; return fence; } @@ -545,17 +484,22 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first); */ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor) { - unsigned int idx; + struct dma_fence *fence;
dma_resv_assert_held(cursor->obj);
cursor->is_restarted = false; - if (!cursor->fences || cursor->index >= cursor->fences->shared_count) - return NULL;
- idx = cursor->index++; - return rcu_dereference_protected(cursor->fences->shared[idx], - dma_resv_held(cursor->obj)); + do { + if (!cursor->fences || + cursor->index >= cursor->fences->num_fences) + return NULL; + + dma_resv_list_entry(cursor->fences, cursor->index++, + cursor->obj, &fence, &cursor->fence_usage); + } while (cursor->fence_usage > cursor->usage); + + return fence; } EXPORT_SYMBOL_GPL(dma_resv_iter_next);
@@ -570,57 +514,43 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_iter cursor; struct dma_resv_list *list; - struct dma_fence *f, *excl; + struct dma_fence *f;
dma_resv_assert_held(dst);
list = NULL; - excl = NULL;
dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); dma_resv_for_each_fence_unlocked(&cursor, f) {
if (dma_resv_iter_is_restarted(&cursor)) { dma_resv_list_free(list); - dma_fence_put(excl); - - if (cursor.shared_count) { - list = dma_resv_list_alloc(cursor.shared_count); - if (!list) { - dma_resv_iter_end(&cursor); - return -ENOMEM; - }
- list->shared_count = 0; - - } else { - list = NULL; + list = dma_resv_list_alloc(cursor.num_fences); + if (!list) { + dma_resv_iter_end(&cursor); + return -ENOMEM; } - excl = NULL; + list->num_fences = 0; }
dma_fence_get(f); - if (dma_resv_iter_usage(&cursor) == DMA_RESV_USAGE_WRITE) - excl = f; - else - RCU_INIT_POINTER(list->shared[list->shared_count++], f); + dma_resv_list_set(list, list->num_fences++, f, + dma_resv_iter_usage(&cursor)); } dma_resv_iter_end(&cursor);
write_seqcount_begin(&dst->seq); - excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); - list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); + list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst)); write_seqcount_end(&dst->seq);
dma_resv_list_free(list); - dma_fence_put(excl); - return 0; } EXPORT_SYMBOL(dma_resv_copy_fences);
/** - * dma_resv_get_fences - Get an object's shared and exclusive + * dma_resv_get_fences - Get an object's fences * fences without update side lock held * @obj: the reservation object * @usage: controls which fences to include, see enum dma_resv_usage. @@ -649,7 +579,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, while (*num_fences) dma_fence_put((*fences)[--(*num_fences)]);
- count = cursor.shared_count + 1; + count = cursor.num_fences + 1;
/* Eventually re-allocate the array */ *fences = krealloc_array(*fences, count, @@ -723,8 +653,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, EXPORT_SYMBOL_GPL(dma_resv_get_singleton);
/** - * dma_resv_wait_timeout - Wait on reservation's objects - * shared and/or exclusive fences. + * dma_resv_wait_timeout - Wait on reservation's objects fences * @obj: the reservation object * @usage: controls which fences to include, see enum dma_resv_usage. * @intr: if true, do interruptible wait diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 044b41f0bfd9..529d52a204cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,7 +34,6 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; - struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 76fd916424d6..8de283997769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -574,14 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
e->bo_va = amdgpu_vm_bo_find(vm, bo); - - if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { - e->chain = dma_fence_chain_alloc(); - if (!e->chain) { - r = -ENOMEM; - goto error_validate; - } - } }
/* Move fence waiting after getting reservation lock of @@ -642,13 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, }
error_validate: - if (r) { - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } out: return r; } @@ -688,17 +675,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i;
- if (error && backoff) { - struct amdgpu_bo_list_entry *e; - - amdgpu_bo_list_for_each_entry(e, parser->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } - + if (error && backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - }
for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -1272,31 +1251,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
- amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_resv *resv = e->tv.bo->base.resv; - struct dma_fence_chain *chain = e->chain; - struct dma_resv_iter cursor; - struct dma_fence *fence; - - if (!chain) - continue; - - /* - * Temporary workaround dma_resv shortcommings by wrapping up - * the submission in a dma_fence_chain and add it as exclusive - * fence. - * - * TODO: Remove together with dma_resv rework. - */ - dma_resv_for_each_fence(&cursor, resv, - DMA_RESV_USAGE_WRITE, - fence) { - break; - } - dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1); - rcu_assign_pointer(resv->fence_excl, &chain->base); - e->chain = NULL; - } + /* Make sure all BOs are remembered as writers */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 0;
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 98dc5234b487..7bb7e7edbb6f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -99,8 +99,8 @@ static inline enum dma_resv_usage dma_resv_usage_rw(bool write) /** * struct dma_resv - a reservation object manages fences for a buffer * - * There are multiple uses for this, with sometimes slightly different rules in - * how the fence slots are used. + * This is a container for dma_fence objects which needs to handle multiple use + * cases. * * One use is to synchronize cross-driver access to a struct dma_buf, either for * dynamic buffer management or just to handle implicit synchronization between @@ -130,47 +130,22 @@ struct dma_resv { * @seq: * * Sequence count for managing RCU read-side synchronization, allows - * read-only access to @fence_excl and @fence while ensuring we take a - * consistent snapshot. + * read-only access to @fences while ensuring we take a consistent + * snapshot. */ seqcount_ww_mutex_t seq;
/** - * @fence_excl: + * @fences: * - * The exclusive fence, if there is one currently. + * Array of fences which where added to the dma_resv object * - * To guarantee that no fences are lost, this new fence must signal - * only after the previous exclusive fence has signalled. If - * semantically only a new access is added without actually treating the - * previous one as a dependency the exclusive fences can be strung - * together using struct dma_fence_chain. - * - * Note that actual semantics of what an exclusive or shared fence mean - * is defined by the user, for reservation objects shared across drivers - * see &dma_buf.resv. - */ - struct dma_fence __rcu *fence_excl; - - /** - * @fence: - * - * List of current shared fences. - * - * There are no ordering constraints of shared fences against the - * exclusive fence slot. If a waiter needs to wait for all access, it - * has to wait for both sets of fences to signal. - * - * A new fence is added by calling dma_resv_add_shared_fence(). Since - * this often needs to be done past the point of no return in command + * A new fence is added by calling dma_resv_add_fence(). Since this + * often needs to be done past the point of no return in command * submission it cannot fail, and therefore sufficient slots need to be * reserved by calling dma_resv_reserve_fences(). - * - * Note that actual semantics of what an exclusive or shared fence mean - * is defined by the user, for reservation objects shared across drivers - * see &dma_buf.resv. */ - struct dma_resv_list __rcu *fence; + struct dma_resv_list __rcu *fences; };
/** @@ -207,8 +182,8 @@ struct dma_resv_iter { /** @fences: the shared fences; private, *MUST* not dereference */ struct dma_resv_list *fences;
- /** @shared_count: number of shared fences */ - unsigned int shared_count; + /** @num_fences: number of fences */ + unsigned int num_fences;
/** @is_restarted: true if this is the first returned fence */ bool is_restarted;
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote:
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org
afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
------------[ cut here ]------------ kernel BUG at drivers/dma-buf/dma-resv.c:306! invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 1608 Comm: gnome-shell Not tainted 5.18.0-rc1-vmwgfx #18 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 RIP: 0010:dma_resv_add_fence+0x2ed/0x300 Code: ff ff be 01 00 00 00 e8 31 7d d9 ff e9 80 fd ff ff be 03 00 00 00 e8 22 7d d9 ff e9 ee fe ff ff 0f 1f 44 00 00 e9 bc fe ff ff <0f> 0b e8 4c cc 45 00 66 6> RSP: 0018:ffffa1e6846c3ab0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff94c5c5507138 RCX: 902bc24e7b7c70ae RDX: 902bc24e7b7c70ae RSI: ffffffffaaf7f437 RDI: ffffffffaaffde66 RBP: ffffa1e6846c3b08 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000004 R11: 0000000000000000 R12: ffff94c5cba90578 R13: 0000000000000000 R14: ffff94c5cba8bc00 R15: 0000000000000000 FS: 00007f9a17c6e600(0000) GS:ffff94c6f9e40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9a14113000 CR3: 000000000144c003 CR4: 00000000003706e0 Call Trace: <TASK> ttm_eu_fence_buffer_objects+0x54/0x110 [ttm] vmw_execbuf_process+0xcae/0x12a0 [vmwgfx] ? vmw_execbuf_release_pinned_bo+0x60/0x60 [vmwgfx] vmw_execbuf_ioctl+0xfb/0x160 [vmwgfx] ? vmw_execbuf_release_pinned_bo+0x60/0x60 [vmwgfx] drm_ioctl_kernel+0xba/0x150 [drm] ? __might_fault+0x77/0x80 drm_ioctl+0x247/0x460 [drm] ? vmw_execbuf_release_pinned_bo+0x60/0x60 [vmwgfx] ? find_held_lock+0x31/0x90 ? __fget_files+0xc5/0x190 ? __this_cpu_preempt_check+0x13/0x20 ? lock_release+0x142/0x2f0 ? drm_ioctl_kernel+0x150/0x150 [drm] vmw_generic_ioctl+0xa3/0x110 [vmwgfx] vmw_unlocked_ioctl+0x15/0x20 [vmwgfx] __x64_sys_ioctl+0x91/0xc0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f9a1af1aaff Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 7> RSP: 002b:00007ffd833696c0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffd83369780 RCX: 00007f9a1af1aaff RDX: 00007ffd83369780 RSI: 000000004028644c RDI: 000000000000000d RBP: 000000004028644c R08: 0000000000001248 R09: 00007ffd83369808 R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffd83369808 R13: 000000000000000d R14: 000055719cb629c0 R15: 00007ffd83369808 </TASK> Modules linked in: overlay snd_ens1371 intel_rapl_msr snd_ac97_codec intel_rapl_common ac97_bus vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci> ---[ end trace 0000000000000000 ]---
z
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin:
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote:
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org
afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
drm/ttm: fix logic inversion in ttm_eu_reserve_buffers
That should have been max, not min.
Signed-off-by: Christian König christian.koenig@amd.com Fixes: c8d4c18bfbc4 ("dma-buf/drivers: make reserving a shared slot mandatory v4") Reviewed-by: Matthew Auld matthew.auld@intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20220411134537.2854-1-christia...
Just a stupid logic inversion. Sorry for the noise.
Regards, Christian.
------------[ cut here ]------------ kernel BUG at drivers/dma-buf/dma-resv.c:306! invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 1 PID: 1608 Comm: gnome-shell Not tainted 5.18.0-rc1-vmwgfx #18 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 RIP: 0010:dma_resv_add_fence+0x2ed/0x300 Code: ff ff be 01 00 00 00 e8 31 7d d9 ff e9 80 fd ff ff be 03 00 00 00 e8 22 7d d9 ff e9 ee fe ff ff 0f 1f 44 00 00 e9 bc fe ff ff <0f> 0b e8 4c cc 45 00 66 6> RSP: 0018:ffffa1e6846c3ab0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff94c5c5507138 RCX: 902bc24e7b7c70ae RDX: 902bc24e7b7c70ae RSI: ffffffffaaf7f437 RDI: ffffffffaaffde66 RBP: ffffa1e6846c3b08 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000004 R11: 0000000000000000 R12: ffff94c5cba90578 R13: 0000000000000000 R14: ffff94c5cba8bc00 R15: 0000000000000000 FS: 00007f9a17c6e600(0000) GS:ffff94c6f9e40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9a14113000 CR3: 000000000144c003 CR4: 00000000003706e0 Call Trace:
<TASK> ttm_eu_fence_buffer_objects+0x54/0x110 [ttm] vmw_execbuf_process+0xcae/0x12a0 [vmwgfx] ? vmw_execbuf_release_pinned_bo+0x60/0x60 [vmwgfx] vmw_execbuf_ioctl+0xfb/0x160 [vmwgfx] ? vmw_execbuf_release_pinned_bo+0x60/0x60 [vmwgfx] drm_ioctl_kernel+0xba/0x150 [drm] ? __might_fault+0x77/0x80 drm_ioctl+0x247/0x460 [drm] ? vmw_execbuf_release_pinned_bo+0x60/0x60 [vmwgfx] ? find_held_lock+0x31/0x90 ? __fget_files+0xc5/0x190 ? __this_cpu_preempt_check+0x13/0x20 ? lock_release+0x142/0x2f0 ? drm_ioctl_kernel+0x150/0x150 [drm] vmw_generic_ioctl+0xa3/0x110 [vmwgfx] vmw_unlocked_ioctl+0x15/0x20 [vmwgfx] __x64_sys_ioctl+0x91/0xc0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f9a1af1aaff Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <41> 89 c0 3d 00 f0 ff ff 7> RSP: 002b:00007ffd833696c0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffd83369780 RCX: 00007f9a1af1aaff RDX: 00007ffd83369780 RSI: 000000004028644c RDI: 000000000000000d RBP: 000000004028644c R08: 0000000000001248 R09: 00007ffd83369808 R10: 0000000000000008 R11: 0000000000000246 R12: 00007ffd83369808 R13: 000000000000000d R14: 000055719cb629c0 R15: 00007ffd83369808 </TASK> Modules linked in: overlay snd_ens1371 intel_rapl_msr snd_ac97_codec intel_rapl_common ac97_bus vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci> ---[ end trace 0000000000000000 ]---
z
On Wed, 2022-04-20 at 09:37 +0200, Christian König wrote:
⚠ External Email
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin:
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote:
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org
afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
Unfortunately that doesn't seem to be it. The backtrace is from the current (as of the time of sending of this email) drm-misc-next, which has this change, so it's something else.
z
Am 20.04.22 um 19:38 schrieb Zack Rusin:
On Wed, 2022-04-20 at 09:37 +0200, Christian König wrote:
⚠ External Email
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin:
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote:
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org
afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
Unfortunately that doesn't seem to be it. The backtrace is from the current (as of the time of sending of this email) drm-misc-next, which has this change, so it's something else.
Ok, that's strange. In this case I need to investigate further.
Maybe VMWGFX is adding more than one fence and we actually need to reserve multiple slots.
Regards, Christian.
z
On Wed, 2022-04-20 at 19:40 +0200, Christian König wrote:
Am 20.04.22 um 19:38 schrieb Zack Rusin:
On Wed, 2022-04-20 at 09:37 +0200, Christian König wrote:
⚠ External Email
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin:
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote:
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org
afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
Unfortunately that doesn't seem to be it. The backtrace is from the current (as of the time of sending of this email) drm-misc-next, which has this change, so it's something else.
Ok, that's strange. In this case I need to investigate further.
Maybe VMWGFX is adding more than one fence and we actually need to reserve multiple slots.
This might be helper code issue with CONFIG_DEBUG_MUTEXES set. On that config dma_resv_reset_max_fences does: fences->max_fences = fences->num_fences; For some objects num_fences is 0 and so after max_fences and num_fences are both 0. And then BUG_ON(num_fences >= max_fences) is triggered.
z
Am 20.04.22 um 20:41 schrieb Zack Rusin:
On Wed, 2022-04-20 at 19:40 +0200, Christian König wrote:
Am 20.04.22 um 19:38 schrieb Zack Rusin:
On Wed, 2022-04-20 at 09:37 +0200, Christian König wrote:
⚠ External Email
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin:
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote:
Rework the internals of the dma_resv object to allow adding more than one write fence and remember for each fence what purpose it had.
This allows removing the workaround from amdgpu which used a container for this instead.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: amd-gfx@lists.freedesktop.org
afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
Unfortunately that doesn't seem to be it. The backtrace is from the current (as of the time of sending of this email) drm-misc-next, which has this change, so it's something else.
Ok, that's strange. In this case I need to investigate further.
Maybe VMWGFX is adding more than one fence and we actually need to reserve multiple slots.
This might be helper code issue with CONFIG_DEBUG_MUTEXES set. On that config dma_resv_reset_max_fences does: fences->max_fences = fences->num_fences; For some objects num_fences is 0 and so after max_fences and num_fences are both 0. And then BUG_ON(num_fences >= max_fences) is triggered.
Yeah, but that's expected behavior.
What's not expected is that max_fences is still 0 (or equal to old num_fences) when VMWGFX tries to add a new fence. The function ttm_eu_reserve_buffers() should have reserved at least one fence slot.
So the underlying problem is that either ttm_eu_reserve_buffers() was never called or VMWGFX tried to add more than one fence.
Regards, Christian.
z
Am 20.04.22 um 20:49 schrieb Christian König:
Am 20.04.22 um 20:41 schrieb Zack Rusin:
On Wed, 2022-04-20 at 19:40 +0200, Christian König wrote:
Am 20.04.22 um 19:38 schrieb Zack Rusin:
On Wed, 2022-04-20 at 09:37 +0200, Christian König wrote:
⚠ External Email
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin:
On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote: > Rework the internals of the dma_resv object to allow adding > more > than > one > write fence and remember for each fence what purpose it had. > > This allows removing the workaround from amdgpu which used a > container > for > this instead. > > Signed-off-by: Christian König christian.koenig@amd.com > Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch > Cc: amd-gfx@lists.freedesktop.org afaict this change broke vmwgfx which now kernel oops right after boot. I haven't had the time to look into it yet, so I'm not sure what's the problem. I'll look at this tomorrow, but just in case you have some clues, the backtrace follows:
that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
Unfortunately that doesn't seem to be it. The backtrace is from the current (as of the time of sending of this email) drm-misc-next, which has this change, so it's something else.
Ok, that's strange. In this case I need to investigate further.
Maybe VMWGFX is adding more than one fence and we actually need to reserve multiple slots.
This might be helper code issue with CONFIG_DEBUG_MUTEXES set. On that config dma_resv_reset_max_fences does: fences->max_fences = fences->num_fences; For some objects num_fences is 0 and so after max_fences and num_fences are both 0. And then BUG_ON(num_fences >= max_fences) is triggered.
Yeah, but that's expected behavior.
What's not expected is that max_fences is still 0 (or equal to old num_fences) when VMWGFX tries to add a new fence. The function ttm_eu_reserve_buffers() should have reserved at least one fence slot.
So the underlying problem is that either ttm_eu_reserve_buffers() was never called or VMWGFX tried to add more than one fence.
To figure out what it is could you try the following code fragment:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..a36f89d3f36d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -288,7 +288,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, val_buf->bo = ttm_bo_get_unless_zero(&vbo->base); if (!val_buf->bo) return -ESRCH; - val_buf->num_shared = 0; + val_buf->num_shared = 16; list_add_tail(&val_buf->head, &ctx->bo_list); bo_node->as_mob = as_mob; bo_node->cpu_blit = cpu_blit;
Thanks, Christian.
Regards, Christian.
z
On Wed, 2022-04-20 at 20:56 +0200, Christian König wrote:
⚠ External Email
Am 20.04.22 um 20:49 schrieb Christian König:
Am 20.04.22 um 20:41 schrieb Zack Rusin:
On Wed, 2022-04-20 at 19:40 +0200, Christian König wrote:
Am 20.04.22 um 19:38 schrieb Zack Rusin:
On Wed, 2022-04-20 at 09:37 +0200, Christian König wrote:
⚠ External Email
Hi Zack,
Am 20.04.22 um 05:56 schrieb Zack Rusin: > On Thu, 2022-04-07 at 10:59 +0200, Christian König wrote: > > Rework the internals of the dma_resv object to allow > > adding > > more > > than > > one > > write fence and remember for each fence what purpose it > > had. > > > > This allows removing the workaround from amdgpu which > > used a > > container > > for > > this instead. > > > > Signed-off-by: Christian König > > christian.koenig@amd.com > > Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch > > Cc: amd-gfx@lists.freedesktop.org > afaict this change broke vmwgfx which now kernel oops > right > after > boot. > I haven't had the time to look into it yet, so I'm not > sure > what's > the > problem. I'll look at this tomorrow, but just in case you > have > some > clues, the backtrace follows: that's a known issue and should already be fixed with:
commit d72dcbe9fce505228dae43bef9da8f2b707d1b3d Author: Christian König christian.koenig@amd.com Date: Mon Apr 11 15:21:59 2022 +0200
Unfortunately that doesn't seem to be it. The backtrace is from the current (as of the time of sending of this email) drm-misc- next, which has this change, so it's something else.
Ok, that's strange. In this case I need to investigate further.
Maybe VMWGFX is adding more than one fence and we actually need to reserve multiple slots.
This might be helper code issue with CONFIG_DEBUG_MUTEXES set. On that config dma_resv_reset_max_fences does: fences->max_fences = fences->num_fences; For some objects num_fences is 0 and so after max_fences and num_fences are both 0. And then BUG_ON(num_fences >= max_fences) is triggered.
Yeah, but that's expected behavior.
What's not expected is that max_fences is still 0 (or equal to old num_fences) when VMWGFX tries to add a new fence. The function ttm_eu_reserve_buffers() should have reserved at least one fence slot.
So the underlying problem is that either ttm_eu_reserve_buffers() was never called or VMWGFX tried to add more than one fence.
To figure out what it is could you try the following code fragment:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..a36f89d3f36d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -288,7 +288,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, val_buf->bo = ttm_bo_get_unless_zero(&vbo->base); if (!val_buf->bo) return -ESRCH; - val_buf->num_shared = 0; + val_buf->num_shared = 16; list_add_tail(&val_buf->head, &ctx->bo_list); bo_node->as_mob = as_mob; bo_node->cpu_blit = cpu_blit;
Fails the same BUG_ON with num_fences and max_fences == 0.
z
Am 20.04.22 um 21:28 schrieb Zack Rusin:
[SNIP]
To figure out what it is could you try the following code fragment:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..a36f89d3f36d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -288,7 +288,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, val_buf->bo = ttm_bo_get_unless_zero(&vbo->base); if (!val_buf->bo) return -ESRCH; - val_buf->num_shared = 0; + val_buf->num_shared = 16; list_add_tail(&val_buf->head, &ctx->bo_list); bo_node->as_mob = as_mob; bo_node->cpu_blit = cpu_blit;
Fails the same BUG_ON with num_fences and max_fences == 0.
Thanks for testing this.
So the buffer object is not reserved through vmw_validation_bo_reserve(), but comes from somewhere else. Unfortunately I absolutely can't find where that's coming from.
Do you have some documentation howto setup vmwgfx? E.g. sample VM which I can download somewhere etc..
Thanks, Christian.
z
On Thu, 2022-04-21 at 12:17 +0200, Christian König wrote:
⚠ External Email
Am 20.04.22 um 21:28 schrieb Zack Rusin:
[SNIP]
To figure out what it is could you try the following code fragment:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..a36f89d3f36d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -288,7 +288,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, val_buf->bo = ttm_bo_get_unless_zero(&vbo-
base);
if (!val_buf->bo) return -ESRCH; - val_buf->num_shared = 0; + val_buf->num_shared = 16; list_add_tail(&val_buf->head, &ctx->bo_list); bo_node->as_mob = as_mob; bo_node->cpu_blit = cpu_blit;
Fails the same BUG_ON with num_fences and max_fences == 0.
Thanks for testing this.
So the buffer object is not reserved through vmw_validation_bo_reserve(), but comes from somewhere else. Unfortunately I absolutely can't find where that's coming from.
Do you have some documentation howto setup vmwgfx? E.g. sample VM which I can download somewhere etc..
I don't have an external machine to upload it to. Getting an external machine to run Mesa CI on has been on our todo for a while, so I'll try to setup something next week.
The issue here seems to be that vmwgfx always had some buffers that didn't immediately go through vmw_validation_bo_reserve. What's happening is that in vmwgfx_execbuf.c in vmw_execbuf_process we call vmw_validation_bo_reserve and after it we call vmw_validation_res_validate. Inside vmw_validation_res_validate (in vmwgfx_validation.c) we call vmw_resource_validate, which calls vmw_resource_do_validate . vmw_resource_do_validate has this code "ret = func->create(res);" which is an issue for vmwgfx_cotable.c . The func->create for cotable's is vmw_cotable_create which calls vmw_cotable_resize which creates, reserves and validates a new bo.
In short a new bo is created in vmw_cotable_resize between ttm_eu_reserve_buffers and ttm_eu_fence_buffer_objects calls.
z
Am 21.04.22 um 23:13 schrieb Zack Rusin:
On Thu, 2022-04-21 at 12:17 +0200, Christian König wrote:
⚠ External Email
Am 20.04.22 um 21:28 schrieb Zack Rusin:
[SNIP]
To figure out what it is could you try the following code fragment:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..a36f89d3f36d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -288,7 +288,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, val_buf->bo = ttm_bo_get_unless_zero(&vbo-
base);
if (!val_buf->bo) return -ESRCH; - val_buf->num_shared = 0; + val_buf->num_shared = 16; list_add_tail(&val_buf->head, &ctx->bo_list); bo_node->as_mob = as_mob; bo_node->cpu_blit = cpu_blit;
Fails the same BUG_ON with num_fences and max_fences == 0.
Thanks for testing this.
So the buffer object is not reserved through vmw_validation_bo_reserve(), but comes from somewhere else. Unfortunately I absolutely can't find where that's coming from.
Do you have some documentation howto setup vmwgfx? E.g. sample VM which I can download somewhere etc..
I don't have an external machine to upload it to. Getting an external machine to run Mesa CI on has been on our todo for a while, so I'll try to setup something next week.
The issue here seems to be that vmwgfx always had some buffers that didn't immediately go through vmw_validation_bo_reserve. What's happening is that in vmwgfx_execbuf.c in vmw_execbuf_process we call vmw_validation_bo_reserve and after it we call vmw_validation_res_validate. Inside vmw_validation_res_validate (in vmwgfx_validation.c) we call vmw_resource_validate, which calls vmw_resource_do_validate . vmw_resource_do_validate has this code "ret = func->create(res);" which is an issue for vmwgfx_cotable.c . The func->create for cotable's is vmw_cotable_create which calls vmw_cotable_resize which creates, reserves and validates a new bo.
Just wow! No wonder that I missed that, I would have never ever been able to find it.
In short a new bo is created in vmw_cotable_resize between ttm_eu_reserve_buffers and ttm_eu_fence_buffer_objects calls.
Well then the fix is trivial, we just need to call dma_resv_reserve_fences() on this new BO to make sure that at least one fence slot is reserved.
I will try to come up with a patch. Thanks a lot for that!
Regards, Christian.
z
Add an usage for kernel submissions. Waiting for those are mandatory for dynamic DMA-bufs.
As a precaution this patch also changes all occurrences where fences are added as part of memory management in TTM, VMWGFX and i915 to use the new value because it now becomes possible for drivers to ignore fences with the WRITE usage.
v2: use "must" in documentation, fix whitespaces v3: separate out some driver changes and better document why some changes should still be part of this patch.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-resv.c | 2 +- drivers/dma-buf/st-dma-resv.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 4 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 2 +- include/linux/dma-resv.h | 24 ++++++++++++++++++--- 7 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 378d47e1cfea..f4860e5f2d8b 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -726,7 +726,7 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { - static const char *usage[] = { "write", "read" }; + static const char *usage[] = { "kernel", "write", "read" }; struct dma_resv_iter cursor; struct dma_fence *fence;
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index d0f7c2bfd4f0..062b57d63fa6 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -296,7 +296,7 @@ int dma_resv(void) int r;
spin_lock_init(&fence_lock); - for (usage = DMA_RESV_USAGE_WRITE; usage <= DMA_RESV_USAGE_READ; + for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ; ++usage) { r = subtests(tests, (void *)(unsigned long)usage); if (r) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index f5f2b8b115ea..0512afdd20d8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -117,7 +117,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, i915_fence_timeout(i915), I915_FENCE_GFP); dma_resv_add_fence(obj->base.resv, &clflush->base.dma, - DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_KERNEL); dma_fence_work_commit(&clflush->base); /* * We must have successfully populated the pages(since we are diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d74f9eea855e..6bf3fb1c8045 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -739,7 +739,7 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, return ret; }
- dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL);
ret = dma_resv_reserve_fences(bo->base.resv, 1); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 7a96a1db13a7..99deb45894f4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -508,7 +508,7 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, return ret;
dma_resv_add_fence(&ghost_obj->base._resv, fence, - DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_KERNEL);
/** * If we're not moving to fixed memory, the TTM object @@ -562,7 +562,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); int ret = 0;
- dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_WRITE); + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); if (!evict) ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); else if (!from->use_tt && pipeline) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index bec50223efe5..408ede1f967f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -759,7 +759,7 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, ret = dma_resv_reserve_fences(bo->base.resv, 1); if (!ret) dma_resv_add_fence(bo->base.resv, &fence->base, - DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_KERNEL); else /* Last resort fallback when we are OOM */ dma_fence_wait(&fence->base, false); diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 7bb7e7edbb6f..a749f229ae91 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -55,11 +55,29 @@ struct dma_resv_list; * This enum describes the different use cases for a dma_resv object and * controls which fences are returned when queried. * - * An important fact is that there is the order WRITE<READ and when the - * dma_resv object is asked for fences for one use case the fences for the - * lower use case are returned as well. + * An important fact is that there is the order KERNEL<WRITE<READ and + * when the dma_resv object is asked for fences for one use case the fences + * for the lower use case are returned as well. + * + * For example when asking for WRITE fences then the KERNEL fences are returned + * as well. Similar when asked for READ fences then both WRITE and KERNEL + * fences are returned as well. */ enum dma_resv_usage { + /** + * @DMA_RESV_USAGE_KERNEL: For in kernel memory management only. + * + * This should only be used for things like copying or clearing memory + * with a DMA hardware engine for the purpose of kernel memory + * management. + * + * Drivers *always* must wait for those fences before accessing the + * resource protected by the dma_resv object. The only exception for + * that is when the resource is known to be locked down in place by + * pinning it previously. + */ + DMA_RESV_USAGE_KERNEL, + /** * @DMA_RESV_USAGE_WRITE: Implicit write synchronization. *
Wait only for kernel fences before kmap or UVD direct submission.
This also makes sure that we always wait in amdgpu_bo_kmap() even when returning a cached pointer.
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a3cdf8a24377..5832c05ab10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -761,6 +761,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM;
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + kptr = amdgpu_bo_kptr(bo); if (kptr) { if (ptr) @@ -768,11 +773,6 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; }
- r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, - false, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 3654326219e0..6eac649499d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1164,7 +1164,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (direct) { r = dma_resv_wait_timeout(bo->tbo.base.resv, - DMA_RESV_USAGE_WRITE, false, + DMA_RESV_USAGE_KERNEL, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT;
Always wait for kernel fences before kmap and not only for UVD kmaps.
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/gpu/drm/radeon/radeon_object.c | 7 ++++++- drivers/gpu/drm/radeon/radeon_uvd.c | 12 ++---------- 2 files changed, 8 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index cb5c4aa45cef..6c4a6802ca96 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -219,7 +219,12 @@ int radeon_bo_create(struct radeon_device *rdev, int radeon_bo_kmap(struct radeon_bo *bo, void **ptr) { bool is_iomem; - int r; + long r; + + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r;
if (bo->kptr) { if (ptr) { diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index a50750740ab0..a2cda184b2b2 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -470,24 +470,16 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo, int32_t *msg, msg_type, handle; unsigned img_size = 0; void *ptr; - long r; - int i; + int i, r;
if (offset & 0x3F) { DRM_ERROR("UVD messages must be 64 byte aligned!\n"); return -EINVAL; }
- r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_WRITE, - false, MAX_SCHEDULE_TIMEOUT); - if (r <= 0) { - DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r); - return r ? r : -ETIME; - } - r = radeon_bo_kmap(bo, &ptr); if (r) { - DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r); + DRM_ERROR("Failed mapping the UVD message (%d)!\n", r); return r; }
Don't wait for user space submissions. I'm not 100% sure if that is correct, but it seems to match what the code initially intended.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 05076e530e7d..13deb6c70ba6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -962,10 +962,10 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo, struct dma_fence *fence; int ret;
- ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_WRITE, + ret = dma_resv_get_singleton(bo->base.resv, DMA_RESV_USAGE_KERNEL, &fence); if (ret) - dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_WRITE, + dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT);
nv10_bo_put_tile_region(dev, *old_tile, fence);
We only need to wait for kernel submissions here.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/infiniband/core/umem_dmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index f9901d273b8e..fce80a4a5147 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -68,7 +68,7 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) * the migration. */ return dma_resv_wait_timeout(umem_dmabuf->attach->dmabuf->resv, - DMA_RESV_USAGE_WRITE, + DMA_RESV_USAGE_KERNEL, false, MAX_SCHEDULE_TIMEOUT); } EXPORT_SYMBOL(ib_umem_dmabuf_map_pages);
Add an usage for submissions independent of implicit sync but still interesting for memory management.
v2: cleanup the kerneldoc a bit v3: separate amdgpu changes from this
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-resv.c | 4 ++-- drivers/dma-buf/st-dma-resv.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 2 +- drivers/gpu/drm/qxl/qxl_debugfs.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_mn.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 14 +++++++------- include/linux/dma-resv.h | 13 ++++++++++++- 14 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index f4860e5f2d8b..5b64aa554c36 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -520,7 +520,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
list = NULL;
- dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_READ); + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, f) {
if (dma_resv_iter_is_restarted(&cursor)) { @@ -726,7 +726,7 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { - static const char *usage[] = { "kernel", "write", "read" }; + static const char *usage[] = { "kernel", "write", "read", "bookkeep" }; struct dma_resv_iter cursor; struct dma_fence *fence;
diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index 062b57d63fa6..8ace9e84c845 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -296,7 +296,7 @@ int dma_resv(void) int r;
spin_lock_init(&fence_lock); - for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_READ; + for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_BOOKKEEP; ++usage) { r = subtests(tests, (void *)(unsigned long)usage); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 65998cbcd7f7..4ba4b54092f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, struct dma_fence *fence; int r;
- r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_READ, &fence); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence); if (r) goto fallback;
@@ -139,7 +139,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 86f5248676b0..b86c0b8252a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,7 +75,7 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(mni, cur_seq);
- r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 744e144e5fc2..11c46b3e4c60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -260,7 +260,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, return -EINVAL;
/* TODO: Use DMA_RESV_USAGE_READ here */ - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5db5066e74b4..49ffad312d5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1345,7 +1345,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * be resident to run successfully */ dma_resv_for_each_fence(&resv_cursor, bo->base.resv, - DMA_RESV_USAGE_READ, f) { + DMA_RESV_USAGE_BOOKKEEP, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a0376fd36a82..5277c10d901d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2059,7 +2059,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2665,7 +2665,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true;
/* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_READ)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) return false;
/* Try to block ongoing updates */ @@ -2846,7 +2846,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_READ, + DMA_RESV_USAGE_BOOKKEEP, true, timeout); if (timeout <= 0) return timeout; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index a200d3e66573..4115a222a853 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -66,7 +66,7 @@ bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj) struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
#ifdef CONFIG_LOCKDEP - GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_READ) && + GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) && i915_gem_object_evictable(obj)); #endif return mr && (mr->type == INTEL_MEMORY_LOCAL || diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 644fe237601c..094f06b4ce33 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -86,7 +86,7 @@ static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni, return true;
/* we will unbind on next submission, still have userptr pins */ - r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_READ, false, + r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r); diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c index 33e5889d6608..2d9ed3b94574 100644 --- a/drivers/gpu/drm/qxl/qxl_debugfs.c +++ b/drivers/gpu/drm/qxl/qxl_debugfs.c @@ -62,7 +62,7 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data) int rel = 0;
dma_resv_iter_begin(&cursor, bo->tbo.base.resv, - DMA_RESV_USAGE_READ); + DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) rel = 0; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 6616a828f40b..8c01a7f0e027 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -163,7 +163,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj, if (domain == RADEON_GEM_DOMAIN_CPU) { /* Asking for cpu access wait for object idle */ r = dma_resv_wait_timeout(robj->tbo.base.resv, - DMA_RESV_USAGE_READ, + DMA_RESV_USAGE_BOOKKEEP, true, 30 * HZ); if (!r) r = -EBUSY; diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 68ebeb1bdfff..29fe8423bd90 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -66,7 +66,7 @@ static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn, return true; }
- r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_READ, + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6bf3fb1c8045..360f980c7e10 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -223,7 +223,7 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) struct dma_resv_iter cursor; struct dma_fence *fence;
- dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_READ); + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); @@ -252,7 +252,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, struct dma_resv *resv = &bo->base._resv; int ret;
- if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_READ)) + if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) ret = 0; else ret = -EBUSY; @@ -264,7 +264,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, dma_resv_unlock(bo->base.resv); spin_unlock(&bo->bdev->lru_lock);
- lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_READ, + lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, interruptible, 30 * HZ);
@@ -369,7 +369,7 @@ static void ttm_bo_release(struct kref *kref) * fences block for the BO to become idle */ dma_resv_wait_timeout(bo->base.resv, - DMA_RESV_USAGE_READ, false, + DMA_RESV_USAGE_BOOKKEEP, false, 30 * HZ); }
@@ -380,7 +380,7 @@ static void ttm_bo_release(struct kref *kref) ttm_mem_io_free(bdev, bo->resource); }
- if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ) || + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) || !dma_resv_trylock(bo->base.resv)) { /* The BO is not idle, resurrect it for delayed destroy */ ttm_bo_flush_all_fences(bo); @@ -1046,13 +1046,13 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, long timeout = 15 * HZ;
if (no_wait) { - if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP)) return 0; else return -EBUSY; }
- timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_READ, + timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, interruptible, timeout); if (timeout < 0) return timeout; diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index a749f229ae91..1db759eacc98 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -55,7 +55,7 @@ struct dma_resv_list; * This enum describes the different use cases for a dma_resv object and * controls which fences are returned when queried. * - * An important fact is that there is the order KERNEL<WRITE<READ and + * An important fact is that there is the order KERNEL<WRITE<READ<BOOKKEEP and * when the dma_resv object is asked for fences for one use case the fences * for the lower use case are returned as well. * @@ -93,6 +93,17 @@ enum dma_resv_usage { * an implicit read dependency. */ DMA_RESV_USAGE_READ, + + /** + * @DMA_RESV_USAGE_BOOKKEEP: No implicit sync. + * + * This should be used by submissions which don't want to participate in + * implicit synchronization. + * + * The most common case are preemption fences as well as page table + * updates and their TLB flushes. + */ + DMA_RESV_USAGE_BOOKKEEP };
/**
Use DMA_RESV_USAGE_BOOKKEEP for VM page table updates and KFD preemption fence.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 5031e26e6716..808e21dcb517 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement, DMA_RESV_USAGE_READ); + replacement, DMA_RESV_USAGE_BOOKKEEP); dma_fence_put(replacement); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index dbb551762805..9485b541947e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -112,7 +112,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, swap(p->vm->last_unlocked, f); dma_fence_put(tmp); } else { - amdgpu_bo_fence(p->vm->root.bo, f, true); + dma_resv_add_fence(p->vm->root.bo->tbo.base.resv, f, + DMA_RESV_USAGE_BOOKKEEP); }
if (fence && !p->immediate)
We have previously done that in the individual drivers but it is more defensive to move that into the common code.
Dynamic attachments should wait for map operations to complete by themselves.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-buf.c | 18 +++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 14 +------------- drivers/gpu/drm/nouveau/nouveau_prime.c | 17 +---------------- drivers/gpu/drm/radeon/radeon_prime.c | 16 +++------------- 4 files changed, 20 insertions(+), 45 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 1cddb65eafda..79795857be3e 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -661,12 +661,24 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction direction) { struct sg_table *sg_table; + signed long ret;
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); + if (IS_ERR_OR_NULL(sg_table)) + return sg_table; + + if (!dma_buf_attachment_is_dynamic(attach)) { + ret = dma_resv_wait_timeout(attach->dmabuf->resv, + DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) { + attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, + direction); + return ERR_PTR(ret); + } + }
- if (!IS_ERR_OR_NULL(sg_table)) - mangle_sg_table(sg_table); - + mangle_sg_table(sg_table); return sg_table; }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 579adfafe4d0..782cbca37538 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int r;
/* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); - if (r) - return r; - - if (bo->tbo.moving) { - r = dma_fence_wait(bo->tbo.moving, true); - if (r) { - amdgpu_bo_unpin(bo); - return r; - } - } - return 0; + return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); }
/** diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index 60019d0532fc..347488685f74 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -93,22 +93,7 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj) if (ret) return -EINVAL;
- ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); - if (ret) - goto error; - - if (nvbo->bo.moving) - ret = dma_fence_wait(nvbo->bo.moving, true); - - ttm_bo_unreserve(&nvbo->bo); - if (ret) - goto error; - - return ret; - -error: - nouveau_bo_unpin(nvbo); - return ret; + return 0; }
void nouveau_gem_prime_unpin(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index 4a90807351e7..42a87948e28c 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -77,19 +77,9 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
/* pin buffer into GTT */ ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL); - if (unlikely(ret)) - goto error; - - if (bo->tbo.moving) { - ret = dma_fence_wait(bo->tbo.moving, false); - if (unlikely(ret)) { - radeon_bo_unpin(bo); - goto error; - } - } - - bo->prime_shared_count++; -error: + if (likely(ret == 0)) + bo->prime_shared_count++; + radeon_bo_unreserve(bo); return ret; }
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /** * i915_gem_object_get_moving_fence - Get the object's moving fence if any * @obj: The object whose moving fence to get. + * @fence: The resulting fence * * A non-signaled moving fence means that there is an async operation * pending on the object that needs to be waited on before setting up * any GPU- or CPU PTEs to the object's pages. * - * Return: A refcounted pointer to the object's moving fence if any, - * NULL otherwise. + * Return: Negative error code or 0 for success. */ -struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence **fence) { - return dma_fence_get(i915_gem_to_ttm(obj)->moving); -} - -void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, - struct dma_fence *fence) -{ - struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving; - - if (*moving == fence) - return; - - dma_fence_put(*moving); - *moving = dma_fence_get(fence); + return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL, + fence); }
/** @@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) { - struct dma_fence *fence = i915_gem_to_ttm(obj)->moving; - int ret; - assert_object_held(obj); - if (!fence) - return 0; - - ret = dma_fence_wait(fence, intr); - if (ret) - return ret; - - if (fence->error) - return fence->error; - - i915_gem_to_ttm(obj)->moving = NULL; - dma_fence_put(fence); - return 0; + return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL, + intr, MAX_SCHEDULE_TIMEOUT); }
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); }
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj); - -void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, - struct dma_fence *fence); - +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj, + struct dma_fence **fence); int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; }
-static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, - struct i915_deps *deps) -{ - int ret; - - ret = i915_deps_add_dependency(deps, bo->moving, ctx); - if (!ret) - ret = i915_deps_add_resv(deps, bo->base.resv, ctx); - - return ret; -} - /** * i915_ttm_move - The TTM move callback used by i915. * @bo: The buffer object. @@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps;
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); - ret = prev_deps(bo, ctx, &deps); + ret = i915_deps_add_resv(&deps, bo->base.resv, ctx); if (ret) { i915_refct_sgt_put(dst_rsgt); return ret; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence, - DMA_RESV_USAGE_WRITE); - i915_gem_object_set_moving_fence(obj, &rq->fence); + DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } if (err) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence, - DMA_RESV_USAGE_WRITE); - i915_gem_object_set_moving_fence(obj, &rq->fence); + DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } i915_gem_object_unlock(obj); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
+ if (vma->obj) { + err = i915_gem_object_get_moving_fence(vma->obj, &moving); + if (err) + return err; + } else { + moving = NULL; + } + if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
On Thu, 07 Apr 2022, "Christian König" ckoenig.leichtzumerken@gmail.com wrote:
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org
So, where are the i915 maintainer acks for merging this (and the other patches in the series touching i915) via drm-misc-next?
Daniel's Reviewed-by is not an ack to merge outside drm-intel-next.
We don't merge i915 stuff without passing CI results. Apparently this one failed enough machines that the CI had to be stopped entirely.
BR, Jani.
drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /**
- i915_gem_object_get_moving_fence - Get the object's moving fence if any
- @obj: The object whose moving fence to get.
- @fence: The resulting fence
- A non-signaled moving fence means that there is an async operation
- pending on the object that needs to be waited on before setting up
- any GPU- or CPU PTEs to the object's pages.
- Return: A refcounted pointer to the object's moving fence if any,
- NULL otherwise.
*/
- Return: Negative error code or 0 for success.
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence **fence)
{
- return dma_fence_get(i915_gem_to_ttm(obj)->moving);
-}
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence)
-{
- struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
- if (*moving == fence)
return;
- dma_fence_put(*moving);
- *moving = dma_fence_get(fence);
- return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
fence);
} /** @@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) {
- struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
- int ret;
- assert_object_held(obj);
- if (!fence)
return 0;
- ret = dma_fence_wait(fence, intr);
- if (ret)
return ret;
- if (fence->error)
return fence->error;
- i915_gem_to_ttm(obj)->moving = NULL;
- dma_fence_put(fence);
- return 0;
- return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL,
intr, MAX_SCHEDULE_TIMEOUT);
} #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence);
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence **fence);
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } -static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
-{
- int ret;
- ret = i915_deps_add_dependency(deps, bo->moving, ctx);
- if (!ret)
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return ret;
-}
/**
- i915_ttm_move - The TTM move callback used by i915.
- @bo: The buffer object.
@@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps; i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
if (ret) { i915_refct_sgt_put(dst_rsgt); return ret;ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
} if (err)DMA_RESV_USAGE_KERNEL); i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
i915_request_put(rq); } i915_gem_object_unlock(obj);DMA_RESV_USAGE_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
- if (vma->obj) {
err = i915_gem_object_get_moving_fence(vma->obj, &moving);
if (err)
return err;
- } else {
moving = NULL;
- }
- if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
Am 08.04.22 um 11:05 schrieb Jani Nikula:
On Thu, 07 Apr 2022, "Christian König" ckoenig.leichtzumerken@gmail.com wrote:
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org
So, where are the i915 maintainer acks for merging this (and the other patches in the series touching i915) via drm-misc-next?
Daniel's Reviewed-by is not an ack to merge outside drm-intel-next.
I had the impression that it would be sufficient.
We don't merge i915 stuff without passing CI results. Apparently this one failed enough machines that the CI had to be stopped entirely.
That was unfortunately partially expected and pointed out by Matthew and Daniel before the push.
i915 for some reason extended the usage of the bo->moving fence despite the fact we had patches on the mailing list to entirely remove this feature.
I couldn't get any sane CI results for weeks because of this and at some point we just had to go ahead and fix the clash in drm-tip.
Sorry for any inconvenience cause by that. I hoped that we fixed all cases, but looks like we still missed some.
Regards, Christian.
BR, Jani.
drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /**
- i915_gem_object_get_moving_fence - Get the object's moving fence if any
- @obj: The object whose moving fence to get.
- @fence: The resulting fence
- A non-signaled moving fence means that there is an async operation
- pending on the object that needs to be waited on before setting up
- any GPU- or CPU PTEs to the object's pages.
- Return: A refcounted pointer to the object's moving fence if any,
- NULL otherwise.
*/
- Return: Negative error code or 0 for success.
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
{struct dma_fence **fence)
- return dma_fence_get(i915_gem_to_ttm(obj)->moving);
-}
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence)
-{
- struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
- if (*moving == fence)
return;
- dma_fence_put(*moving);
- *moving = dma_fence_get(fence);
- return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
}fence);
/** @@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) {
- struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
- int ret;
- assert_object_held(obj);
- if (!fence)
return 0;
- ret = dma_fence_wait(fence, intr);
- if (ret)
return ret;
- if (fence->error)
return fence->error;
- i915_gem_to_ttm(obj)->moving = NULL;
- dma_fence_put(fence);
- return 0;
- return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL,
}intr, MAX_SCHEDULE_TIMEOUT);
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence);
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr);struct dma_fence **fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } -static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
-{
- int ret;
- ret = i915_deps_add_dependency(deps, bo->moving, ctx);
- if (!ret)
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return ret;
-}
- /**
- i915_ttm_move - The TTM move callback used by i915.
- @bo: The buffer object.
@@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps; i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
if (ret) { i915_refct_sgt_put(dst_rsgt); return ret;ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
} if (err)DMA_RESV_USAGE_KERNEL); i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
i915_request_put(rq); } i915_gem_object_unlock(obj);DMA_RESV_USAGE_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
- if (vma->obj) {
err = i915_gem_object_get_moving_fence(vma->obj, &moving);
if (err)
return err;
- } else {
moving = NULL;
- }
- if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
On Fri, 8 Apr 2022 at 11:27, Christian König christian.koenig@amd.com wrote:
Am 08.04.22 um 11:05 schrieb Jani Nikula:
On Thu, 07 Apr 2022, "Christian König" ckoenig.leichtzumerken@gmail.com wrote:
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org
So, where are the i915 maintainer acks for merging this (and the other patches in the series touching i915) via drm-misc-next?
Daniel's Reviewed-by is not an ack to merge outside drm-intel-next.
I had the impression that it would be sufficient.
We don't merge i915 stuff without passing CI results. Apparently this one failed enough machines that the CI had to be stopped entirely.
That was unfortunately partially expected and pointed out by Matthew and Daniel before the push.
Uh I didn't realize that CI never tested this. Usually the way then is to rebase onto drm-tip and figure out the merge story. Doing subsystem wide changes while not on linux-next or drm-tip or another integration tree is just fraught with surprises due to conflicts.
Also "doesn't even compile" is really below the bar, and not the first time this happened at all. And i915 isn't really an obscure driver which is hard to compile test :-) -Daniel
i915 for some reason extended the usage of the bo->moving fence despite the fact we had patches on the mailing list to entirely remove this feature.
I couldn't get any sane CI results for weeks because of this and at some point we just had to go ahead and fix the clash in drm-tip.
Sorry for any inconvenience cause by that. I hoped that we fixed all cases, but looks like we still missed some.
Regards, Christian.
BR, Jani.
drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /**
- i915_gem_object_get_moving_fence - Get the object's moving fence if any
- @obj: The object whose moving fence to get.
- @fence: The resulting fence
- A non-signaled moving fence means that there is an async operation
- pending on the object that needs to be waited on before setting up
- any GPU- or CPU PTEs to the object's pages.
- Return: A refcounted pointer to the object's moving fence if any,
- NULL otherwise.
*/
- Return: Negative error code or 0 for success.
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
{struct dma_fence **fence)
- return dma_fence_get(i915_gem_to_ttm(obj)->moving);
-}
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence)
-{
- struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
- if (*moving == fence)
return;
- dma_fence_put(*moving);
- *moving = dma_fence_get(fence);
return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
fence);
}
/**
@@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) {
- struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
- int ret;
- assert_object_held(obj);
- if (!fence)
return 0;
- ret = dma_fence_wait(fence, intr);
- if (ret)
return ret;
- if (fence->error)
return fence->error;
- i915_gem_to_ttm(obj)->moving = NULL;
- dma_fence_put(fence);
- return 0;
return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL,
intr, MAX_SCHEDULE_TIMEOUT);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); }
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence);
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr);struct dma_fence **fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; }
-static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
-{
- int ret;
- ret = i915_deps_add_dependency(deps, bo->moving, ctx);
- if (!ret)
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return ret;
-}
- /**
- i915_ttm_move - The TTM move callback used by i915.
- @bo: The buffer object.
@@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps;
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
ret = i915_deps_add_resv(&deps, bo->base.resv, ctx); if (ret) { i915_refct_sgt_put(dst_rsgt); return ret;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
} i915_gem_object_unlock(obj);DMA_RESV_USAGE_KERNEL); i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
- if (vma->obj) {
err = i915_gem_object_get_moving_fence(vma->obj, &moving);
if (err)
return err;
- } else {
moving = NULL;
- }
- if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
Am 08.04.22 um 11:33 schrieb Daniel Vetter:
On Fri, 8 Apr 2022 at 11:27, Christian König christian.koenig@amd.com wrote:
Am 08.04.22 um 11:05 schrieb Jani Nikula:
On Thu, 07 Apr 2022, "Christian König" ckoenig.leichtzumerken@gmail.com wrote:
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org
So, where are the i915 maintainer acks for merging this (and the other patches in the series touching i915) via drm-misc-next?
Daniel's Reviewed-by is not an ack to merge outside drm-intel-next.
I had the impression that it would be sufficient.
We don't merge i915 stuff without passing CI results. Apparently this one failed enough machines that the CI had to be stopped entirely.
That was unfortunately partially expected and pointed out by Matthew and Daniel before the push.
Uh I didn't realize that CI never tested this. Usually the way then is to rebase onto drm-tip and figure out the merge story. Doing subsystem wide changes while not on linux-next or drm-tip or another integration tree is just fraught with surprises due to conflicts.
Also "doesn't even compile" is really below the bar, and not the first time this happened at all. And i915 isn't really an obscure driver which is hard to compile test :-)
Yeah, I'm really wondering how the build breakage slipped as well.
I've double checked it multiple times now. My build system was certainly not complaining about anything, but right after the push both i915, VC4 and one of the core header changes broke the build on drm-tip.
Going to try to figure out why that didn't worked as expected.
Thanks, Christian
-Daniel
i915 for some reason extended the usage of the bo->moving fence despite the fact we had patches on the mailing list to entirely remove this feature.
I couldn't get any sane CI results for weeks because of this and at some point we just had to go ahead and fix the clash in drm-tip.
Sorry for any inconvenience cause by that. I hoped that we fixed all cases, but looks like we still missed some.
Regards, Christian.
BR, Jani.
drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /** * i915_gem_object_get_moving_fence - Get the object's moving fence if any * @obj: The object whose moving fence to get.
- @fence: The resulting fence
- A non-signaled moving fence means that there is an async operation
- pending on the object that needs to be waited on before setting up
- any GPU- or CPU PTEs to the object's pages.
- Return: A refcounted pointer to the object's moving fence if any,
- NULL otherwise.
- Return: Negative error code or 0 for success. */
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
{struct dma_fence **fence)
- return dma_fence_get(i915_gem_to_ttm(obj)->moving);
-}
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence)
-{
- struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
- if (*moving == fence)
return;
- dma_fence_put(*moving);
- *moving = dma_fence_get(fence);
return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
fence);
}
/**
@@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) {
- struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
- int ret;
assert_object_held(obj);
- if (!fence)
return 0;
- ret = dma_fence_wait(fence, intr);
- if (ret)
return ret;
- if (fence->error)
return fence->error;
- i915_gem_to_ttm(obj)->moving = NULL;
- dma_fence_put(fence);
- return 0;
return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL,
intr, MAX_SCHEDULE_TIMEOUT);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); }
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence);
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr);struct dma_fence **fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; }
-static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
-{
- int ret;
- ret = i915_deps_add_dependency(deps, bo->moving, ctx);
- if (!ret)
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return ret;
-}
- /**
- i915_ttm_move - The TTM move callback used by i915.
- @bo: The buffer object.
@@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps;
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
ret = i915_deps_add_resv(&deps, bo->base.resv, ctx); if (ret) { i915_refct_sgt_put(dst_rsgt); return ret;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } i915_gem_object_unlock(obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
- if (vma->obj) {
err = i915_gem_object_get_moving_fence(vma->obj, &moving);
if (err)
return err;
- } else {
moving = NULL;
- }
if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
On Fri, 08 Apr 2022, Christian König christian.koenig@amd.com wrote:
Am 08.04.22 um 11:05 schrieb Jani Nikula:
On Thu, 07 Apr 2022, "Christian König" ckoenig.leichtzumerken@gmail.com wrote:
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org
So, where are the i915 maintainer acks for merging this (and the other patches in the series touching i915) via drm-misc-next?
Daniel's Reviewed-by is not an ack to merge outside drm-intel-next.
I had the impression that it would be sufficient.
Please don't assume. Please always ask for explicit acks from the maintainers before merging, and record the acks in the commit message. This has been standard policy for as long as I remember.
Contrast with us merging non-trivial dma-buf changes via drm-intel-next with a Reviewed-by from someone who isn't a dma-buf maintainer, and not even bothering to Cc the maintainers.
We don't merge i915 stuff without passing CI results. Apparently this one failed enough machines that the CI had to be stopped entirely.
That was unfortunately partially expected and pointed out by Matthew and Daniel before the push.
i915 for some reason extended the usage of the bo->moving fence despite the fact we had patches on the mailing list to entirely remove this feature.
I couldn't get any sane CI results for weeks because of this and at some point we just had to go ahead and fix the clash in drm-tip.
Did you talk to the maintainers about it?
BR, Jani.
Sorry for any inconvenience cause by that. I hoped that we fixed all cases, but looks like we still missed some.
Regards, Christian.
BR, Jani.
drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /**
- i915_gem_object_get_moving_fence - Get the object's moving fence if any
- @obj: The object whose moving fence to get.
- @fence: The resulting fence
- A non-signaled moving fence means that there is an async operation
- pending on the object that needs to be waited on before setting up
- any GPU- or CPU PTEs to the object's pages.
- Return: A refcounted pointer to the object's moving fence if any,
- NULL otherwise.
*/
- Return: Negative error code or 0 for success.
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
{struct dma_fence **fence)
- return dma_fence_get(i915_gem_to_ttm(obj)->moving);
-}
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence)
-{
- struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
- if (*moving == fence)
return;
- dma_fence_put(*moving);
- *moving = dma_fence_get(fence);
- return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
}fence);
/** @@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) {
- struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
- int ret;
- assert_object_held(obj);
- if (!fence)
return 0;
- ret = dma_fence_wait(fence, intr);
- if (ret)
return ret;
- if (fence->error)
return fence->error;
- i915_gem_to_ttm(obj)->moving = NULL;
- dma_fence_put(fence);
- return 0;
- return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL,
}intr, MAX_SCHEDULE_TIMEOUT);
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence);
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr);struct dma_fence **fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } -static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
-{
- int ret;
- ret = i915_deps_add_dependency(deps, bo->moving, ctx);
- if (!ret)
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return ret;
-}
- /**
- i915_ttm_move - The TTM move callback used by i915.
- @bo: The buffer object.
@@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps; i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
if (ret) { i915_refct_sgt_put(dst_rsgt); return ret;ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
} if (err)DMA_RESV_USAGE_KERNEL); i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
i915_request_put(rq); } i915_gem_object_unlock(obj);DMA_RESV_USAGE_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
- if (vma->obj) {
err = i915_gem_object_get_moving_fence(vma->obj, &moving);
if (err)
return err;
- } else {
moving = NULL;
- }
- if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
Am 08.04.22 um 12:15 schrieb Jani Nikula:
On Fri, 08 Apr 2022, Christian König christian.koenig@amd.com wrote:
Am 08.04.22 um 11:05 schrieb Jani Nikula:
On Thu, 07 Apr 2022, "Christian König" ckoenig.leichtzumerken@gmail.com wrote:
That should now be handled by the common dma_resv framework.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch Cc: intel-gfx@lists.freedesktop.org
So, where are the i915 maintainer acks for merging this (and the other patches in the series touching i915) via drm-misc-next?
Daniel's Reviewed-by is not an ack to merge outside drm-intel-next.
I had the impression that it would be sufficient.
Please don't assume. Please always ask for explicit acks from the maintainers before merging, and record the acks in the commit message. This has been standard policy for as long as I remember.
Acked.
Contrast with us merging non-trivial dma-buf changes via drm-intel-next with a Reviewed-by from someone who isn't a dma-buf maintainer, and not even bothering to Cc the maintainers.
Exactly that has happened before. And yes you are right we need to get more Acks here.
We don't merge i915 stuff without passing CI results. Apparently this one failed enough machines that the CI had to be stopped entirely.
That was unfortunately partially expected and pointed out by Matthew and Daniel before the push.
i915 for some reason extended the usage of the bo->moving fence despite the fact we had patches on the mailing list to entirely remove this feature.
I couldn't get any sane CI results for weeks because of this and at some point we just had to go ahead and fix the clash in drm-tip.
Did you talk to the maintainers about it?
Well, I noted merge problems on the list a few days before.
I'm still puzzled why this didn't worked as expected. I've tested the drm-tip merge result on my build system before the push and it didn't showed anything broken.
After the merge not just i915 broke, the whole core kernel didn't build because of a now missing include in futex.h.
There must be something wrong with my setup here (or I just didn't had enough sleep). One major problem for me is that I can't run dim on my build system, but rather have to push/pull stuff from my laptop over SSH.
My suspicion is that I was testing a stale checkout all the time because of this.
Regards, Christian.
BR, Jani.
Sorry for any inconvenience cause by that. I hoped that we fixed all cases, but looks like we still missed some.
Regards, Christian.
BR, Jani.
drivers/gpu/drm/i915/gem/i915_gem_object.c | 41 ++++--------------- drivers/gpu/drm/i915/gem/i915_gem_object.h | 8 +--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 15 +------ .../drm/i915/gem/selftests/i915_gem_migrate.c | 3 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 3 +- drivers/gpu/drm/i915/i915_vma.c | 9 +++- 6 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 372bc220faeb..ffde7bc0a95d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -741,30 +741,19 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = { /** * i915_gem_object_get_moving_fence - Get the object's moving fence if any * @obj: The object whose moving fence to get.
- @fence: The resulting fence
- A non-signaled moving fence means that there is an async operation
- pending on the object that needs to be waited on before setting up
- any GPU- or CPU PTEs to the object's pages.
- Return: A refcounted pointer to the object's moving fence if any,
- NULL otherwise.
- Return: Negative error code or 0 for success. */
-struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj) +int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
{struct dma_fence **fence)
- return dma_fence_get(i915_gem_to_ttm(obj)->moving);
-}
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence)
-{
- struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
- if (*moving == fence)
return;
- dma_fence_put(*moving);
- *moving = dma_fence_get(fence);
- return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
} /**fence);
@@ -782,23 +771,9 @@ void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj, int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr) {
- struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
- int ret;
- assert_object_held(obj);
- if (!fence)
return 0;
- ret = dma_fence_wait(fence, intr);
- if (ret)
return ret;
- if (fence->error)
return fence->error;
- i915_gem_to_ttm(obj)->moving = NULL;
- dma_fence_put(fence);
- return 0;
- return dma_resv_wait_timeout(obj->base. resv, DMA_RESV_USAGE_KERNEL,
} #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)intr, MAX_SCHEDULE_TIMEOUT);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 02c37fe4a535..e11d82a9f7c3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -520,12 +520,8 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } -struct dma_fence * -i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
-void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
struct dma_fence *fence);
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr);struct dma_fence **fence);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 438b8a95b3d1..a10716f4e717 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -467,19 +467,6 @@ __i915_ttm_move(struct ttm_buffer_object *bo, return fence; } -static int -prev_deps(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
struct i915_deps *deps)
-{
- int ret;
- ret = i915_deps_add_dependency(deps, bo->moving, ctx);
- if (!ret)
ret = i915_deps_add_resv(deps, bo->base.resv, ctx);
- return ret;
-}
- /**
- i915_ttm_move - The TTM move callback used by i915.
- @bo: The buffer object.
@@ -534,7 +521,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct i915_deps deps; i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
ret = prev_deps(bo, ctx, &deps);
ret = i915_deps_add_resv(&deps, bo->base.resv, ctx); if (ret) { i915_refct_sgt_put(dst_rsgt); return ret;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 4997ed18b6e4..0ad443a90c8b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -219,8 +219,7 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, err = dma_resv_reserve_fences(obj->base.resv, 1); if (!err) dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } if (err)
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 3a6e3f6d239f..dfc34cc2ef8c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -1221,8 +1221,7 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, i915_gem_object_unpin_pages(obj); if (rq) { dma_resv_add_fence(obj->base.resv, &rq->fence,
DMA_RESV_USAGE_WRITE);
i915_gem_object_set_moving_fence(obj, &rq->fence);
} i915_gem_object_unlock(obj);DMA_RESV_USAGE_KERNEL); i915_request_put(rq);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 524477d8939e..d077f7b9eaad 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1357,10 +1357,17 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, if (err) return err;
- if (vma->obj) {
err = i915_gem_object_get_moving_fence(vma->obj, &moving);
if (err)
return err;
- } else {
moving = NULL;
- }
- if (flags & PIN_GLOBAL) wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
- moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL; if (flags & vma->vm->bind_async_flags || moving) { /* lock VM */ err = i915_vm_lock_objects(vma->vm, ww);
This is now handled by the DMA-buf framework in the dma_resv obj.
Also remove the workaround inside VMWGFX to update the moving fence.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 11 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 11 ++++-- drivers/gpu/drm/ttm/ttm_bo.c | 10 ++---- drivers/gpu/drm/ttm/ttm_bo_util.c | 7 ---- drivers/gpu/drm/ttm/ttm_bo_vm.c | 34 +++++++------------ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ---- include/drm/ttm/ttm_bo_api.h | 2 -- 9 files changed, 39 insertions(+), 60 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 808e21dcb517..a4955ef76cfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2447,6 +2447,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; struct kfd_mem_attachment *attachment; + struct dma_resv_iter cursor; + struct dma_fence *fence;
total_size += amdgpu_bo_size(bo);
@@ -2461,10 +2463,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + ret = amdgpu_sync_fence(&sync_obj, fence); + if (ret) { + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); + goto validate_map_fail; + } } list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5832c05ab10d..ef93abec13b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -612,9 +612,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (unlikely(r)) goto fail_unreserve;
- amdgpu_bo_fence(bo, fence, false); - dma_fence_put(bo->tbo.moving); - bo->tbo.moving = dma_fence_get(fence); + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } if (!bp->resv) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index e3fbf0f10add..31913ae86de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, { unsigned int i; uint64_t value; - int r; + long r;
- if (vmbo->bo.tbo.moving) { - r = dma_fence_wait(vmbo->bo.tbo.moving, true); - if (r) - return r; - } + r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL, + true, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r;
pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 9485b541947e..9cd6f41896c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -205,14 +205,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, struct amdgpu_bo *bo = &vmbo->bo; enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; + struct dma_resv_iter cursor; unsigned int i, ndw, nptes; + struct dma_fence *fence; uint64_t *pte; int r;
/* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); - if (r) - return r; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + r = amdgpu_sync_fence(&p->job->sync, fence); + if (r) + return r; + }
do { ndw = p->num_dw_left; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 360f980c7e10..015a94f766de 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -418,7 +418,6 @@ static void ttm_bo_release(struct kref *kref) dma_resv_unlock(bo->base.resv);
atomic_dec(&ttm_glob.bo_count); - dma_fence_put(bo->moving); bo->destroy(bo); }
@@ -714,9 +713,8 @@ void ttm_bo_unpin(struct ttm_buffer_object *bo) EXPORT_SYMBOL(ttm_bo_unpin);
/* - * Add the last move fence to the BO and reserve a new shared slot. We only use - * a shared slot to avoid unecessary sync and rely on the subsequent bo move to - * either stall or use an exclusive fence respectively set bo->moving. + * Add the last move fence to the BO as kernel dependency and reserve a new + * fence slot. */ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, struct ttm_resource_manager *man, @@ -746,9 +744,6 @@ static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, dma_fence_put(fence); return ret; } - - dma_fence_put(bo->moving); - bo->moving = fence; return 0; }
@@ -951,7 +946,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, bo->bdev = bdev; bo->type = type; bo->page_alignment = page_alignment; - bo->moving = NULL; bo->pin_count = 0; bo->sg = sg; bo->bulk_move = NULL; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 99deb45894f4..bc5190340b9c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -228,7 +228,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
atomic_inc(&ttm_glob.bo_count); INIT_LIST_HEAD(&fbo->base.ddestroy); - fbo->base.moving = NULL; drm_vma_node_reset(&fbo->base.base.vma_node);
kref_init(&fbo->base.kref); @@ -500,9 +499,6 @@ static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, * operation has completed. */
- dma_fence_put(bo->moving); - bo->moving = dma_fence_get(fence); - ret = ttm_buffer_object_transfer(bo, &ghost_obj); if (ret) return ret; @@ -546,9 +542,6 @@ static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo, spin_unlock(&from->move_lock);
ttm_resource_free(bo, &bo->resource); - - dma_fence_put(bo->moving); - bo->moving = dma_fence_get(fence); }
int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 08ba083a80d2..5b324f245265 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -46,17 +46,13 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { - vm_fault_t ret = 0; - int err = 0; - - if (likely(!bo->moving)) - goto out_unlock; + long err = 0;
/* * Quick non-stalling check for idle. */ - if (dma_fence_is_signaled(bo->moving)) - goto out_clear; + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL)) + return 0;
/* * If possible, avoid waiting for GPU with mmap_lock @@ -64,34 +60,30 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, * is the first attempt. */ if (fault_flag_allow_retry_first(vmf->flags)) { - ret = VM_FAULT_RETRY; if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) - goto out_unlock; + return VM_FAULT_RETRY;
ttm_bo_get(bo); mmap_read_unlock(vmf->vma->vm_mm); - (void) dma_fence_wait(bo->moving, true); + (void)dma_resv_wait_timeout(bo->base.resv, + DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); dma_resv_unlock(bo->base.resv); ttm_bo_put(bo); - goto out_unlock; + return VM_FAULT_RETRY; }
/* * Ordinary wait. */ - err = dma_fence_wait(bo->moving, true); - if (unlikely(err != 0)) { - ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : + err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(err < 0)) { + return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; - goto out_unlock; }
-out_clear: - dma_fence_put(bo->moving); - bo->moving = NULL; - -out_unlock: - return ret; + return 0; }
static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index a84d1d5628d0..a7d62a4eb47b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1161,12 +1161,6 @@ int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start, PAGE_SIZE); vmw_bo_fence_single(bo, NULL); - if (bo->moving) - dma_fence_put(bo->moving); - - return dma_resv_get_singleton(bo->base.resv, - DMA_RESV_USAGE_WRITE, - &bo->moving); }
return 0; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index c76932b68a33..2d524f8b0802 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -94,7 +94,6 @@ struct ttm_tt; * @deleted: True if the object is only a zombie and already deleted. * @ddestroy: List head for the delayed destroy list. * @swap: List head for swap LRU list. - * @moving: Fence set when BO is moving * @offset: The current GPU offset, which can have different meanings * depending on the memory type. For SYSTEM type memory, it should be 0. * @cur_placement: Hint of current placement. @@ -147,7 +146,6 @@ struct ttm_buffer_object { * Members protected by a bo reservation. */
- struct dma_fence *moving; unsigned priority; unsigned pin_count;
This should be possible now since we don't have the distinction between exclusive and shared fences any more.
The only possible pitfall is that a dma_fence would be reused during the RCU grace period, but even that could be handled with a single extra check.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch --- drivers/dma-buf/dma-resv.c | 33 ++++++++++++--------------------- drivers/dma-buf/st-dma-resv.c | 2 +- include/linux/dma-resv.h | 12 ------------ 3 files changed, 13 insertions(+), 34 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5b64aa554c36..0cce6e4ec946 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -133,7 +133,6 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); - seqcount_ww_mutex_init(&obj->seq, &obj->lock);
RCU_INIT_POINTER(obj->fences, NULL); } @@ -292,28 +291,24 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, fobj = dma_resv_fences_list(obj); count = fobj->num_fences;
- write_seqcount_begin(&obj->seq); - for (i = 0; i < count; ++i) { enum dma_resv_usage old_usage;
dma_resv_list_entry(fobj, i, obj, &old, &old_usage); if ((old->context == fence->context && old_usage >= usage) || - dma_fence_is_signaled(old)) - goto replace; + dma_fence_is_signaled(old)) { + dma_resv_list_set(fobj, i, fence, usage); + dma_fence_put(old); + return; + } }
BUG_ON(fobj->num_fences >= fobj->max_fences); - old = NULL; count++;
-replace: dma_resv_list_set(fobj, i, fence, usage); /* pointer update must be visible before we extend the num_fences */ smp_store_mb(fobj->num_fences, count); - - write_seqcount_end(&obj->seq); - dma_fence_put(old); } EXPORT_SYMBOL(dma_resv_add_fence);
@@ -341,7 +336,6 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, dma_resv_assert_held(obj);
list = dma_resv_fences_list(obj); - write_seqcount_begin(&obj->seq); for (i = 0; list && i < list->num_fences; ++i) { struct dma_fence *old;
@@ -352,14 +346,12 @@ void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, dma_resv_list_set(list, i, replacement, usage); dma_fence_put(old); } - write_seqcount_end(&obj->seq); } EXPORT_SYMBOL(dma_resv_replace_fences);
/* Restart the unlocked iteration by initializing the cursor object. */ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) { - cursor->seq = read_seqcount_begin(&cursor->obj->seq); cursor->index = 0; cursor->num_fences = 0; cursor->fences = dma_resv_fences_list(cursor->obj); @@ -388,8 +380,10 @@ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) cursor->obj, &cursor->fence, &cursor->fence_usage); cursor->fence = dma_fence_get_rcu(cursor->fence); - if (!cursor->fence) - break; + if (!cursor->fence) { + dma_resv_iter_restart_unlocked(cursor); + continue; + }
if (!dma_fence_is_signaled(cursor->fence) && cursor->usage >= cursor->fence_usage) @@ -415,7 +409,7 @@ struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor) do { dma_resv_iter_restart_unlocked(cursor); dma_resv_iter_walk_unlocked(cursor); - } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + } while (dma_resv_fences_list(cursor->obj) != cursor->fences); rcu_read_unlock();
return cursor->fence; @@ -438,13 +432,13 @@ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor)
rcu_read_lock(); cursor->is_restarted = false; - restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq); + restart = dma_resv_fences_list(cursor->obj) != cursor->fences; do { if (restart) dma_resv_iter_restart_unlocked(cursor); dma_resv_iter_walk_unlocked(cursor); restart = true; - } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + } while (dma_resv_fences_list(cursor->obj) != cursor->fences); rcu_read_unlock();
return cursor->fence; @@ -540,10 +534,7 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) } dma_resv_iter_end(&cursor);
- write_seqcount_begin(&dst->seq); list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst)); - write_seqcount_end(&dst->seq); - dma_resv_list_free(list); return 0; } diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index 8ace9e84c845..813779e3c9be 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -217,7 +217,7 @@ static int test_for_each_unlocked(void *arg) if (r == -ENOENT) { r = -EINVAL; /* That should trigger an restart */ - cursor.seq--; + cursor.fences = (void*)~0; } else if (r == -EINVAL) { r = 0; } diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 1db759eacc98..c8ccbc94d5d2 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -155,15 +155,6 @@ struct dma_resv { */ struct ww_mutex lock;
- /** - * @seq: - * - * Sequence count for managing RCU read-side synchronization, allows - * read-only access to @fences while ensuring we take a consistent - * snapshot. - */ - seqcount_ww_mutex_t seq; - /** * @fences: * @@ -202,9 +193,6 @@ struct dma_resv_iter { /** @fence_usage: the usage of the current fence */ enum dma_resv_usage fence_usage;
- /** @seq: sequence number to check for modifications */ - unsigned int seq; - /** @index: index into the shared fences */ unsigned int index;
Daniel pointed out that this series removes the last user of seqcount_ww_mutex_t, so let's drop this.
Signed-off-by: Christian König christian.koenig@amd.com Cc: Peter Zijlstra peterz@infradead.org Cc: Ingo Molnar mingo@redhat.com Cc: Will Deacon will@kernel.org Cc: Waiman Long longman@redhat.com Cc: Boqun Feng boqun.feng@gmail.com Cc: linux-kernel@vger.kernel.org --- include/linux/seqlock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 37ded6b8fee6..3926e9027947 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -17,7 +17,6 @@ #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> -#include <linux/ww_mutex.h> #include <linux/preempt.h> #include <linux/spinlock.h>
@@ -164,7 +163,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * static initializer or init function. This enables lockdep to validate * that the write side critical section is properly serialized. * - * LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex. + * LOCKNAME: raw_spinlock, spinlock, rwlock or mutex */
/* @@ -184,7 +183,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) -#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex)
/* * SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers @@ -277,7 +275,6 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_s SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) -SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL))
/* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t @@ -304,8 +301,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ - __seqprop_case((s), mutex, prop), \ - __seqprop_case((s), ww_mutex, prop)) + __seqprop_case((s), mutex, prop))
#define seqprop_ptr(s) __seqprop(s, ptr) #define seqprop_sequence(s) __seqprop(s, sequence)
On Thu, Apr 07, 2022 at 10:59:46AM +0200, Christian König wrote:
Daniel pointed out that this series removes the last user of seqcount_ww_mutex_t, so let's drop this.
Signed-off-by: Christian König christian.koenig@amd.com Cc: Peter Zijlstra peterz@infradead.org Cc: Ingo Molnar mingo@redhat.com Cc: Will Deacon will@kernel.org Cc: Waiman Long longman@redhat.com Cc: Boqun Feng boqun.feng@gmail.com Cc: linux-kernel@vger.kernel.org
Yeah I don't think we'll ever need this again, ww_mutex aren't common and the ww_mutex+seqlock thing wasn't the brighest idea.
Peter/Ingo, assuming you agree, can you ack this for merging through drm-misc, or want to pick this up later on when the last user disappeared in Linus' tree?
Cheers, Daniel
include/linux/seqlock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 37ded6b8fee6..3926e9027947 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -17,7 +17,6 @@ #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> -#include <linux/ww_mutex.h> #include <linux/preempt.h> #include <linux/spinlock.h> @@ -164,7 +163,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
- static initializer or init function. This enables lockdep to validate
- that the write side critical section is properly serialized.
- LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
*/
- LOCKNAME: raw_spinlock, spinlock, rwlock or mutex
/* @@ -184,7 +183,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) -#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex) /*
- SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
@@ -277,7 +275,6 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_s SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) -SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) /*
- SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -304,8 +301,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \
- __seqprop_case((s), mutex, prop), \
- __seqprop_case((s), ww_mutex, prop))
- __seqprop_case((s), mutex, prop))
#define seqprop_ptr(s) __seqprop(s, ptr)
#define seqprop_sequence(s) __seqprop(s, sequence)
2.25.1
Am 07.04.22 um 11:19 schrieb Daniel Vetter:
On Thu, Apr 07, 2022 at 10:59:46AM +0200, Christian König wrote:
Daniel pointed out that this series removes the last user of seqcount_ww_mutex_t, so let's drop this.
Signed-off-by: Christian König christian.koenig@amd.com Cc: Peter Zijlstra peterz@infradead.org Cc: Ingo Molnar mingo@redhat.com Cc: Will Deacon will@kernel.org Cc: Waiman Long longman@redhat.com Cc: Boqun Feng boqun.feng@gmail.com Cc: linux-kernel@vger.kernel.org
Yeah I don't think we'll ever need this again, ww_mutex aren't common and the ww_mutex+seqlock thing wasn't the brighest idea.
Peter/Ingo, assuming you agree, can you ack this for merging through drm-misc, or want to pick this up later on when the last user disappeared in Linus' tree?
Mpf, I didn't noticed that removing ww_mutex.h from seqlock.h is causing a problem for futex.h.
Just send out a patch to fix this because the build servers are pointing out now that drm-misc-next is broken.
Please review ASAP, Christian.
Cheers, Daniel
include/linux/seqlock.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 37ded6b8fee6..3926e9027947 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -17,7 +17,6 @@ #include <linux/kcsan-checks.h> #include <linux/lockdep.h> #include <linux/mutex.h> -#include <linux/ww_mutex.h> #include <linux/preempt.h> #include <linux/spinlock.h> @@ -164,7 +163,7 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
- static initializer or init function. This enables lockdep to validate
- that the write side critical section is properly serialized.
- LOCKNAME: raw_spinlock, spinlock, rwlock, mutex, or ww_mutex.
*/
- LOCKNAME: raw_spinlock, spinlock, rwlock or mutex
/* @@ -184,7 +183,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define seqcount_spinlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, spinlock) #define seqcount_rwlock_init(s, lock) seqcount_LOCKNAME_init(s, lock, rwlock) #define seqcount_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, mutex) -#define seqcount_ww_mutex_init(s, lock) seqcount_LOCKNAME_init(s, lock, ww_mutex) /*
- SEQCOUNT_LOCKNAME() - Instantiate seqcount_LOCKNAME_t and helpers
@@ -277,7 +275,6 @@ SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_s SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) -SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mutex, ww_mutex_lock(s->lock, NULL)) /*
- SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
@@ -304,8 +301,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \
- __seqprop_case((s), mutex, prop), \
- __seqprop_case((s), ww_mutex, prop))
- __seqprop_case((s), mutex, prop))
#define seqprop_ptr(s) __seqprop(s, ptr)
#define seqprop_sequence(s) __seqprop(s, sequence)
2.25.1
On Thu, Apr 07, 2022 at 10:59:46AM +0200, Christian König wrote:
Daniel pointed out that this series removes the last user of seqcount_ww_mutex_t, so let's drop this.
Signed-off-by: Christian König christian.koenig@amd.com
Acked-by: Peter Zijlstra (Intel) peterz@infradead.org
On Thu, Apr 07, 2022 at 10:59:31AM +0200, Christian König wrote:
Hi Daniel,
only patch #2 had some significant changes. The rest ist pretty much the same except for the dropped exynos change and the added cleanup for the seqlock.
Reviewed that patch, I plan to do a full review of the docs and hopefully also a bit of all the various users once it's all landed.
Cheers, Daniel
linaro-mm-sig@lists.linaro.org