The amdgpu driver was the only user of the signal-on-any feature and we dropped that use case recently, so we can remove that functionality.
v2: update num_pending only after the fence is signaled v3: separate out simplifying dma_fence_array implementation
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 13 ++++--------- drivers/dma-buf/dma-fence-unwrap.c | 3 +-- drivers/dma-buf/dma-resv.c | 3 +-- drivers/dma-buf/st-dma-fence-unwrap.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/xe/xe_sync.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- include/linux/dma-fence-array.h | 6 ++---- 8 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 089f69469524..5e10e8df372f 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -190,15 +190,13 @@ EXPORT_SYMBOL(dma_fence_array_alloc); * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array * * Implementation of @dma_fence_array_create without allocation. Useful to init * a preallocated dma fence array in the path of reclaim or dma fence signaling. */ void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) + u64 context, unsigned seqno) { static struct lock_class_key dma_fence_array_lock_key;
@@ -222,7 +220,7 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); + atomic_set(&array->num_pending, num_fences); array->fences = fences;
array->base.error = PENDING_ERROR; @@ -249,7 +247,6 @@ EXPORT_SYMBOL(dma_fence_array_init); * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array * * Allocate a dma_fence_array object and initialize the base fence with * dma_fence_init(). @@ -264,8 +261,7 @@ EXPORT_SYMBOL(dma_fence_array_init); */ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) + u64 context, unsigned seqno) { struct dma_fence_array *array;
@@ -273,8 +269,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, if (!array) return NULL;
- dma_fence_array_init(array, num_fences, fences, - context, seqno, signal_on_any); + dma_fence_array_init(array, num_fences, fences, context, seqno);
return array; } diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 07fe9bf45aea..53bb40e70b27 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -180,8 +180,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
if (count > 1) { result = dma_fence_array_create(count, array, - dma_fence_context_alloc(1), - 1, false); + dma_fence_context_alloc(1), 1); if (!result) { for (i = 0; i < count; i++) dma_fence_put(array[i]); diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index ce9e6c04897f..39a92d9f2413 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -648,8 +648,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, }
array = dma_fence_array_create(count, fences, - dma_fence_context_alloc(1), - 1, false); + dma_fence_context_alloc(1), 1); if (!array) { while (count--) dma_fence_put(fences[count]); diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index 51c87869b7b8..4e7ee25372ba 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -64,7 +64,7 @@ static struct dma_fence *mock_array(unsigned int num_fences, ...)
array = dma_fence_array_create(num_fences, fences, dma_fence_context_alloc(1), - 1, false); + 1); if (!array) goto error_free; return &array->base; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 942f4eed817f..4a1a9031f9db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3205,8 +3205,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno++, - false); + eb->context->parallel.seqno++); if (!fence_array) { kfree(fences); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 24d6d9af20d6..37866768d64c 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -376,7 +376,7 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, xe_assert(vm->xe, current_fence == num_fence); cf = dma_fence_array_create(num_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); if (!cf) goto err_out;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..8f472911469d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3370,7 +3370,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, goto err_trace; }
- cf = dma_fence_array_alloc(n_fence); + cf = dma_fence_array_alloc(); if (!cf) { fence = ERR_PTR(-ENOMEM); goto err_out; @@ -3414,7 +3414,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
xe_assert(vm->xe, current_fence == n_fence); dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); fence = &cf->base;
for_each_tile(tile, vm->xe, id) { diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 370b3d2bba37..1b1d87579c38 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -81,13 +81,11 @@ to_dma_fence_array(struct dma_fence *fence) struct dma_fence_array *dma_fence_array_alloc(int num_fences); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno);
struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno);
bool dma_fence_match_context(struct dma_fence *fence, u64 context);
Removing the signal-on-any feature allows us to simplify the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); }
-static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) { - struct dma_fence_array *array = container_of(wrk, typeof(*array), work); + struct dma_fence_array *array = + container_of(cb, struct dma_fence_array, callback);
- dma_fence_array_clear_pending_error(array); + irq_work_queue(&array->work); +} + +static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{ + while (array->num_pending) { + struct dma_fence *f = array->fences[array->num_pending - 1]; + + if (!dma_fence_add_callback(f, &array->callback, + dma_fence_array_cb_func)) + return; + + dma_fence_array_set_pending_error(array, f->error); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + }
dma_fence_signal(&array->base); dma_fence_put(&array->base); }
-static void dma_fence_array_cb_func(struct dma_fence *f, - struct dma_fence_cb *cb) +static void dma_fence_array_irq_work(struct irq_work *wrk) { - struct dma_fence_array_cb *array_cb = - container_of(cb, struct dma_fence_array_cb, cb); - struct dma_fence_array *array = array_cb->array; - - dma_fence_array_set_pending_error(array, f->error); + struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- if (atomic_dec_and_test(&array->num_pending)) - irq_work_queue(&array->work); - else - dma_fence_put(&array->base); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + dma_fence_array_arm_cb(array); }
static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = array->callbacks; - unsigned i; - - for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). - */ - dma_fence_get(&array->base); - if (dma_fence_add_callback(array->fences[i], &cb[i].cb, - dma_fence_array_cb_func)) { - int error = array->fences[i]->error; - - dma_fence_array_set_pending_error(array, error); - dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) { - dma_fence_array_clear_pending_error(array); - return false; - } - } - }
+ /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). + */ + dma_fence_get(&array->base); + dma_fence_array_arm_cb(array); return true; }
static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - int num_pending; + int num_pending, error = 0; unsigned int i;
/* - * We need to read num_pending before checking the enable_signal bit - * to avoid racing with the enable_signaling() implementation, which - * might decrement the counter, and cause a partial check. - * atomic_read_acquire() pairs with atomic_dec_and_test() in - * dma_fence_array_enable_signaling() - * - * The !--num_pending check is here to account for the any_signaled case - * if we race with enable_signaling(), that means the !num_pending check - * in the is_signalling_enabled branch might be outdated (num_pending - * might have been decremented), but that's fine. The user will get the - * right value when testing again later. + * Reading num_pending without a memory barrier here is correct since + * that is only for optimization, it is perfectly acceptable to have a + * stale value for it. In all other cases num_pending is accessed by a + * single call chain. */ - num_pending = atomic_read_acquire(&array->num_pending); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { - if (num_pending <= 0) - goto signal; - return false; - } + num_pending = READ_ONCE(array->num_pending); + for (i = 0; i < num_pending; ++i) { + struct dma_fence *f = array->fences[i];
- for (i = 0; i < array->num_fences; ++i) { - if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) - goto signal; - } - return false; + if (!dma_fence_is_signaled(f)) + return false;
-signal: + if (!error) + error = f->error; + } + dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; } @@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops);
/** * dma_fence_array_alloc - Allocate a custom fence array - * @num_fences: [in] number of fences to add in the array * * Return dma fence array on success, NULL on failure */ -struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) { - struct dma_fence_array *array; - - return kzalloc_flex(*array, callbacks, num_fences); + return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc);
@@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences);
array->num_fences = num_fences; + array->num_pending = num_fences; + array->fences = fences; + array->base.error = PENDING_ERROR;
dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno); - init_irq_work(&array->work, irq_dma_fence_array_work); + init_irq_work(&array->work, dma_fence_array_irq_work);
/* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, num_fences); - array->fences = fences; - - array->base.error = PENDING_ERROR; - /* * dma_fence_array objects should never contain any other fence * containers or otherwise we run into recursion and potential kernel @@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array;
- array = dma_fence_array_alloc(num_fences); + array = dma_fence_array_alloc(); if (!array) return NULL;
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h>
-/** - * struct dma_fence_array_cb - callback helper for fence array - * @cb: fence callback structure for signaling - * @array: reference to the parent fence array object - */ -struct dma_fence_array_cb { - struct dma_fence_cb cb; - struct dma_fence_array *array; -}; - /** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class @@ -33,18 +23,17 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function - * @callbacks: array of callback helpers + * @callback: callback structure for signaling */ struct dma_fence_array { struct dma_fence base;
- unsigned num_fences; - atomic_t num_pending; + unsigned int num_fences; + unsigned int num_pending; struct dma_fence **fences;
struct irq_work work; - - struct dma_fence_array_cb callbacks[] __counted_by(num_fences); + struct dma_fence_cb callback; };
/** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index))
-struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno); - struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
linaro-mm-sig@lists.linaro.org