Amdgpu was the only user of the signal-on-any feature and we dropped that use case recently, so we can remove that functionality.
v2: update num_pending only after the fence is signaled v3: separate out simplifying dma_fence_array implementation
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 13 ++++--------- drivers/dma-buf/dma-fence-unwrap.c | 3 +-- drivers/dma-buf/dma-resv.c | 3 +-- drivers/dma-buf/st-dma-fence-unwrap.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/xe/xe_sync.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- include/linux/dma-fence-array.h | 6 ++---- 8 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 089f69469524..5e10e8df372f 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -190,15 +190,13 @@ EXPORT_SYMBOL(dma_fence_array_alloc); * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array * * Implementation of @dma_fence_array_create without allocation. Useful to init * a preallocated dma fence array in the path of reclaim or dma fence signaling. */ void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) + u64 context, unsigned seqno) { static struct lock_class_key dma_fence_array_lock_key;
@@ -222,7 +220,7 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); + atomic_set(&array->num_pending, num_fences); array->fences = fences;
array->base.error = PENDING_ERROR; @@ -249,7 +247,6 @@ EXPORT_SYMBOL(dma_fence_array_init); * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array * * Allocate a dma_fence_array object and initialize the base fence with * dma_fence_init(). @@ -264,8 +261,7 @@ EXPORT_SYMBOL(dma_fence_array_init); */ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) + u64 context, unsigned seqno) { struct dma_fence_array *array;
@@ -273,8 +269,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, if (!array) return NULL;
- dma_fence_array_init(array, num_fences, fences, - context, seqno, signal_on_any); + dma_fence_array_init(array, num_fences, fences, context, seqno);
return array; } diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 07fe9bf45aea..53bb40e70b27 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -180,8 +180,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
if (count > 1) { result = dma_fence_array_create(count, array, - dma_fence_context_alloc(1), - 1, false); + dma_fence_context_alloc(1), 1); if (!result) { for (i = 0; i < count; i++) dma_fence_put(array[i]); diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index ce9e6c04897f..39a92d9f2413 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -648,8 +648,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, }
array = dma_fence_array_create(count, fences, - dma_fence_context_alloc(1), - 1, false); + dma_fence_context_alloc(1), 1); if (!array) { while (count--) dma_fence_put(fences[count]); diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index 51c87869b7b8..4e7ee25372ba 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -64,7 +64,7 @@ static struct dma_fence *mock_array(unsigned int num_fences, ...)
array = dma_fence_array_create(num_fences, fences, dma_fence_context_alloc(1), - 1, false); + 1); if (!array) goto error_free; return &array->base; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 942f4eed817f..4a1a9031f9db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3205,8 +3205,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno++, - false); + eb->context->parallel.seqno++); if (!fence_array) { kfree(fences); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 24d6d9af20d6..37866768d64c 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -376,7 +376,7 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, xe_assert(vm->xe, current_fence == num_fence); cf = dma_fence_array_create(num_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); if (!cf) goto err_out;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..8f472911469d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3370,7 +3370,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, goto err_trace; }
- cf = dma_fence_array_alloc(n_fence); + cf = dma_fence_array_alloc(); if (!cf) { fence = ERR_PTR(-ENOMEM); goto err_out; @@ -3414,7 +3414,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
xe_assert(vm->xe, current_fence == n_fence); dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); fence = &cf->base;
for_each_tile(tile, vm->xe, id) { diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 370b3d2bba37..1b1d87579c38 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -81,13 +81,11 @@ to_dma_fence_array(struct dma_fence *fence) struct dma_fence_array *dma_fence_array_alloc(int num_fences); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno);
struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno);
bool dma_fence_match_context(struct dma_fence *fence, u64 context);
Removing the signal-on-any feature allows us to simplify the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); }
-static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) { - struct dma_fence_array *array = container_of(wrk, typeof(*array), work); + struct dma_fence_array *array = + container_of(cb, struct dma_fence_array, callback);
- dma_fence_array_clear_pending_error(array); + irq_work_queue(&array->work); +} + +static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{ + while (array->num_pending) { + struct dma_fence *f = array->fences[array->num_pending - 1]; + + if (!dma_fence_add_callback(f, &array->callback, + dma_fence_array_cb_func)) + return; + + dma_fence_array_set_pending_error(array, f->error); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + }
dma_fence_signal(&array->base); dma_fence_put(&array->base); }
-static void dma_fence_array_cb_func(struct dma_fence *f, - struct dma_fence_cb *cb) +static void dma_fence_array_irq_work(struct irq_work *wrk) { - struct dma_fence_array_cb *array_cb = - container_of(cb, struct dma_fence_array_cb, cb); - struct dma_fence_array *array = array_cb->array; - - dma_fence_array_set_pending_error(array, f->error); + struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- if (atomic_dec_and_test(&array->num_pending)) - irq_work_queue(&array->work); - else - dma_fence_put(&array->base); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + dma_fence_array_arm_cb(array); }
static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = array->callbacks; - unsigned i; - - for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). - */ - dma_fence_get(&array->base); - if (dma_fence_add_callback(array->fences[i], &cb[i].cb, - dma_fence_array_cb_func)) { - int error = array->fences[i]->error; - - dma_fence_array_set_pending_error(array, error); - dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) { - dma_fence_array_clear_pending_error(array); - return false; - } - } - }
+ /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). + */ + dma_fence_get(&array->base); + dma_fence_array_arm_cb(array); return true; }
static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - int num_pending; + int num_pending, error = 0; unsigned int i;
/* - * We need to read num_pending before checking the enable_signal bit - * to avoid racing with the enable_signaling() implementation, which - * might decrement the counter, and cause a partial check. - * atomic_read_acquire() pairs with atomic_dec_and_test() in - * dma_fence_array_enable_signaling() - * - * The !--num_pending check is here to account for the any_signaled case - * if we race with enable_signaling(), that means the !num_pending check - * in the is_signalling_enabled branch might be outdated (num_pending - * might have been decremented), but that's fine. The user will get the - * right value when testing again later. + * Reading num_pending without a memory barrier here is correct since + * that is only for optimization, it is perfectly acceptable to have a + * stale value for it. In all other cases num_pending is accessed by a + * single call chain. */ - num_pending = atomic_read_acquire(&array->num_pending); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { - if (num_pending <= 0) - goto signal; - return false; - } + num_pending = READ_ONCE(array->num_pending); + for (i = 0; i < num_pending; ++i) { + struct dma_fence *f = array->fences[i];
- for (i = 0; i < array->num_fences; ++i) { - if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) - goto signal; - } - return false; + if (!dma_fence_is_signaled(f)) + return false;
-signal: + if (!error) + error = f->error; + } + dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; } @@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops);
/** * dma_fence_array_alloc - Allocate a custom fence array - * @num_fences: [in] number of fences to add in the array * * Return dma fence array on success, NULL on failure */ -struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) { - struct dma_fence_array *array; - - return kzalloc_flex(*array, callbacks, num_fences); + return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc);
@@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences);
array->num_fences = num_fences; + array->num_pending = num_fences; + array->fences = fences; + array->base.error = PENDING_ERROR;
dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno); - init_irq_work(&array->work, irq_dma_fence_array_work); + init_irq_work(&array->work, dma_fence_array_irq_work);
/* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, num_fences); - array->fences = fences; - - array->base.error = PENDING_ERROR; - /* * dma_fence_array objects should never contain any other fence * containers or otherwise we run into recursion and potential kernel @@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array;
- array = dma_fence_array_alloc(num_fences); + array = dma_fence_array_alloc(); if (!array) return NULL;
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h>
-/** - * struct dma_fence_array_cb - callback helper for fence array - * @cb: fence callback structure for signaling - * @array: reference to the parent fence array object - */ -struct dma_fence_array_cb { - struct dma_fence_cb cb; - struct dma_fence_array *array; -}; - /** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class @@ -33,18 +23,17 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function - * @callbacks: array of callback helpers + * @callback: callback structure for signaling */ struct dma_fence_array { struct dma_fence base;
- unsigned num_fences; - atomic_t num_pending; + unsigned int num_fences; + unsigned int num_pending; struct dma_fence **fences;
struct irq_work work; - - struct dma_fence_array_cb callbacks[] __counted_by(num_fences); + struct dma_fence_cb callback; };
/** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index))
-struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno); - struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
On 22/04/2026 11:30, Christian König wrote:
Removing the signal-on-any feature allows us to simplify the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); } -static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f,
{struct dma_fence_cb *cb)
- struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- struct dma_fence_array *array =
container_of(cb, struct dma_fence_array, callback);
- dma_fence_array_clear_pending_error(array);
- irq_work_queue(&array->work);
+}
+static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{
- while (array->num_pending) {
struct dma_fence *f = array->fences[array->num_pending - 1];if (!dma_fence_add_callback(f, &array->callback,dma_fence_array_cb_func))return;dma_fence_array_set_pending_error(array, f->error);WRITE_ONCE(array->num_pending, array->num_pending - 1);
Do you think the WRITE_ONCEs are needed? As the loop will restart with un-annotated read anyway, but not just that, I don't think it can be compiled away in the kernel with this usage pattern. Maybe I am mistaken.
- }
dma_fence_signal(&array->base); dma_fence_put(&array->base); } -static void dma_fence_array_cb_func(struct dma_fence *f,
struct dma_fence_cb *cb)+static void dma_fence_array_irq_work(struct irq_work *wrk) {
- struct dma_fence_array_cb *array_cb =
container_of(cb, struct dma_fence_array_cb, cb);- struct dma_fence_array *array = array_cb->array;
- dma_fence_array_set_pending_error(array, f->error);
- struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- if (atomic_dec_and_test(&array->num_pending))
irq_work_queue(&array->work);- else
dma_fence_put(&array->base);
- WRITE_ONCE(array->num_pending, array->num_pending - 1);
- dma_fence_array_arm_cb(array);
So for x86 going from one irqwork latency to num_fences latencies is probably passable but I am not sure how other architectures fare.
} static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence);
- struct dma_fence_array_cb *cb = array->callbacks;
- unsigned i;
- for (i = 0; i < array->num_fences; ++i) {
cb[i].array = array;/** As we may report that the fence is signaled before all* callbacks are complete, we need to take an additional* reference count on the array so that we do not free it too* early. The core fence handling will only hold the reference* until we signal the array as complete (but that is now* insufficient).*/dma_fence_get(&array->base);if (dma_fence_add_callback(array->fences[i], &cb[i].cb,dma_fence_array_cb_func)) {int error = array->fences[i]->error;dma_fence_array_set_pending_error(array, error);dma_fence_put(&array->base);if (atomic_dec_and_test(&array->num_pending)) {dma_fence_array_clear_pending_error(array);return false;}}- }
- /*
* As we may report that the fence is signaled before all* callbacks are complete, we need to take an additional* reference count on the array so that we do not free it too* early. The core fence handling will only hold the reference* until we signal the array as complete (but that is now* insufficient).*/- dma_fence_get(&array->base);
- dma_fence_array_arm_cb(array); return true;
Are you sure it is safe to always return true?
Regards,
Tvrtko
} static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence);
- int num_pending;
- int num_pending, error = 0; unsigned int i;
/*
* We need to read num_pending before checking the enable_signal bit* to avoid racing with the enable_signaling() implementation, which* might decrement the counter, and cause a partial check.* atomic_read_acquire() pairs with atomic_dec_and_test() in* dma_fence_array_enable_signaling()** The !--num_pending check is here to account for the any_signaled case* if we race with enable_signaling(), that means the !num_pending check* in the is_signalling_enabled branch might be outdated (num_pending* might have been decremented), but that's fine. The user will get the* right value when testing again later.
* Reading num_pending without a memory barrier here is correct since* that is only for optimization, it is perfectly acceptable to have a* stale value for it. In all other cases num_pending is accessed by a */* single call chain.
- num_pending = atomic_read_acquire(&array->num_pending);
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) {
if (num_pending <= 0)goto signal;return false;- }
- num_pending = READ_ONCE(array->num_pending);
- for (i = 0; i < num_pending; ++i) {
struct dma_fence *f = array->fences[i];
- for (i = 0; i < array->num_fences; ++i) {
if (dma_fence_is_signaled(array->fences[i]) && !--num_pending)goto signal;- }
- return false;
if (!dma_fence_is_signaled(f))return false;-signal:
if (!error)error = f->error;- }
- dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; }
@@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops); /**
- dma_fence_array_alloc - Allocate a custom fence array
*/
- @num_fences: [in] number of fences to add in the array
- Return dma fence array on success, NULL on failure
-struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) {
- struct dma_fence_array *array;
- return kzalloc_flex(*array, callbacks, num_fences);
- return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc);
@@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences); array->num_fences = num_fences;
- array->num_pending = num_fences;
- array->fences = fences;
- array->base.error = PENDING_ERROR;
dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno);
- init_irq_work(&array->work, irq_dma_fence_array_work);
- init_irq_work(&array->work, dma_fence_array_irq_work);
/* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, num_fences);
- array->fences = fences;
- array->base.error = PENDING_ERROR;
- /*
- dma_fence_array objects should never contain any other fence
- containers or otherwise we run into recursion and potential kernel
@@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array;
- array = dma_fence_array_alloc(num_fences);
- array = dma_fence_array_alloc(); if (!array) return NULL;
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h> -/**
- struct dma_fence_array_cb - callback helper for fence array
- @cb: fence callback structure for signaling
- @array: reference to the parent fence array object
- */
-struct dma_fence_array_cb {
- struct dma_fence_cb cb;
- struct dma_fence_array *array;
-};
- /**
- struct dma_fence_array - fence to represent an array of fences
- @base: fence base class
@@ -33,18 +23,17 @@ struct dma_fence_array_cb {
- @num_pending: fences in the array still pending
- @fences: array of the fences
- @work: internal irq_work function
- @callbacks: array of callback helpers
*/ struct dma_fence_array { struct dma_fence base;
- @callback: callback structure for signaling
- unsigned num_fences;
- atomic_t num_pending;
- unsigned int num_fences;
- unsigned int num_pending; struct dma_fence **fences;
struct irq_work work;
- struct dma_fence_array_cb callbacks[] __counted_by(num_fences);
- struct dma_fence_cb callback; };
/** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) -struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
- struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
On 4/22/26 13:37, Tvrtko Ursulin wrote:
On 22/04/2026 11:30, Christian König wrote:
Removing the signal-on-any feature allows us to simplify the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); } -static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) { - struct dma_fence_array *array = container_of(wrk, typeof(*array), work); + struct dma_fence_array *array = + container_of(cb, struct dma_fence_array, callback); - dma_fence_array_clear_pending_error(array); + irq_work_queue(&array->work); +}
+static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{ + while (array->num_pending) { + struct dma_fence *f = array->fences[array->num_pending - 1];
+ if (!dma_fence_add_callback(f, &array->callback, + dma_fence_array_cb_func)) + return;
+ dma_fence_array_set_pending_error(array, f->error); + WRITE_ONCE(array->num_pending, array->num_pending - 1);
Do you think the WRITE_ONCEs are needed? As the loop will restart with un-annotated read anyway, but not just that, I don't think it can be compiled away in the kernel with this usage pattern. Maybe I am mistaken.
I also think the WRITE_ONCEs are superfluous. But I wanted to be on the safe side, so that the compiler cannot re-order the write before the dma_fence_add_callback(), and it shouldn't matter for performance.
+ } dma_fence_signal(&array->base); dma_fence_put(&array->base); } -static void dma_fence_array_cb_func(struct dma_fence *f, - struct dma_fence_cb *cb) +static void dma_fence_array_irq_work(struct irq_work *wrk) { - struct dma_fence_array_cb *array_cb = - container_of(cb, struct dma_fence_array_cb, cb); - struct dma_fence_array *array = array_cb->array;
- dma_fence_array_set_pending_error(array, f->error); + struct dma_fence_array *array = container_of(wrk, typeof(*array), work); - if (atomic_dec_and_test(&array->num_pending)) - irq_work_queue(&array->work); - else - dma_fence_put(&array->base); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + dma_fence_array_arm_cb(array);
So for x86 going from one irqwork latency to num_fences latencies is probably passable but I am not sure how other architectures fare.
Mhm, what do you mean?
Previously we started one irqwork handler for each not signaled fence when enable_signaling was called, but now we reduce that to only starting one for each not signaled fence when the previous fence has finished.
As far as I can see that is always better or at least the same overhead.
} static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = array->callbacks; - unsigned i;
- for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). - */ - dma_fence_get(&array->base); - if (dma_fence_add_callback(array->fences[i], &cb[i].cb, - dma_fence_array_cb_func)) { - int error = array->fences[i]->error;
- dma_fence_array_set_pending_error(array, error); - dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) { - dma_fence_array_clear_pending_error(array); - return false; - } - } - } + /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). + */ + dma_fence_get(&array->base); + dma_fence_array_arm_cb(array); return true;
Are you sure it is safe to always return true?
Oh, good point!
It is safe to return true here, but it is not safe to call dma_fence_array_arm_cb() because that could signal the fence and result in double locking!
Going to fix that, thanks.
Regards, Christian.
Regards,
Tvrtko
} static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - int num_pending; + int num_pending, error = 0; unsigned int i; /* - * We need to read num_pending before checking the enable_signal bit - * to avoid racing with the enable_signaling() implementation, which - * might decrement the counter, and cause a partial check. - * atomic_read_acquire() pairs with atomic_dec_and_test() in - * dma_fence_array_enable_signaling() - * - * The !--num_pending check is here to account for the any_signaled case - * if we race with enable_signaling(), that means the !num_pending check - * in the is_signalling_enabled branch might be outdated (num_pending - * might have been decremented), but that's fine. The user will get the - * right value when testing again later. + * Reading num_pending without a memory barrier here is correct since + * that is only for optimization, it is perfectly acceptable to have a + * stale value for it. In all other cases num_pending is accessed by a + * single call chain. */ - num_pending = atomic_read_acquire(&array->num_pending); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { - if (num_pending <= 0) - goto signal; - return false; - } + num_pending = READ_ONCE(array->num_pending); + for (i = 0; i < num_pending; ++i) { + struct dma_fence *f = array->fences[i]; - for (i = 0; i < array->num_fences; ++i) { - if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) - goto signal; - } - return false; + if (!dma_fence_is_signaled(f)) + return false; -signal: + if (!error) + error = f->error; + } + dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; } @@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops); /** * dma_fence_array_alloc - Allocate a custom fence array
- @num_fences: [in] number of fences to add in the array
* * Return dma fence array on success, NULL on failure */ -struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) { - struct dma_fence_array *array;
- return kzalloc_flex(*array, callbacks, num_fences); + return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc); @@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences); array->num_fences = num_fences; + array->num_pending = num_fences; + array->fences = fences; + array->base.error = PENDING_ERROR; dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno); - init_irq_work(&array->work, irq_dma_fence_array_work); + init_irq_work(&array->work, dma_fence_array_irq_work); /* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key); - atomic_set(&array->num_pending, num_fences); - array->fences = fences;
- array->base.error = PENDING_ERROR;
/* * dma_fence_array objects should never contain any other fence * containers or otherwise we run into recursion and potential kernel @@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array; - array = dma_fence_array_alloc(num_fences); + array = dma_fence_array_alloc(); if (!array) return NULL; diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h> -/**
- struct dma_fence_array_cb - callback helper for fence array
- @cb: fence callback structure for signaling
- @array: reference to the parent fence array object
- */
-struct dma_fence_array_cb { - struct dma_fence_cb cb; - struct dma_fence_array *array; -};
/** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class @@ -33,18 +23,17 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function
- @callbacks: array of callback helpers
- @callback: callback structure for signaling
*/ struct dma_fence_array { struct dma_fence base; - unsigned num_fences; - atomic_t num_pending; + unsigned int num_fences; + unsigned int num_pending; struct dma_fence **fences; struct irq_work work;
- struct dma_fence_array_cb callbacks[] __counted_by(num_fences); + struct dma_fence_cb callback; }; /** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) -struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
On 04/05/2026 15:55, Christian König wrote:
On 4/22/26 13:37, Tvrtko Ursulin wrote:
On 22/04/2026 11:30, Christian König wrote:
Removing the signal-on-any feature allows us to simplify the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); } -static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) { - struct dma_fence_array *array = container_of(wrk, typeof(*array), work); + struct dma_fence_array *array = + container_of(cb, struct dma_fence_array, callback); - dma_fence_array_clear_pending_error(array); + irq_work_queue(&array->work); +}
+static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{ + while (array->num_pending) { + struct dma_fence *f = array->fences[array->num_pending - 1];
+ if (!dma_fence_add_callback(f, &array->callback, + dma_fence_array_cb_func)) + return;
+ dma_fence_array_set_pending_error(array, f->error); + WRITE_ONCE(array->num_pending, array->num_pending - 1);
Do you think the WRITE_ONCEs are needed? As the loop will restart with un-annotated read anyway, but not just that, I don't think it can be compiled away in the kernel with this usage pattern. Maybe I am mistaken.
I also think the WRITE_ONCEs are superfluous. But I wanted to be on the safe side, so that the compiler does not reorder the write before the dma_fence_add_callback(), and it shouldn't matter for performance.
Hm okay, but I am on the fence here: if they stay, they may mislead the reader into thinking that they are required.
+ } dma_fence_signal(&array->base); dma_fence_put(&array->base); } -static void dma_fence_array_cb_func(struct dma_fence *f, - struct dma_fence_cb *cb) +static void dma_fence_array_irq_work(struct irq_work *wrk) { - struct dma_fence_array_cb *array_cb = - container_of(cb, struct dma_fence_array_cb, cb); - struct dma_fence_array *array = array_cb->array;
- dma_fence_array_set_pending_error(array, f->error); + struct dma_fence_array *array = container_of(wrk, typeof(*array), work); - if (atomic_dec_and_test(&array->num_pending)) - irq_work_queue(&array->work); - else - dma_fence_put(&array->base); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + dma_fence_array_arm_cb(array);
So for x86 going from one irqwork latency to num_fences latencies is probably passable but I am not sure how other architectures fare.
Mhm, what do you mean?
Previously we started one irqwork handler for each not signaled fence when enable_signaling was called, but now we reduce that to only starting one for each not signaled fence when the previous fence has finished.
As far as I can see that is always better or at least the same overhead.
Currently irq work latency is once per array:
static void dma_fence_array_cb_func(struct dma_fence *f, struct dma_fence_cb *cb) { ...
if (atomic_dec_and_test(&array->num_pending)) irq_work_queue(&array->work);
So only when last one signals.
With this patch, AFAIR from when I reviewed it (the diff is a bit difficult to read, so I applied it), it is a "chain" of irq work and callbacks. One by one, as you say: the irq work installs the callback on the next fence, which, when signalled, queues the irq work.
Regards,
Tvrtko
} static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = array->callbacks; - unsigned i;
- for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). - */ - dma_fence_get(&array->base); - if (dma_fence_add_callback(array->fences[i], &cb[i].cb, - dma_fence_array_cb_func)) { - int error = array->fences[i]->error;
- dma_fence_array_set_pending_error(array, error); - dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) { - dma_fence_array_clear_pending_error(array); - return false; - } - } - } + /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). + */ + dma_fence_get(&array->base); + dma_fence_array_arm_cb(array); return true;
Are you sure it is safe to always return true?
Oh, good point!
It is safe to return true here, but it is not safe to call dma_fence_array_arm_cb() because that could signal the fence and result in double locking!
Going to fix that, thanks.
Regards, Christian.
Regards,
Tvrtko
} static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - int num_pending; + int num_pending, error = 0; unsigned int i; /* - * We need to read num_pending before checking the enable_signal bit - * to avoid racing with the enable_signaling() implementation, which - * might decrement the counter, and cause a partial check. - * atomic_read_acquire() pairs with atomic_dec_and_test() in - * dma_fence_array_enable_signaling() - * - * The !--num_pending check is here to account for the any_signaled case - * if we race with enable_signaling(), that means the !num_pending check - * in the is_signalling_enabled branch might be outdated (num_pending - * might have been decremented), but that's fine. The user will get the - * right value when testing again later. + * Reading num_pending without a memory barrier here is correct since + * that is only for optimization, it is perfectly acceptable to have a + * stale value for it. In all other cases num_pending is accessed by a + * single call chain. */ - num_pending = atomic_read_acquire(&array->num_pending); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { - if (num_pending <= 0) - goto signal; - return false; - } + num_pending = READ_ONCE(array->num_pending); + for (i = 0; i < num_pending; ++i) { + struct dma_fence *f = array->fences[i]; - for (i = 0; i < array->num_fences; ++i) { - if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) - goto signal; - } - return false; + if (!dma_fence_is_signaled(f)) + return false; -signal: + if (!error) + error = f->error; + } + dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; } @@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops); /** * dma_fence_array_alloc - Allocate a custom fence array
- @num_fences: [in] number of fences to add in the array
* * Return dma fence array on success, NULL on failure */ -struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) { - struct dma_fence_array *array;
- return kzalloc_flex(*array, callbacks, num_fences); + return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc); @@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences); array->num_fences = num_fences; + array->num_pending = num_fences; + array->fences = fences; + array->base.error = PENDING_ERROR; dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno); - init_irq_work(&array->work, irq_dma_fence_array_work); + init_irq_work(&array->work, dma_fence_array_irq_work); /* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key); - atomic_set(&array->num_pending, num_fences); - array->fences = fences;
- array->base.error = PENDING_ERROR;
/* * dma_fence_array objects should never contain any other fence * containers or otherwise we run into recursion and potential kernel @@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array; - array = dma_fence_array_alloc(num_fences); + array = dma_fence_array_alloc(); if (!array) return NULL; diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h> -/**
- struct dma_fence_array_cb - callback helper for fence array
- @cb: fence callback structure for signaling
- @array: reference to the parent fence array object
- */
-struct dma_fence_array_cb { - struct dma_fence_cb cb; - struct dma_fence_array *array; -};
/** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class @@ -33,18 +23,17 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function
- @callbacks: array of callback helpers
- @callback: callback structure for signaling
*/ struct dma_fence_array { struct dma_fence base; - unsigned num_fences; - atomic_t num_pending; + unsigned int num_fences; + unsigned int num_pending; struct dma_fence **fences; struct irq_work work;
- struct dma_fence_array_cb callbacks[] __counted_by(num_fences); + struct dma_fence_cb callback; }; /** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) -struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
On 22/04/2026 11:30, Christian König wrote:
Amdgpu was the only user of the signal on any feature and we dropped that use case recently, so we can remove that functionality.
v2: update num_pending only after the fence is signaled v3: separate out simplifying dma_fence_array implementation
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-array.c | 13 ++++--------- drivers/dma-buf/dma-fence-unwrap.c | 3 +-- drivers/dma-buf/dma-resv.c | 3 +-- drivers/dma-buf/st-dma-fence-unwrap.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +-- drivers/gpu/drm/xe/xe_sync.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- include/linux/dma-fence-array.h | 6 ++---- 8 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 089f69469524..5e10e8df372f 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -190,15 +190,13 @@ EXPORT_SYMBOL(dma_fence_array_alloc);
- @fences: [in] array containing the fences
- @context: [in] fence context to use
- @seqno: [in] sequence number to use
*/ void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences,
- @signal_on_any: [in] signal on any fence in the array
- Implementation of @dma_fence_array_create without allocation. Useful to init
- a preallocated dma fence array in the path of reclaim or dma fence signaling.
u64 context, unsigned seqno,bool signal_on_any)
{ static struct lock_class_key dma_fence_array_lock_key;u64 context, unsigned seqno)@@ -222,7 +220,7 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
+ atomic_set(&array->num_pending, num_fences); array->fences = fences;
array->base.error = PENDING_ERROR; @@ -249,7 +247,6 @@ EXPORT_SYMBOL(dma_fence_array_init);
- @fences: [in] array containing the fences
- @context: [in] fence context to use
- @seqno: [in] sequence number to use
- @signal_on_any: [in] signal on any fence in the array
- Allocate a dma_fence_array object and initialize the base fence with
- dma_fence_init().
@@ -264,8 +261,7 @@ EXPORT_SYMBOL(dma_fence_array_init); */ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences,
u64 context, unsigned seqno,bool signal_on_any)
{ struct dma_fence_array *array;u64 context, unsigned seqno)@@ -273,8 +269,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, if (!array) return NULL;
- dma_fence_array_init(array, num_fences, fences,
context, seqno, signal_on_any);
+ dma_fence_array_init(array, num_fences, fences, context, seqno);
return array; } diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 07fe9bf45aea..53bb40e70b27 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -180,8 +180,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, if (count > 1) { result = dma_fence_array_create(count, array,
dma_fence_context_alloc(1),1, false);
if (!result) { for (i = 0; i < count; i++) dma_fence_put(array[i]);dma_fence_context_alloc(1), 1);diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index ce9e6c04897f..39a92d9f2413 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -648,8 +648,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, } array = dma_fence_array_create(count, fences,
dma_fence_context_alloc(1),1, false);
if (!array) { while (count--) dma_fence_put(fences[count]);dma_fence_context_alloc(1), 1);diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index 51c87869b7b8..4e7ee25372ba 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -64,7 +64,7 @@ static struct dma_fence *mock_array(unsigned int num_fences, ...) array = dma_fence_array_create(num_fences, fences, dma_fence_context_alloc(1),
1, false);
if (!array) goto error_free; return &array->base;1);diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 942f4eed817f..4a1a9031f9db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3205,8 +3205,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context,
eb->context->parallel.seqno++,false);
if (!fence_array) { kfree(fences); return ERR_PTR(-ENOMEM);eb->context->parallel.seqno++);diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 24d6d9af20d6..37866768d64c 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -376,7 +376,7 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, xe_assert(vm->xe, current_fence == num_fence); cf = dma_fence_array_create(num_fence, fences, dma_fence_context_alloc(1),
1, false);
if (!cf) goto err_out;1);diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..8f472911469d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3370,7 +3370,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, goto err_trace; }
- cf = dma_fence_array_alloc(n_fence);
+ cf = dma_fence_array_alloc();
Patch splitting mistake here.
The rest LGTM. So with this hunk dropped:
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com
Regards,
Tvrtko
if (!cf) { fence = ERR_PTR(-ENOMEM); goto err_out; @@ -3414,7 +3414,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, xe_assert(vm->xe, current_fence == n_fence); dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
1, false);
fence = &cf->base;1);for_each_tile(tile, vm->xe, id) { diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 370b3d2bba37..1b1d87579c38 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -81,13 +81,11 @@ to_dma_fence_array(struct dma_fence *fence) struct dma_fence_array *dma_fence_array_alloc(int num_fences); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences,
u64 context, unsigned seqno,bool signal_on_any);
u64 context, unsigned seqno);struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences,
u64 context, unsigned seqno,bool signal_on_any);
u64 context, unsigned seqno);bool dma_fence_match_context(struct dma_fence *fence, u64 context);
On 4/22/26 12:49, Tvrtko Ursulin wrote:
On 22/04/2026 11:30, Christian König wrote:
...
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 56e2db50bb36..8f472911469d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3370,7 +3370,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, goto err_trace; } - cf = dma_fence_array_alloc(n_fence); + cf = dma_fence_array_alloc();
Patch splitting mistake here.
Ah, thanks for pointing this out.
The rest LGTM. So with this hunk dropped:
Reviewed-by: Tvrtko Ursulin tvrtko.ursulin@igalia.com
I double checked the patch once more, compile tested it and then pushed the result.
Thanks, Christian.
Regards,
Tvrtko
if (!cf) { fence = ERR_PTR(-ENOMEM); goto err_out; @@ -3414,7 +3414,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, xe_assert(vm->xe, current_fence == n_fence); dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); fence = &cf->base; for_each_tile(tile, vm->xe, id) { diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 370b3d2bba37..1b1d87579c38 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -81,13 +81,11 @@ to_dma_fence_array(struct dma_fence *fence) struct dma_fence_array *dma_fence_array_alloc(int num_fences); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno); struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno); bool dma_fence_match_context(struct dma_fence *fence, u64 context);
linaro-mm-sig@lists.linaro.org