On 22/04/2026 11:30, Christian König wrote:
Removing the signal on any feature allows to simplfy the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); } -static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f,
{struct dma_fence_cb *cb)
- struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- struct dma_fence_array *array =
container_of(cb, struct dma_fence_array, callback);
- dma_fence_array_clear_pending_error(array);
- irq_work_queue(&array->work);
+}
+static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{
- while (array->num_pending) {
struct dma_fence *f = array->fences[array->num_pending - 1];if (!dma_fence_add_callback(f, &array->callback,dma_fence_array_cb_func))return;dma_fence_array_set_pending_error(array, f->error);WRITE_ONCE(array->num_pending, array->num_pending - 1);
Do you think the WRITE_ONCEs are needed? As the loop will restart with un-annotated read anyway, but not just that, I don't think it can be compiled away in the kernel with this usage pattern. Maybe I am mistaken.
- }
dma_fence_signal(&array->base); dma_fence_put(&array->base); } -static void dma_fence_array_cb_func(struct dma_fence *f,
struct dma_fence_cb *cb)+static void dma_fence_array_irq_work(struct irq_work *wrk) {
- struct dma_fence_array_cb *array_cb =
container_of(cb, struct dma_fence_array_cb, cb);- struct dma_fence_array *array = array_cb->array;
- dma_fence_array_set_pending_error(array, f->error);
- struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- if (atomic_dec_and_test(&array->num_pending))
irq_work_queue(&array->work);- else
dma_fence_put(&array->base);
- WRITE_ONCE(array->num_pending, array->num_pending - 1);
- dma_fence_array_arm_cb(array);
So for x86 going from one irqwork latency to num_fences latencies is probably passable but I am not sure how other architectures fare.
} static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence);
- struct dma_fence_array_cb *cb = array->callbacks;
- unsigned i;
- for (i = 0; i < array->num_fences; ++i) {
cb[i].array = array;/** As we may report that the fence is signaled before all* callbacks are complete, we need to take an additional* reference count on the array so that we do not free it too* early. The core fence handling will only hold the reference* until we signal the array as complete (but that is now* insufficient).*/dma_fence_get(&array->base);if (dma_fence_add_callback(array->fences[i], &cb[i].cb,dma_fence_array_cb_func)) {int error = array->fences[i]->error;dma_fence_array_set_pending_error(array, error);dma_fence_put(&array->base);if (atomic_dec_and_test(&array->num_pending)) {dma_fence_array_clear_pending_error(array);return false;}}- }
- /*
* As we may report that the fence is signaled before all* callbacks are complete, we need to take an additional* reference count on the array so that we do not free it too* early. The core fence handling will only hold the reference* until we signal the array as complete (but that is now* insufficient).*/- dma_fence_get(&array->base);
- dma_fence_array_arm_cb(array); return true;
Are you sure it is safe to always return true?
Regards,
Tvrtko
} static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence);
- int num_pending;
- int num_pending, error = 0; unsigned int i;
/*
* We need to read num_pending before checking the enable_signal bit* to avoid racing with the enable_signaling() implementation, which* might decrement the counter, and cause a partial check.* atomic_read_acquire() pairs with atomic_dec_and_test() in* dma_fence_array_enable_signaling()** The !--num_pending check is here to account for the any_signaled case* if we race with enable_signaling(), that means the !num_pending check* in the is_signalling_enabled branch might be outdated (num_pending* might have been decremented), but that's fine. The user will get the* right value when testing again later.
* Reading num_pending without a memory barrier here is correct since* that is only for optimization, it is perfectly acceptable to have a* stale value for it. In all other cases num_pending is accessed by a */* single call chain.
- num_pending = atomic_read_acquire(&array->num_pending);
- if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) {
if (num_pending <= 0)goto signal;return false;- }
- num_pending = READ_ONCE(array->num_pending);
- for (i = 0; i < num_pending; ++i) {
struct dma_fence *f = array->fences[i];
- for (i = 0; i < array->num_fences; ++i) {
if (dma_fence_is_signaled(array->fences[i]) && !--num_pending)goto signal;- }
- return false;
if (!dma_fence_is_signaled(f))return false;-signal:
if (!error)error = f->error;- }
- dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; }
@@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops); /**
- dma_fence_array_alloc - Allocate a custom fence array
*/
- @num_fences: [in] number of fences to add in the array
- Return dma fence array on success, NULL on failure
-struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) {
- struct dma_fence_array *array;
- return kzalloc_flex(*array, callbacks, num_fences);
- return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc);
@@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences); array->num_fences = num_fences;
- array->num_pending = num_fences;
- array->fences = fences;
- array->base.error = PENDING_ERROR;
dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno);
- init_irq_work(&array->work, irq_dma_fence_array_work);
- init_irq_work(&array->work, dma_fence_array_irq_work);
/* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, num_fences);
- array->fences = fences;
- array->base.error = PENDING_ERROR;
- /*
- dma_fence_array objects should never contain any other fence
- containers or otherwise we run into recursion and potential kernel
@@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array;
- array = dma_fence_array_alloc(num_fences);
- array = dma_fence_array_alloc(); if (!array) return NULL;
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h> -/**
- struct dma_fence_array_cb - callback helper for fence array
- @cb: fence callback structure for signaling
- @array: reference to the parent fence array object
- */
-struct dma_fence_array_cb {
- struct dma_fence_cb cb;
- struct dma_fence_array *array;
-};
- /**
- struct dma_fence_array - fence to represent an array of fences
- @base: fence base class
@@ -33,18 +23,17 @@ struct dma_fence_array_cb {
- @num_pending: fences in the array still pending
- @fences: array of the fences
- @work: internal irq_work function
- @callbacks: array of callback helpers
*/ struct dma_fence_array { struct dma_fence base;
- @callback: callback structure for signaling
- unsigned num_fences;
- atomic_t num_pending;
- unsigned int num_fences;
- unsigned int num_pending; struct dma_fence **fences;
struct irq_work work;
- struct dma_fence_array_cb callbacks[] __counted_by(num_fences);
- struct dma_fence_cb callback; };
/** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) -struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);
- struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);