Removing the signal on any feature allows to simplfy the dma_fence_array code a lot and saves us from the need to install a callback on all fences at the same time.
This results in less memory and CPU overhead.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 130 +++++++++++++----------------- include/linux/dma-fence-array.h | 22 ++--- 2 files changed, 59 insertions(+), 93 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5e10e8df372f..f1b4b3296c87 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,80 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); }
-static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) { - struct dma_fence_array *array = container_of(wrk, typeof(*array), work); + struct dma_fence_array *array = + container_of(cb, struct dma_fence_array, callback);
- dma_fence_array_clear_pending_error(array); + irq_work_queue(&array->work); +} + +static void dma_fence_array_arm_cb(struct dma_fence_array *array) +{ + while (array->num_pending) { + struct dma_fence *f = array->fences[array->num_pending - 1]; + + if (!dma_fence_add_callback(f, &array->callback, + dma_fence_array_cb_func)) + return; + + dma_fence_array_set_pending_error(array, f->error); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + }
dma_fence_signal(&array->base); dma_fence_put(&array->base); }
-static void dma_fence_array_cb_func(struct dma_fence *f, - struct dma_fence_cb *cb) +static void dma_fence_array_irq_work(struct irq_work *wrk) { - struct dma_fence_array_cb *array_cb = - container_of(cb, struct dma_fence_array_cb, cb); - struct dma_fence_array *array = array_cb->array; - - dma_fence_array_set_pending_error(array, f->error); + struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- if (atomic_dec_and_test(&array->num_pending)) - irq_work_queue(&array->work); - else - dma_fence_put(&array->base); + WRITE_ONCE(array->num_pending, array->num_pending - 1); + dma_fence_array_arm_cb(array); }
static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = array->callbacks; - unsigned i; - - for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). - */ - dma_fence_get(&array->base); - if (dma_fence_add_callback(array->fences[i], &cb[i].cb, - dma_fence_array_cb_func)) { - int error = array->fences[i]->error; - - dma_fence_array_set_pending_error(array, error); - dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) { - dma_fence_array_clear_pending_error(array); - return false; - } - } - }
+ /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). + */ + dma_fence_get(&array->base); + dma_fence_array_arm_cb(array); return true; }
static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - int num_pending; + int num_pending, error = 0; unsigned int i;
/* - * We need to read num_pending before checking the enable_signal bit - * to avoid racing with the enable_signaling() implementation, which - * might decrement the counter, and cause a partial check. - * atomic_read_acquire() pairs with atomic_dec_and_test() in - * dma_fence_array_enable_signaling() - * - * The !--num_pending check is here to account for the any_signaled case - * if we race with enable_signaling(), that means the !num_pending check - * in the is_signalling_enabled branch might be outdated (num_pending - * might have been decremented), but that's fine. The user will get the - * right value when testing again later. + * Reading num_pending without a memory barrier here is correct since + * that is only for optimization, it is perfectly acceptable to have a + * stale value for it. In all other cases num_pending is accessed by a + * single call chain. */ - num_pending = atomic_read_acquire(&array->num_pending); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { - if (num_pending <= 0) - goto signal; - return false; - } + num_pending = READ_ONCE(array->num_pending); + for (i = 0; i < num_pending; ++i) { + struct dma_fence *f = array->fences[i];
- for (i = 0; i < array->num_fences; ++i) { - if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) - goto signal; - } - return false; + if (!dma_fence_is_signaled(f)) + return false;
-signal: + if (!error) + error = f->error; + } + dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; } @@ -171,15 +154,12 @@ EXPORT_SYMBOL(dma_fence_array_ops);
/** * dma_fence_array_alloc - Allocate a custom fence array - * @num_fences: [in] number of fences to add in the array * * Return dma fence array on success, NULL on failure */ -struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) { - struct dma_fence_array *array; - - return kzalloc_flex(*array, callbacks, num_fences); + return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc);
@@ -203,10 +183,13 @@ void dma_fence_array_init(struct dma_fence_array *array, WARN_ON(!num_fences || !fences);
array->num_fences = num_fences; + array->num_pending = num_fences; + array->fences = fences; + array->base.error = PENDING_ERROR;
dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno); - init_irq_work(&array->work, irq_dma_fence_array_work); + init_irq_work(&array->work, dma_fence_array_irq_work);
/* * dma_fence_array_enable_signaling() is invoked while holding @@ -220,11 +203,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, num_fences); - array->fences = fences; - - array->base.error = PENDING_ERROR; - /* * dma_fence_array objects should never contain any other fence * containers or otherwise we run into recursion and potential kernel @@ -265,7 +243,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, { struct dma_fence_array *array;
- array = dma_fence_array_alloc(num_fences); + array = dma_fence_array_alloc(); if (!array) return NULL;
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 1b1d87579c38..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h>
-/** - * struct dma_fence_array_cb - callback helper for fence array - * @cb: fence callback structure for signaling - * @array: reference to the parent fence array object - */ -struct dma_fence_array_cb { - struct dma_fence_cb cb; - struct dma_fence_array *array; -}; - /** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class @@ -33,18 +23,17 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function - * @callbacks: array of callback helpers + * @callback: callback structure for signaling */ struct dma_fence_array { struct dma_fence base;
- unsigned num_fences; - atomic_t num_pending; + unsigned int num_fences; + unsigned int num_pending; struct dma_fence **fences;
struct irq_work work; - - struct dma_fence_array_cb callbacks[] __counted_by(num_fences); + struct dma_fence_cb callback; };
/** @@ -78,11 +67,10 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index))
-struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, u64 context, unsigned seqno); - struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno);