Amdgpu was the only user of the signal-on-any feature and we dropped that use case recently, so we can remove that functionality again.
This allows us to simplify the dma_fence_array code a lot and saves us from having to install a callback on all fences at the same time.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-fence-array.c | 133 +++++++----------- drivers/dma-buf/dma-fence-unwrap.c | 3 +- drivers/dma-buf/dma-resv.c | 3 +- drivers/dma-buf/st-dma-fence-unwrap.c | 2 +- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/xe/xe_sync.c | 2 +- drivers/gpu/drm/xe/xe_vm.c | 4 +- include/linux/dma-fence-array.h | 28 +--- 8 files changed, 63 insertions(+), 115 deletions(-)
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 089f69469524..0d54bf2e47ba 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -42,97 +42,71 @@ static void dma_fence_array_clear_pending_error(struct dma_fence_array *array) cmpxchg(&array->base.error, PENDING_ERROR, 0); }
-static void irq_dma_fence_array_work(struct irq_work *wrk) +static void dma_fence_array_cb_func(struct dma_fence *f, + struct dma_fence_cb *cb) { - struct dma_fence_array *array = container_of(wrk, typeof(*array), work); + struct dma_fence_array *array = + container_of(cb, struct dma_fence_array, callback);
- dma_fence_array_clear_pending_error(array); - - dma_fence_signal(&array->base); - dma_fence_put(&array->base); + irq_work_queue(&array->work); }
-static void dma_fence_array_cb_func(struct dma_fence *f, - struct dma_fence_cb *cb) +static void irq_dma_fence_array_work(struct irq_work *wrk) { - struct dma_fence_array_cb *array_cb = - container_of(cb, struct dma_fence_array_cb, cb); - struct dma_fence_array *array = array_cb->array; + struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
- dma_fence_array_set_pending_error(array, f->error); + while (array->num_pending--) { + struct dma_fence *f = array->fences[array->num_pending];
- if (atomic_dec_and_test(&array->num_pending)) - irq_work_queue(&array->work); - else - dma_fence_put(&array->base); + if (!dma_fence_add_callback(f, &array->callback, + dma_fence_array_cb_func)) + return; + + dma_fence_array_set_pending_error(array, f->error); + } + + dma_fence_signal(&array->base); + dma_fence_put(&array->base); }
static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = array->callbacks; - unsigned i; - - for (i = 0; i < array->num_fences; ++i) { - cb[i].array = array; - /* - * As we may report that the fence is signaled before all - * callbacks are complete, we need to take an additional - * reference count on the array so that we do not free it too - * early. The core fence handling will only hold the reference - * until we signal the array as complete (but that is now - * insufficient). - */ - dma_fence_get(&array->base); - if (dma_fence_add_callback(array->fences[i], &cb[i].cb, - dma_fence_array_cb_func)) { - int error = array->fences[i]->error; - - dma_fence_array_set_pending_error(array, error); - dma_fence_put(&array->base); - if (atomic_dec_and_test(&array->num_pending)) { - dma_fence_array_clear_pending_error(array); - return false; - } - } - }
+ /* + * As we may report that the fence is signaled before all + * callbacks are complete, we need to take an additional + * reference count on the array so that we do not free it too + * early. The core fence handling will only hold the reference + * until we signal the array as complete (but that is now + * insufficient). + */ + dma_fence_get(&array->base); + irq_dma_fence_array_work(&array->work); return true; }
static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - int num_pending; + int num_pending, error = 0; unsigned int i;
/* - * We need to read num_pending before checking the enable_signal bit - * to avoid racing with the enable_signaling() implementation, which - * might decrement the counter, and cause a partial check. - * atomic_read_acquire() pairs with atomic_dec_and_test() in - * dma_fence_array_enable_signaling() - * - * The !--num_pending check is here to account for the any_signaled case - * if we race with enable_signaling(), that means the !num_pending check - * in the is_signalling_enabled branch might be outdated (num_pending - * might have been decremented), but that's fine. The user will get the - * right value when testing again later. + * Reading num_pending is just an optimization, it is perfectly + * acceptable to have a stale value for it. */ - num_pending = atomic_read_acquire(&array->num_pending); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) { - if (num_pending <= 0) - goto signal; - return false; - } + num_pending = READ_ONCE(array->num_pending); + for (i = 0; i < num_pending; ++i) { + struct dma_fence *f = array->fences[i];
- for (i = 0; i < array->num_fences; ++i) { - if (dma_fence_is_signaled(array->fences[i]) && !--num_pending) - goto signal; - } - return false; + if (!dma_fence_is_signaled(f)) + return false;
-signal: + if (!error) + error = f->error; + } + dma_fence_array_set_pending_error(array, error); dma_fence_array_clear_pending_error(array); return true; } @@ -171,15 +145,12 @@ EXPORT_SYMBOL(dma_fence_array_ops);
/** * dma_fence_array_alloc - Allocate a custom fence array - * @num_fences: [in] number of fences to add in the array * * Return dma fence array on success, NULL on failure */ -struct dma_fence_array *dma_fence_array_alloc(int num_fences) +struct dma_fence_array *dma_fence_array_alloc(void) { - struct dma_fence_array *array; - - return kzalloc_flex(*array, callbacks, num_fences); + return kzalloc_obj(struct dma_fence_array); } EXPORT_SYMBOL(dma_fence_array_alloc);
@@ -190,21 +161,22 @@ EXPORT_SYMBOL(dma_fence_array_alloc); * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array * * Implementation of @dma_fence_array_create without allocation. Useful to init * a preallocated dma fence array in the path of reclaim or dma fence signaling. */ void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) + u64 context, unsigned seqno) { static struct lock_class_key dma_fence_array_lock_key;
WARN_ON(!num_fences || !fences);
array->num_fences = num_fences; + array->num_pending = num_fences; + array->fences = fences; + array->base.error = PENDING_ERROR;
dma_fence_init(&array->base, &dma_fence_array_ops, NULL, context, seqno); @@ -222,11 +194,6 @@ void dma_fence_array_init(struct dma_fence_array *array, */ lockdep_set_class(&array->base.inline_lock, &dma_fence_array_lock_key);
- atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); - array->fences = fences; - - array->base.error = PENDING_ERROR; - /* * dma_fence_array objects should never contain any other fence * containers or otherwise we run into recursion and potential kernel @@ -249,7 +216,6 @@ EXPORT_SYMBOL(dma_fence_array_init); * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use - * @signal_on_any: [in] signal on any fence in the array * * Allocate a dma_fence_array object and initialize the base fence with * dma_fence_init(). @@ -264,17 +230,16 @@ EXPORT_SYMBOL(dma_fence_array_init); */ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) + u64 context, unsigned seqno) { struct dma_fence_array *array;
- array = dma_fence_array_alloc(num_fences); + array = dma_fence_array_alloc(); if (!array) return NULL;
dma_fence_array_init(array, num_fences, fences, - context, seqno, signal_on_any); + context, seqno);
return array; } diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index 07fe9bf45aea..53bb40e70b27 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -180,8 +180,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
if (count > 1) { result = dma_fence_array_create(count, array, - dma_fence_context_alloc(1), - 1, false); + dma_fence_context_alloc(1), 1); if (!result) { for (i = 0; i < count; i++) dma_fence_put(array[i]); diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index ce9e6c04897f..39a92d9f2413 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -648,8 +648,7 @@ int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, }
array = dma_fence_array_create(count, fences, - dma_fence_context_alloc(1), - 1, false); + dma_fence_context_alloc(1), 1); if (!array) { while (count--) dma_fence_put(fences[count]); diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c index 9c74195f47fd..45413666970e 100644 --- a/drivers/dma-buf/st-dma-fence-unwrap.c +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -65,7 +65,7 @@ static struct dma_fence *mock_array(unsigned int num_fences, ...)
array = dma_fence_array_create(num_fences, fences, dma_fence_context_alloc(1), - 1, false); + 1); if (!array) goto error_free; return &array->base; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e7918f896a26..1ac91a46d87f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3203,8 +3203,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno++, - false); + eb->context->parallel.seqno++); if (!fence_array) { kfree(fences); return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 24d6d9af20d6..37866768d64c 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -376,7 +376,7 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync, xe_assert(vm->xe, current_fence == num_fence); cf = dma_fence_array_create(num_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); if (!cf) goto err_out;
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 548b0769b3ef..b916a9d90104 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3196,7 +3196,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm, goto err_trace; }
- cf = dma_fence_array_alloc(n_fence); + cf = dma_fence_array_alloc(); if (!cf) { fence = ERR_PTR(-ENOMEM); goto err_out; @@ -3240,7 +3240,7 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
xe_assert(vm->xe, current_fence == n_fence); dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1), - 1, false); + 1); fence = &cf->base;
for_each_tile(tile, vm->xe, id) { diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 370b3d2bba37..3ee55c0e2fa4 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -15,16 +15,6 @@ #include <linux/dma-fence.h> #include <linux/irq_work.h>
-/** - * struct dma_fence_array_cb - callback helper for fence array - * @cb: fence callback structure for signaling - * @array: reference to the parent fence array object - */ -struct dma_fence_array_cb { - struct dma_fence_cb cb; - struct dma_fence_array *array; -}; - /** * struct dma_fence_array - fence to represent an array of fences * @base: fence base class @@ -33,18 +23,17 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function - * @callbacks: array of callback helpers + * @callback: callback structure for signaling */ struct dma_fence_array { struct dma_fence base;
- unsigned num_fences; - atomic_t num_pending; + unsigned int num_fences; + unsigned int num_pending; struct dma_fence **fences;
struct irq_work work; - - struct dma_fence_array_cb callbacks[] __counted_by(num_fences); + struct dma_fence_cb callback; };
/** @@ -78,16 +67,13 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index))
-struct dma_fence_array *dma_fence_array_alloc(int num_fences); +struct dma_fence_array *dma_fence_array_alloc(void); void dma_fence_array_init(struct dma_fence_array *array, int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); - + u64 context, unsigned seqno); struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any); + u64 context, unsigned seqno);
bool dma_fence_match_context(struct dma_fence *fence, u64 context);
linaro-mm-sig@lists.linaro.org