Next round for that one here, maybe the CI systems are now more gracefully with me :)
I'm pretty sure that a couple of those dma_resv_for_each_fence_unlocked should actually be replaced with lock+dma_resv_for_each_fence, but that needs more auditing.
Please review and comment.
Thanks, Christian.
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 62 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 50 ++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..77083170ec3b 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,68 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence);
+/** + * dma_resv_walk_unlocked - walk over fences in a dma_resv obj + * @obj: the dma_resv object + * @cursor: cursor to record the current position + * @all_fences: true returns also the shared fences + * @first: if we should start over + * + * Return all the fences in the dma_resv object which are not yet signaled. + * The returned fence has an extra local reference so will stay alive. + * If a concurrent modify is detected the whole iterator is started over again. + */ +struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj, + struct dma_resv_iter *cursor, + bool all_fences, bool first) +{ + struct dma_fence *fence = NULL; + + first |= read_seqcount_retry(&obj->seq, cursor->seq); + do { + /* Drop the reference from the previous round */ + dma_fence_put(fence); + + cursor->is_first = first; + if (first) { + cursor->seq = read_seqcount_begin(&obj->seq); + cursor->index = -1; + cursor->fences = dma_resv_shared_list(obj); + + fence = dma_resv_excl_fence(obj); + if (fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + fence = NULL; + } else { + fence = NULL; + } + + if (fence) { + fence = dma_fence_get_rcu(fence); + } else if (all_fences && cursor->fences) { + struct dma_resv_list *fences = cursor->fences; + + while (++cursor->index < fences->shared_count) { + fence = rcu_dereference( + fences->shared[cursor->index]); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + break; + } + if (cursor->index < fences->shared_count) + fence = dma_fence_get_rcu(fence); + else + fence = NULL; + } + + /* For the eventually next round */ + first = true; + } while (read_seqcount_retry(&obj->seq, cursor->seq)); + + return fence; +} +EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked); + /** * dma_resv_copy_fences - Copy all fences from src to dst. * @dst: the destination reservation object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 9100dd3dc21f..1cd686384c71 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -149,6 +149,53 @@ struct dma_resv { struct dma_resv_list __rcu *fence; };
+/** + * struct dma_resv_iter - current position into the dma_resv fences + * + * Don't touch this directly in the driver, use the accessor function instead. + */ +struct dma_resv_iter { + /** @seq: sequence number to check for modifications */ + unsigned int seq; + + /** @index: index into the shared fences */ + unsigned int index; + + /** @fences: the shared fences */ + struct dma_resv_list *fences; + + /** @is_first: true if this is the first returned fence */ + bool is_first; +}; + +/** + * dma_resv_for_each_fence_unlocked - fence iterator + * @obj: a dma_resv object pointer + * @cursor: a struct dma_resv_iter pointer + * @all_fences: true if all fences should be returned + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object without holding the + * dma_resv::lock. The RCU read side lock must be hold when using this, but can + * be dropped and re-taken as necessary inside the loop. @all_fences controls + * if the shared fences are returned as well. + */ +#define dma_resv_for_each_fence_unlocked(obj, cursor, all_fences, fence) \ + for (fence = dma_resv_walk_unlocked(obj, cursor, all_fences, true); \ + fence; dma_fence_put(fence), \ + fence = dma_resv_walk_unlocked(obj, cursor, all_fences, false)) + +/** + * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one + * @cursor: the cursor of the current position + * + * Returns true if the currently returned fence is the exclusive one. + */ +static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{ + return cursor->index == -1; +} + #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
@@ -366,6 +413,9 @@ void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); +struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj, + struct dma_resv_iter *cursor, + bool first, bool all_fences); int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, unsigned *pshared_count, struct dma_fence ***pshared); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
On Thu, Sep 16, 2021 at 01:30:17PM +0200, Christian König wrote:
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive
Signed-off-by: Christian König christian.koenig@amd.com
Replied in the other thread with the fully typed out example, this really needs iter_init/next/end here. Or it's just way too fragile and tricky for a generic helper that we roll out everywhere. -Daniel
drivers/dma-buf/dma-resv.c | 62 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 50 ++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..77083170ec3b 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,68 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/**
- dma_resv_walk_unlocked - walk over fences in a dma_resv obj
- @obj: the dma_resv object
- @cursor: cursor to record the current position
- @all_fences: true returns also the shared fences
- @first: if we should start over
- Return all the fences in the dma_resv object which are not yet signaled.
- The returned fence has an extra local reference so will stay alive.
- If a concurrent modify is detected the whole iterator is started over again.
- */
+struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj,
struct dma_resv_iter *cursor,
bool all_fences, bool first)
+{
- struct dma_fence *fence = NULL;
- first |= read_seqcount_retry(&obj->seq, cursor->seq);
- do {
/* Drop the reference from the previous round */
dma_fence_put(fence);
cursor->is_first = first;
if (first) {
cursor->seq = read_seqcount_begin(&obj->seq);
cursor->index = -1;
cursor->fences = dma_resv_shared_list(obj);
fence = dma_resv_excl_fence(obj);
if (fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&fence->flags))
fence = NULL;
} else {
fence = NULL;
}
if (fence) {
fence = dma_fence_get_rcu(fence);
} else if (all_fences && cursor->fences) {
struct dma_resv_list *fences = cursor->fences;
while (++cursor->index < fences->shared_count) {
fence = rcu_dereference(
fences->shared[cursor->index]);
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&fence->flags))
break;
}
if (cursor->index < fences->shared_count)
fence = dma_fence_get_rcu(fence);
else
fence = NULL;
}
/* For the eventually next round */
first = true;
- } while (read_seqcount_retry(&obj->seq, cursor->seq));
- return fence;
+} +EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked);
/**
- dma_resv_copy_fences - Copy all fences from src to dst.
- @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 9100dd3dc21f..1cd686384c71 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -149,6 +149,53 @@ struct dma_resv { struct dma_resv_list __rcu *fence; }; +/**
- struct dma_resv_iter - current position into the dma_resv fences
- Don't touch this directly in the driver, use the accessor function instead.
- */
+struct dma_resv_iter {
- /** @seq: sequence number to check for modifications */
- unsigned int seq;
- /** @index: index into the shared fences */
- unsigned int index;
- /** @fences: the shared fences */
- struct dma_resv_list *fences;
- /** @is_first: true if this is the first returned fence */
- bool is_first;
+};
+/**
- dma_resv_for_each_fence_unlocked - fence iterator
- @obj: a dma_resv object pointer
- @cursor: a struct dma_resv_iter pointer
- @all_fences: true if all fences should be returned
- @fence: the current fence
- Iterate over the fences in a struct dma_resv object without holding the
- dma_resv::lock. The RCU read side lock must be hold when using this, but can
- be dropped and re-taken as necessary inside the loop. @all_fences controls
- if the shared fences are returned as well.
- */
+#define dma_resv_for_each_fence_unlocked(obj, cursor, all_fences, fence) \
- for (fence = dma_resv_walk_unlocked(obj, cursor, all_fences, true); \
fence; dma_fence_put(fence), \
fence = dma_resv_walk_unlocked(obj, cursor, all_fences, false))
+/**
- dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
- @cursor: the cursor of the current position
- Returns true if the currently returned fence is the exclusive one.
- */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{
- return cursor->index == -1;
+}
#define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base) @@ -366,6 +413,9 @@ void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); +struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj,
struct dma_resv_iter *cursor,
bool first, bool all_fences);
int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, unsigned *pshared_count, struct dma_fence ***pshared); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src); -- 2.25.1
A simpler version of the iterator to be used when the dma_resv object is locked.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 37 +++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 18 ++++++++++++++++++ 2 files changed, 55 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 77083170ec3b..bbf36a08ced0 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,43 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence);
+/** + * dma_resv_walk - walk over fences in a dma_resv obj + * @obj: the dma_resv object + * @cursor: cursor to record the current position + * @all_fences: true returns also the shared fences + * @first: if we should start over + * + * Return all the fences in the dma_resv object while holding the + * dma_resv::lock. + */ +struct dma_fence *dma_resv_walk(struct dma_resv *obj, + struct dma_resv_iter *cursor, + bool all_fences, bool first) +{ + dma_resv_assert_held(obj); + + cursor->is_first = first; + if (first) { + struct dma_fence *fence; + + cursor->index = -1; + cursor->fences = dma_resv_shared_list(obj); + + fence = dma_resv_excl_fence(obj); + if (fence) + return fence; + } + + if (!all_fences || !cursor->fences || + ++cursor->index >= cursor->fences->shared_count) + return NULL; + + return rcu_dereference_protected(cursor->fences->shared[cursor->index], + dma_resv_held(obj)); +} +EXPORT_SYMBOL_GPL(dma_resv_walk); + /** * dma_resv_walk_unlocked - walk over fences in a dma_resv obj * @obj: the dma_resv object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 1cd686384c71..6761512ba662 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -168,6 +168,21 @@ struct dma_resv_iter { bool is_first; };
+/** + * dma_resv_for_each_fence - fence iterator + * @obj: a dma_resv object pointer + * @cursor: a struct dma_resv_iter pointer + * @all_fences: true if all fences should be returned + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object while holding the + * dma_resv::lock. @all_fences controls if the shared fences are returned as + * well. + */ +#define dma_resv_for_each_fence(obj, cursor, all_fences, fence) \ + for (fence = dma_resv_walk(obj, cursor, all_fences, true); fence; \ + fence = dma_resv_walk(obj, cursor, all_fences, false)) + /** * dma_resv_for_each_fence_unlocked - fence iterator * @obj: a dma_resv object pointer @@ -413,6 +428,9 @@ void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); +struct dma_fence *dma_resv_walk(struct dma_resv *obj, + struct dma_resv_iter *cursor, + bool first, bool all_fences); struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj, struct dma_resv_iter *cursor, bool first, bool all_fences);
This makes the function much simpler since the complex retry logic is now handled else where.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 81 +++++++++++++++----------------------- 1 file changed, 32 insertions(+), 49 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index bbf36a08ced0..91c040cb7d63 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -431,74 +431,57 @@ EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked); */ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { - struct dma_resv_list *src_list, *dst_list; - struct dma_fence *old, *new; - unsigned int i; + struct dma_resv_iter cursor; + struct dma_resv_list *list; + struct dma_fence *f, *excl;
dma_resv_assert_held(dst);
- rcu_read_lock(); - src_list = dma_resv_shared_list(src); + list = NULL; + excl = NULL;
-retry: - if (src_list) { - unsigned int shared_count = src_list->shared_count; + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(dst, &cursor, true, f) {
- rcu_read_unlock(); + if (cursor.is_first) { + dma_resv_list_free(list); + dma_fence_put(excl);
- dst_list = dma_resv_list_alloc(shared_count); - if (!dst_list) - return -ENOMEM; + if (cursor.fences) { + unsigned int cnt = cursor.fences->shared_count;
- rcu_read_lock(); - src_list = dma_resv_shared_list(src); - if (!src_list || src_list->shared_count > shared_count) { - kfree(dst_list); - goto retry; - } + rcu_read_unlock(); + list = dma_resv_list_alloc(cnt); + if (!list) + return -ENOMEM;
- dst_list->shared_count = 0; - for (i = 0; i < src_list->shared_count; ++i) { - struct dma_fence __rcu **dst; - struct dma_fence *fence; + list->shared_count = 0; + rcu_read_lock();
- fence = rcu_dereference(src_list->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - continue; - - if (!dma_fence_get_rcu(fence)) { - dma_resv_list_free(dst_list); - src_list = dma_resv_shared_list(src); - goto retry; + } else { + list = NULL; } + excl = NULL; + }
- if (dma_fence_is_signaled(fence)) { - dma_fence_put(fence); - continue; - } + if (dma_resv_iter_is_exclusive(&cursor)) + excl = f; + else + RCU_INIT_POINTER(list->shared[list->shared_count++], f);
- dst = &dst_list->shared[dst_list->shared_count++]; - rcu_assign_pointer(*dst, fence); - } - } else { - dst_list = NULL; + /* Don't drop the reference */ + f = NULL; }
- new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock();
- src_list = dma_resv_shared_list(dst); - old = dma_resv_excl_fence(dst); - write_seqcount_begin(&dst->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(dst->fence_excl, new); - RCU_INIT_POINTER(dst->fence, dst_list); + excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); + list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); write_seqcount_end(&dst->seq);
- dma_resv_list_free(src_list); - dma_fence_put(old); + dma_resv_list_free(list); + dma_fence_put(excl);
return 0; }
This makes the function much simpler since the complex retry logic is now handled elsewhere.
v2: use sizeof(void*) instead
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 110 +++++++++++++------------------------ 1 file changed, 37 insertions(+), 73 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 91c040cb7d63..bba328475304 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -491,99 +491,63 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @pfence_excl: the returned exclusive fence (or NULL) - * @pshared_count: the number of shared fences returned - * @pshared: the array of shared fence ptrs returned (array is krealloc'd to + * @fence_excl: the returned exclusive fence (or NULL) + * @shared_count: the number of shared fences returned + * @shared: the array of shared fence ptrs returned (array is krealloc'd to * the required size, and must be freed by caller) * * Retrieve all fences from the reservation object. If the pointer for the * exclusive fence is not specified the fence is put into the array of the * shared fences as well. Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, - unsigned int *pshared_count, - struct dma_fence ***pshared) +int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **fence_excl, + unsigned int *shared_count, struct dma_fence ***shared) { - struct dma_fence **shared = NULL; - struct dma_fence *fence_excl; - unsigned int shared_count; - int ret = 1; - - do { - struct dma_resv_list *fobj; - unsigned int i, seq; - size_t sz = 0; - - shared_count = i = 0; - - rcu_read_lock(); - seq = read_seqcount_begin(&obj->seq); - - fence_excl = dma_resv_excl_fence(obj); - if (fence_excl && !dma_fence_get_rcu(fence_excl)) - goto unlock; + struct dma_resv_iter cursor; + struct dma_fence *fence;
- fobj = dma_resv_shared_list(obj); - if (fobj) - sz += sizeof(*shared) * fobj->shared_max; + *shared_count = 0; + *shared = NULL;
- if (!pfence_excl && fence_excl) - sz += sizeof(*shared); + if (fence_excl) + *fence_excl = NULL;
- if (sz) { - struct dma_fence **nshared; + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj, &cursor, true, fence) {
- nshared = krealloc(shared, sz, - GFP_NOWAIT | __GFP_NOWARN); - if (!nshared) { - rcu_read_unlock(); + if (cursor.is_first) { + unsigned int count;
- dma_fence_put(fence_excl); - fence_excl = NULL; + while (*shared_count) + dma_fence_put((*shared)[--(*shared_count)]);
- nshared = krealloc(shared, sz, GFP_KERNEL); - if (nshared) { - shared = nshared; - continue; - } + if (fence_excl) + dma_fence_put(*fence_excl);
- ret = -ENOMEM; - break; - } - shared = nshared; - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - shared[i] = rcu_dereference(fobj->shared[i]); - if (!dma_fence_get_rcu(shared[i])) - break; - } - } + count = cursor.fences ? cursor.fences->shared_count : 0; + count += fence_excl ? 0 : 1; + rcu_read_unlock();
- if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { - while (i--) - dma_fence_put(shared[i]); - dma_fence_put(fence_excl); - goto unlock; + /* Eventually re-allocate the array */ + *shared = krealloc_array(*shared, count, + sizeof(void *), + GFP_KERNEL); + if (count && !*shared) + return -ENOMEM; + rcu_read_lock(); }
- ret = 0; -unlock: - rcu_read_unlock(); - } while (ret); - - if (pfence_excl) - *pfence_excl = fence_excl; - else if (fence_excl) - shared[shared_count++] = fence_excl; + if (dma_resv_iter_is_exclusive(&cursor) && fence_excl) + *fence_excl = fence; + else + (*shared)[(*shared_count)++] = fence;
- if (!shared_count) { - kfree(shared); - shared = NULL; + /* Don't drop the reference */ + fence = NULL; } + rcu_read_unlock();
- *pshared_count = shared_count; - *pshared = shared; - return ret; + return 0; } EXPORT_SYMBOL_GPL(dma_resv_get_fences);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 64 +++++--------------------------------- 1 file changed, 7 insertions(+), 57 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index bba328475304..764a71ec2347 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -569,74 +569,24 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; - unsigned int seq, shared_count; + struct dma_resv_iter cursor; struct dma_fence *fence; - int i;
-retry: - shared_count = 0; - seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); - i = -1; - - fence = dma_resv_excl_fence(obj); - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - if (!dma_fence_get_rcu(fence)) - goto unlock_retry; + dma_resv_for_each_fence_unlocked(obj, &cursor, wait_all, fence) { + rcu_read_unlock();
- if (dma_fence_is_signaled(fence)) { + ret = dma_fence_wait_timeout(fence, intr, ret); + if (ret <= 0) { dma_fence_put(fence); - fence = NULL; + return ret; }
- } else { - fence = NULL; - } - - if (wait_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - - if (fobj) - shared_count = fobj->shared_count; - - for (i = 0; !fence && i < shared_count; ++i) { - struct dma_fence *lfence; - - lfence = rcu_dereference(fobj->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &lfence->flags)) - continue; - - if (!dma_fence_get_rcu(lfence)) - goto unlock_retry; - - if (dma_fence_is_signaled(lfence)) { - dma_fence_put(lfence); - continue; - } - - fence = lfence; - break; - } + rcu_read_lock(); } - rcu_read_unlock(); - if (fence) { - if (read_seqcount_retry(&obj->seq, seq)) { - dma_fence_put(fence); - goto retry; - }
- ret = dma_fence_wait_timeout(fence, intr, ret); - dma_fence_put(fence); - if (ret > 0 && wait_all && (i + 1 < shared_count)) - goto retry; - } return ret; - -unlock_retry: - rcu_read_unlock(); - goto retry; } EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 54 +++++--------------------------------- 1 file changed, 7 insertions(+), 47 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 764a71ec2347..fae881a5d336 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -591,22 +591,6 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
-static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) -{ - struct dma_fence *fence, *lfence = passed_fence; - int ret = 1; - - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { - fence = dma_fence_get_rcu(lfence); - if (!fence) - return -1; - - ret = !!dma_fence_is_signaled(fence); - dma_fence_put(fence); - } - return ret; -} - /** * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. @@ -623,43 +607,19 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) { + struct dma_resv_iter cursor; struct dma_fence *fence; - unsigned int seq; - int ret;
rcu_read_lock(); -retry: - ret = true; - seq = read_seqcount_begin(&obj->seq); - - if (test_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - unsigned int i, shared_count; - - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - else if (!ret) - break; + dma_resv_for_each_fence_unlocked(obj, &cursor, test_all, fence) { + if (!dma_fence_is_signaled(fence)) { + rcu_read_unlock(); + dma_fence_put(fence); + return false; } } - - fence = dma_resv_excl_fence(obj); - if (ret && fence) { - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - - } - - if (read_seqcount_retry(&obj->seq, seq)) - goto retry; - rcu_read_unlock(); - return ret; + return true; } EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
This is probably a fix since we didn't even grabed a reference to the fences.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3b22c0013dbf..d5912f5b5953 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -269,19 +269,11 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) { struct dma_resv *resv = &bo->base._resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; - int i;
rcu_read_lock(); - fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - if (fence && !fence->ops->signaled) - dma_fence_enable_sw_signaling(fence); - - for (i = 0; fobj && i < fobj->shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - + dma_resv_for_each_fence_unlocked(resv, &cursor, true, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 44 ++++++++---------------- 1 file changed, 14 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 862eb3c1c4c5..e5d8bb11a14a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -252,41 +252,25 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner) { - struct dma_resv_list *flist; + struct dma_resv_iter cursor; struct dma_fence *f; - unsigned i; - int r = 0; + int r;
if (resv == NULL) return -EINVAL;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - dma_fence_chain_for_each(f, f) { - struct dma_fence_chain *chain = to_dma_fence_chain(f); - - if (amdgpu_sync_test_fence(adev, mode, owner, chain ? - chain->fence : f)) { - r = amdgpu_sync_fence(sync, f); - dma_fence_put(f); - if (r) - return r; - break; - } - } - - flist = dma_resv_shared_list(resv); - if (!flist) - return 0; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); - - if (amdgpu_sync_test_fence(adev, mode, owner, f)) { - r = amdgpu_sync_fence(sync, f); - if (r) - return r; + dma_resv_for_each_fence(resv, &cursor, true, f) { + dma_fence_chain_for_each(f, f) { + struct dma_fence_chain *chain = to_dma_fence_chain(f); + + if (amdgpu_sync_test_fence(adev, mode, owner, chain ? + chain->fence : f)) { + r = amdgpu_sync_fence(sync, f); + dma_fence_put(f); + if (r) + return r; + break; + } } } return 0;
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1129e17e9f09..b3859c8ded85 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1332,10 +1332,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { unsigned long num_pages = bo->resource->num_pages; + struct dma_resv_iter resv_cursor; struct amdgpu_res_cursor cursor; - struct dma_resv_list *flist; struct dma_fence *f; - int i;
/* Swapout? */ if (bo->resource->mem_type == TTM_PL_SYSTEM) @@ -1349,14 +1348,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_shared_list(bo->base.resv); - if (flist) { - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(bo->base.resv)); - if (amdkfd_fence_check_mm(f, current->mm)) - return false; - } + dma_resv_for_each_fence(bo->base.resv, &resv_cursor, true, f) { + if (amdkfd_fence_check_mm(f, current->mm)) + return false; }
switch (bo->resource->mem_type) {
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/msm/msm_gem.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 22308a1b66fc..5bece4600e41 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -880,7 +880,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; struct msm_gem_vma *vma; uint64_t off = drm_vma_node_start(&obj->vma_node); @@ -955,22 +955,13 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, seq_puts(m, "\n"); }
- rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_for_each_fence(robj, &cursor, true, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + describe_fence(fence, "Exclusive", m); + else describe_fence(fence, "Shared", m); - } }
- fence = dma_resv_excl_fence(robj); - if (fence) - describe_fence(fence, "Exclusive", m); - rcu_read_unlock(); - msm_gem_unlock(obj); }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_sync.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 9257b60144c4..23fa98dfe04b 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -91,33 +91,17 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_resv *resv, bool shared) { - struct dma_resv_list *flist; - struct dma_fence *f; + struct dma_resv_iter cursor; struct radeon_fence *fence; - unsigned i; + struct dma_fence *f; int r = 0;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - fence = f ? to_radeon_fence(f) : NULL; - if (fence && fence->rdev == rdev) - radeon_sync_fence(sync, fence); - else if (f) - r = dma_fence_wait(f, true); - - flist = dma_resv_shared_list(resv); - if (shared || !flist || r) - return r; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); + dma_resv_for_each_fence(resv, &cursor, shared, f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); else r = dma_fence_wait(f, true); - if (r) break; }
Simplifying the code a bit.
v2: use dma_resv_for_each_fence
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_main.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 042c16b5d54a..ee2fe37ee724 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -699,30 +699,16 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, struct drm_gem_object *obj, bool write) { + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv); - - return drm_sched_job_add_dependency(job, fence); - } - - ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences); - if (ret || !fence_count) - return ret;
- for (i = 0; i < fence_count; i++) { - ret = drm_sched_job_add_dependency(job, fences[i]); + dma_resv_for_each_fence(obj->resv, &cursor, write, fence) { + ret = drm_sched_job_add_dependency(job, fence); if (ret) - break; + return ret; } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); - return ret; + return 0; } EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
This makes the function much simpler since the complex retry logic is now handled else where.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 30 +++++++----------------- 1 file changed, 9 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..adf9a8413446 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err;
err = -ENOENT; @@ -109,28 +109,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq); - - /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); - - /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i; - - for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]); - + args->busy = false; + dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, true, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy = busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; - err = 0; out: rcu_read_unlock();
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead, according to Tvrtko the lock is held here anyway.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_sw_fence.c | 51 +++++----------------------- 1 file changed, 9 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index c589a681da77..86eb9ece71e0 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -572,56 +572,23 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *f; int ret = 0, pending;
debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp));
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - if (shared[i]->ops == exclude) - continue; - - pending = i915_sw_fence_await_dma_fence(fence, - shared[i], - timeout, - gfp); - if (pending < 0) { - ret = pending; - break; - } - - ret |= pending; - } - - for (i = 0; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(resv); - } - - if (ret >= 0 && excl && excl->ops != exclude) { - pending = i915_sw_fence_await_dma_fence(fence, - excl, - timeout, + dma_resv_for_each_fence(resv, &cursor, write, f) { + pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); - if (pending < 0) + if (pending < 0) { ret = pending; - else - ret |= pending; - } - - dma_fence_put(excl); + break; + }
+ ret |= pending; + } return ret; }
Simplifying the code a bit.
v2: add missing rcu_read_lock()/rcu_read_unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_request.c | 40 ++++++++--------------------- 1 file changed, 11 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..221df2edcf02 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1509,39 +1509,21 @@ i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret = 0;
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - ret = i915_request_await_dma_fence(to, shared[i]); - if (ret) - break; - - dma_fence_put(shared[i]); + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, write, fence) { + rcu_read_unlock(); + ret = i915_request_await_dma_fence(to, fence); + rcu_read_lock(); + if (ret) { + dma_fence_put(fence); + break; } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } - - if (excl) { - if (ret == 0) - ret = i915_request_await_dma_fence(to, excl); - - dma_fence_put(excl); } + rcu_read_unlock();
return ret; }
Simplifying the code a bit.
v2: add missing rcu read unlock.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 +++++++----------------- 1 file changed, 15 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f909aaa09d9c..a75dee9d7790 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -37,55 +37,28 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - struct dma_fence *excl; - bool prune_fences = false; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; - - ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - timeout = i915_gem_object_wait_fence(shared[i], - flags, timeout); - if (timeout < 0) - break; - - dma_fence_put(shared[i]); + struct dma_resv_iter cursor; + struct dma_fence *fence; + + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(resv, &cursor, flags & I915_WAIT_ALL, + fence) { + + rcu_read_unlock(); + timeout = i915_gem_object_wait_fence(fence, flags, timeout); + rcu_read_lock(); + if (timeout < 0) { + dma_fence_put(fence); + break; } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - - /* - * If both shared fences and an exclusive fence exist, - * then by construction the shared fences must be later - * than the exclusive fence. If we successfully wait for - * all the shared fences, we know that the exclusive fence - * must all be signaled. If all the shared fences are - * signaled, we can prune the array and recover the - * floating references on the fences/requests. - */ - prune_fences = count && timeout >= 0; - } else { - excl = dma_resv_get_excl_unlocked(resv); } - - if (excl && timeout >= 0) - timeout = i915_gem_object_wait_fence(excl, flags, timeout); - - dma_fence_put(excl); + rcu_read_unlock();
/* * Opportunistically prune the fences iff we know they have *all* been * signaled. */ - if (prune_fences) + if (timeout > 0) dma_resv_prune(resv);
return timeout;
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 33 +++++++----------------- 1 file changed, 9 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index a75dee9d7790..db8a72556338 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -124,32 +124,17 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr) { - struct dma_fence *excl; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - i915_gem_fence_wait_priority(shared[i], attr); - dma_fence_put(shared[i]); - } - - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } + struct dma_resv_iter cursor; + struct dma_fence *fence;
- if (excl) { - i915_gem_fence_wait_priority(excl, attr); - dma_fence_put(excl); + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, + flags & I915_WAIT_ALL, fence) { + rcu_read_unlock(); + i915_gem_fence_wait_priority(fence, attr); + rcu_read_lock(); } + rcu_read_unlock(); return 0; }
This is maybe even a fix since the RCU usage here looks incorrect.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..7ff0027af7d9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -507,16 +507,17 @@ static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { struct intel_engine_cs *engine = NULL; + struct dma_resv_iter cursor; struct dma_fence *fence;
rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); + dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, false, + fence) { + if (fence && dma_fence_is_i915(fence) && + !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + } rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - return engine; }
Simplifying the code a bit.
v2: add rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/display/intel_display.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 134a6acbd8fb..fa73a6754373 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11290,6 +11290,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
if (!new_plane_state->uapi.fence) { /* implicit fencing */ + struct dma_resv_iter cursor; struct dma_fence *fence;
ret = i915_sw_fence_await_reservation(&state->commit_ready, @@ -11300,12 +11301,15 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb;
- fence = dma_resv_get_excl_unlocked(obj->base.resv); - if (fence) { + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, false, + fence) { + rcu_read_unlock(); add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); - dma_fence_put(fence); + rcu_read_lock(); } + rcu_read_unlock(); } else { add_rps_boost_after_vblank(new_plane_state->hw.crtc, new_plane_state->uapi.fence);
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 09c820045859..8c3ff098e49e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1340,31 +1340,21 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array, struct drm_gem_object *obj, bool write) { - int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = - dma_resv_get_excl_unlocked(obj->resv); - - return drm_gem_fence_array_add(fence_array, fence); - } - - ret = dma_resv_get_fences(obj->resv, NULL, - &fence_count, &fences); - if (ret || !fence_count) - return ret; - - for (i = 0; i < fence_count; i++) { - ret = drm_gem_fence_array_add(fence_array, fences[i]); - if (ret) + struct dma_resv_iter cursor; + struct dma_fence *fence; + int ret = 0; + + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj->resv, &cursor, write, fence) { + rcu_read_unlock(); + ret = drm_gem_fence_array_add(fence_array, fence); + rcu_read_lock(); + if (ret) { + dma_fence_put(fence); break; + } } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem_atomic_helper.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e570398abd78..86df75b2f8eb 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -143,6 +143,7 @@ */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { + struct dma_resv_iter cursor; struct drm_gem_object *obj; struct dma_fence *fence;
@@ -150,9 +151,16 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0;
obj = drm_gem_fb_get_obj(state->fb, 0); - fence = dma_resv_get_excl_unlocked(obj->resv); - drm_atomic_set_fence_for_plane(state, fence); + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj->resv, &cursor, false, fence) { + rcu_read_unlock(); + /* TODO: We only use the first write fence here */ + drm_atomic_set_fence_for_plane(state, fence); + return 0; + } + rcu_read_unlock();
+ drm_atomic_set_fence_for_plane(state, NULL); return 0; } EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +++++++------------------ 1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..f3584d840edc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -339,14 +339,15 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) }
int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, + bool exclusive, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct dma_fence *fence; struct dma_resv *resv = nvbo->bo.base.resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; + struct dma_fence *fence; struct nouveau_fence *f; - int ret = 0, i; + int ret;
if (!exclusive) { ret = dma_resv_reserve_shared(resv, 1); @@ -355,10 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e return ret; }
- fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - - if (fence) { + dma_resv_for_each_fence(resv, &cursor, exclusive, fence) { struct nouveau_channel *prev = NULL; bool must_wait = true;
@@ -366,41 +364,19 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (f) { rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + if (prev && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); }
- if (must_wait) + if (must_wait) { ret = dma_fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(resv)); - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); + if (ret) + return ret; } - - if (must_wait) - ret = dma_fence_wait(fence, intr); } - - return ret; + return 0; }
void
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 8d048bacd6f0..0f5cdb897f06 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) struct nouveau_bo *nvbo; struct nv50_head_atom *asyh; struct nv50_wndw_ctxdma *ctxdma; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret;
NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb); @@ -561,7 +563,14 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; }
- asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv); + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(nvbo->bo.base.resv, &cursor, false, + fence) { + /* TODO: We only use the first writer here */ + asyw->state.fence = fence; + break; + } + rcu_read_unlock(); asyw->image.offset[0] = nvbo->offset;
if (wndw->func->prepare) {
Instead of hand rolling the logic.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 8f1b5af47dd6..dc2a2615db38 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -428,19 +428,17 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, static void etnaviv_gem_describe_fence(struct dma_fence *fence, const char *type, struct seq_file *m) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - seq_printf(m, "\t%9s: %s %s seq %llu\n", - type, - fence->ops->get_driver_name(fence), - fence->ops->get_timeline_name(fence), - fence->seqno); + seq_printf(m, "\t%9s: %s %s seq %llu\n", type, + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->seqno); }
static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) { struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; unsigned long off = drm_vma_node_start(&obj->vma_node);
@@ -450,19 +448,12 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off, etnaviv_obj->vaddr, obj->size);
rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_for_each_fence_unlocked(robj, &cursor, true, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + etnaviv_gem_describe_fence(fence, "Exclusive", m); + else etnaviv_gem_describe_fence(fence, "Shared", m); - } } - - fence = dma_resv_excl_fence(robj); - if (fence) - etnaviv_gem_describe_fence(fence, "Exclusive", m); rcu_read_unlock(); }
We certainly hold the reservation lock here, no need for the RCU dance.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 4dd7d9d541c0..7e17bc2b5df1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -195,7 +195,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) if (ret) return ret; } else { - bo->excl = dma_resv_get_excl_unlocked(robj); + bo->excl = dma_fence_get(dma_resv_excl_fence(robj)); }
}
Heureka, that's finally not used any more.
Signed-off-by: Christian König christian.koenig@amd.com --- include/linux/dma-resv.h | 26 -------------------------- 1 file changed, 26 deletions(-)
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 6761512ba662..3e6ffba0af70 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -384,32 +384,6 @@ dma_resv_excl_fence(struct dma_resv *obj) return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); }
-/** - * dma_resv_get_excl_unlocked - get the reservation object's - * exclusive fence, without lock held. - * @obj: the reservation object - * - * If there is an exclusive fence, this atomically increments it's - * reference count and returns it. - * - * RETURNS - * The exclusive fence or NULL if none - */ -static inline struct dma_fence * -dma_resv_get_excl_unlocked(struct dma_resv *obj) -{ - struct dma_fence *fence; - - if (!rcu_access_pointer(obj->fence_excl)) - return NULL; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&obj->fence_excl); - rcu_read_unlock(); - - return fence; -} - /** * dma_resv_shared_list - get the reservation object's shared fence list * @obj: the reservation object
linaro-mm-sig@lists.linaro.org