Hi guys,
during the recent discussion about SLAB_TYPESAFE_BY_RCU, dma_fence_get_rcu and dma_fence_get_rcu_safe we found that the RCU handling for dma_resv objects was implemented multiple times.
Unfortunately a lot of those implementations get the rather complicated dance with RCU and the sequence number handling wrong.
So this patch set aims to audit and unify this by providing an iterator which automatically restarts when a modification to the dma_resv object is detected.
The result is pretty impressive I think since this not only mean that we got rid of all those incorrect dma_fence_get_rcu() cases, but also reduce the overall loc count quite a bit.
Please review and/or comment.
Cheers, Christian.
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 63 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 36 ++++++++++++++++++++++ 2 files changed, 99 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 18dd5a6ca06c..d8da8a914b07 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -316,6 +316,69 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence);
+/** + * dma_resv_walk_unlocked - walk over fences in a dma_resv obj + * @obj: the dma_resv object + * @cursor: cursor to record the current position + * @all_fences: true returns also the shared fences + * @first: if we should start over + * + * Return all the fences in the dma_resv object which are not yet signaled. + * The returned fence has an extra local reference so will stay alive. + * If a concurrent modify is detected the whole iterator is started over again. + */ +struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj, + struct dma_resv_cursor *cursor, + bool all_fences, bool first) +{ + struct dma_fence *fence = NULL; + + do { + /* Drop the reference from the previous round */ + dma_fence_put(fence); + + cursor->is_first = first; + if (first) { + cursor->seq = read_seqcount_begin(&obj->seq); + cursor->index = -1; + cursor->fences = dma_resv_shared_list(obj); + cursor->is_exclusive = true; + + fence = dma_resv_excl_fence(obj); + if (fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + fence = NULL; + } else { + fence = NULL; + } + + if (fence) { + fence = dma_fence_get_rcu(fence); + } else if (all_fences && cursor->fences) { + struct dma_resv_list *fences = cursor->fences; + + cursor->is_exclusive = false; + while (++cursor->index < fences->shared_count) { + fence = rcu_dereference(fences->shared[ + cursor->index]); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &fence->flags)) + break; + } + if (cursor->index < fences->shared_count) + fence = dma_fence_get_rcu(fence); + else + fence = NULL; + } + + /* For the eventually next round */ + first = true; + } while (read_seqcount_retry(&obj->seq, cursor->seq)); + + return fence; +} +EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked); + /** * dma_resv_copy_fences - Copy all fences from src to dst. * @dst: the destination reservation object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 562b885cf9c3..74775f2cb679 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -75,6 +75,39 @@ struct dma_resv { struct dma_resv_list __rcu *fence; };
+/** + * struct dma_resv_cursor - current position into the dma_resv fences + * @seq: sequence number to check + * @index: index into the shared fences + * @shared: the shared fences + * @is_first: true if this is the first returned fence + * @is_exclusive: if the current fence is the exclusive one + */ +struct dma_resv_cursor { + unsigned int seq; + unsigned int index; + struct dma_resv_list *fences; + bool is_first; + bool is_exclusive; +}; + +/** + * dma_resv_for_each_fence_unlocked - fence iterator + * @obj: a dma_resv object pointer + * @cursor: a struct dma_resv_cursor pointer + * @all_fences: true if all fences should be returned + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object without holding the + * dma_resv::lock. The RCU read side lock must be hold when using this, but can + * be dropped and re-taken as necessary inside the loop. @all_fences controls + * if the shared fences are returned as well. + */ +#define dma_resv_for_each_fence_unlocked(obj, cursor, all_fences, fence) \ + for (fence = dma_resv_walk_unlocked(obj, cursor, all_fences, true); \ + fence; dma_fence_put(fence), \ + fence = dma_resv_walk_unlocked(obj, cursor, all_fences, false)) + #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base) #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
@@ -272,6 +305,9 @@ void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); +struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj, + struct dma_resv_cursor *cursor, + bool first, bool all_fences); int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, unsigned *pshared_count, struct dma_fence ***pshared); int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
A simpler version of the iterator to be used when the dma_resv object is locked.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 18 ++++++++++++++++++ 2 files changed, 56 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index d8da8a914b07..a0386cf5824c 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -316,6 +316,44 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence);
+/** + * dma_resv_walk - walk over fences in a dma_resv obj + * @obj: the dma_resv object + * @cursor: cursor to record the current position + * @all_fences: true returns also the shared fences + * @first: if we should start over + * + * Return all the fences in the dma_resv object while holding the + * dma_resv::lock. + */ +struct dma_fence *dma_resv_walk(struct dma_resv *obj, + struct dma_resv_cursor *cursor, + bool all_fences, bool first) +{ + dma_resv_assert_held(obj); + + cursor->is_first = first; + if (first) { + struct dma_fence *fence; + + cursor->index = -1; + cursor->fences = dma_resv_shared_list(obj); + cursor->is_exclusive = true; + + fence = dma_resv_excl_fence(obj); + if (fence) + return fence; + } + + if (!all_fences || !cursor->fences || + ++cursor->index >= cursor->fences->shared_count) + return NULL; + + return rcu_dereference_protected(cursor->fences->shared[cursor->index], + dma_resv_held(obj)); +} +EXPORT_SYMBOL_GPL(dma_resv_walk); + /** * dma_resv_walk_unlocked - walk over fences in a dma_resv obj * @obj: the dma_resv object diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 74775f2cb679..84de4dff4ecc 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -91,6 +91,21 @@ struct dma_resv_cursor { bool is_exclusive; };
+/** + * dma_resv_for_each_fence - fence iterator + * @obj: a dma_resv object pointer + * @cursor: a struct dma_resv_cursor pointer + * @all_fences: true if all fences should be returned + * @fence: the current fence + * + * Iterate over the fences in a struct dma_resv object while holding the + * dma_resv::lock. @all_fences controls if the shared fences are returned as + * well. + */ +#define dma_resv_for_each_fence(obj, cursor, all_fences, fence) \ + for (fence = dma_resv_walk(obj, cursor, all_fences, true); fence; \ + fence = dma_resv_walk(obj, cursor, all_fences, false)) + /** * dma_resv_for_each_fence_unlocked - fence iterator * @obj: a dma_resv object pointer @@ -305,6 +320,9 @@ void dma_resv_fini(struct dma_resv *obj); int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences); void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence); void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence); +struct dma_fence *dma_resv_walk(struct dma_resv *obj, + struct dma_resv_cursor *cursor, + bool first, bool all_fences); struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj, struct dma_resv_cursor *cursor, bool first, bool all_fences);
This makes the function much simpler since the complex retry logic is now handled else where.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 81 +++++++++++++++----------------------- 1 file changed, 32 insertions(+), 49 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index a0386cf5824c..a5d78bf401b5 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -426,74 +426,57 @@ EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked); */ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { - struct dma_resv_list *src_list, *dst_list; - struct dma_fence *old, *new; - unsigned int i; + struct dma_resv_cursor cursor; + struct dma_resv_list *list; + struct dma_fence *f, *excl;
dma_resv_assert_held(dst);
- rcu_read_lock(); - src_list = dma_resv_shared_list(src); + list = NULL; + excl = NULL;
-retry: - if (src_list) { - unsigned int shared_count = src_list->shared_count; + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(dst, &cursor, true, f) {
- rcu_read_unlock(); + if (cursor.is_first) { + dma_resv_list_free(list); + dma_fence_put(excl);
- dst_list = dma_resv_list_alloc(shared_count); - if (!dst_list) - return -ENOMEM; + if (cursor.fences) { + unsigned int cnt = cursor.fences->shared_count;
- rcu_read_lock(); - src_list = dma_resv_shared_list(src); - if (!src_list || src_list->shared_count > shared_count) { - kfree(dst_list); - goto retry; - } + rcu_read_unlock(); + list = dma_resv_list_alloc(cnt); + if (!list) + return -ENOMEM;
- dst_list->shared_count = 0; - for (i = 0; i < src_list->shared_count; ++i) { - struct dma_fence __rcu **dst; - struct dma_fence *fence; + list->shared_count = 0; + rcu_read_lock();
- fence = rcu_dereference(src_list->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - continue; - - if (!dma_fence_get_rcu(fence)) { - dma_resv_list_free(dst_list); - src_list = dma_resv_shared_list(src); - goto retry; + } else { + list = NULL; } + excl = NULL; + }
- if (dma_fence_is_signaled(fence)) { - dma_fence_put(fence); - continue; - } + if (cursor.is_exclusive) + excl = f; + else + RCU_INIT_POINTER(list->shared[list->shared_count++], f);
- dst = &dst_list->shared[dst_list->shared_count++]; - rcu_assign_pointer(*dst, fence); - } - } else { - dst_list = NULL; + /* Don't drop the reference */ + f = NULL; }
- new = dma_fence_get_rcu_safe(&src->fence_excl); rcu_read_unlock();
- src_list = dma_resv_shared_list(dst); - old = dma_resv_excl_fence(dst); - write_seqcount_begin(&dst->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(dst->fence_excl, new); - RCU_INIT_POINTER(dst->fence, dst_list); + excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); + list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); write_seqcount_end(&dst->seq);
- dma_resv_list_free(src_list); - dma_fence_put(old); + dma_resv_list_free(list); + dma_fence_put(excl);
return 0; }
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 110 +++++++++++++------------------------ 1 file changed, 37 insertions(+), 73 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index a5d78bf401b5..b77bf46c0f48 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -486,99 +486,63 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @pfence_excl: the returned exclusive fence (or NULL) - * @pshared_count: the number of shared fences returned - * @pshared: the array of shared fence ptrs returned (array is krealloc'd to + * @fence_excl: the returned exclusive fence (or NULL) + * @shared_count: the number of shared fences returned + * @shared: the array of shared fence ptrs returned (array is krealloc'd to * the required size, and must be freed by caller) * * Retrieve all fences from the reservation object. If the pointer for the * exclusive fence is not specified the fence is put into the array of the * shared fences as well. Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, - unsigned int *pshared_count, - struct dma_fence ***pshared) +int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **fence_excl, + unsigned int *shared_count, struct dma_fence ***shared) { - struct dma_fence **shared = NULL; - struct dma_fence *fence_excl; - unsigned int shared_count; - int ret = 1; - - do { - struct dma_resv_list *fobj; - unsigned int i, seq; - size_t sz = 0; - - shared_count = i = 0; - - rcu_read_lock(); - seq = read_seqcount_begin(&obj->seq); - - fence_excl = dma_resv_excl_fence(obj); - if (fence_excl && !dma_fence_get_rcu(fence_excl)) - goto unlock; + struct dma_resv_cursor cursor; + struct dma_fence *fence;
- fobj = dma_resv_shared_list(obj); - if (fobj) - sz += sizeof(*shared) * fobj->shared_max; + *shared_count = 0; + *shared = NULL;
- if (!pfence_excl && fence_excl) - sz += sizeof(*shared); + if (fence_excl) + *fence_excl = NULL;
- if (sz) { - struct dma_fence **nshared; + rcu_read_lock(); + dma_resv_for_each_fence_unlocked(obj, &cursor, true, fence) {
- nshared = krealloc(shared, sz, - GFP_NOWAIT | __GFP_NOWARN); - if (!nshared) { - rcu_read_unlock(); + if (cursor.is_first) { + unsigned int count;
- dma_fence_put(fence_excl); - fence_excl = NULL; + while (*shared_count) + dma_fence_put((*shared)[--(*shared_count)]);
- nshared = krealloc(shared, sz, GFP_KERNEL); - if (nshared) { - shared = nshared; - continue; - } + if (fence_excl) + dma_fence_put(*fence_excl);
- ret = -ENOMEM; - break; - } - shared = nshared; - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - shared[i] = rcu_dereference(fobj->shared[i]); - if (!dma_fence_get_rcu(shared[i])) - break; - } - } + count = cursor.fences ? cursor.fences->shared_count : 0; + count += fence_excl ? 0 : 1; + rcu_read_unlock();
- if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { - while (i--) - dma_fence_put(shared[i]); - dma_fence_put(fence_excl); - goto unlock; + /* Eventually re-allocate the array */ + *shared = krealloc_array(*shared, count, + sizeof(*shared), + GFP_KERNEL); + if (count && !*shared) + return -ENOMEM; + rcu_read_lock(); }
- ret = 0; -unlock: - rcu_read_unlock(); - } while (ret); - - if (pfence_excl) - *pfence_excl = fence_excl; - else if (fence_excl) - shared[shared_count++] = fence_excl; + if (cursor.is_exclusive && fence_excl) + *fence_excl = fence; + else + (*shared)[(*shared_count)++] = fence;
- if (!shared_count) { - kfree(shared); - shared = NULL; + /* Don't drop the reference */ + fence = NULL; } + rcu_read_unlock();
- *pshared_count = shared_count; - *pshared = shared; - return ret; + return 0; } EXPORT_SYMBOL_GPL(dma_resv_get_fences);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 64 +++++--------------------------------- 1 file changed, 7 insertions(+), 57 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b77bf46c0f48..5192cf4271ac 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -564,74 +564,24 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; - unsigned int seq, shared_count; + struct dma_resv_cursor cursor; struct dma_fence *fence; - int i;
-retry: - shared_count = 0; - seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); - i = -1; - - fence = dma_resv_excl_fence(obj); - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - if (!dma_fence_get_rcu(fence)) - goto unlock_retry; + dma_resv_for_each_fence_unlocked(obj, &cursor, wait_all, fence) { + rcu_read_unlock();
- if (dma_fence_is_signaled(fence)) { + ret = dma_fence_wait_timeout(fence, intr, ret); + if (ret <= 0) { dma_fence_put(fence); - fence = NULL; + return ret; }
- } else { - fence = NULL; - } - - if (wait_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - - if (fobj) - shared_count = fobj->shared_count; - - for (i = 0; !fence && i < shared_count; ++i) { - struct dma_fence *lfence; - - lfence = rcu_dereference(fobj->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &lfence->flags)) - continue; - - if (!dma_fence_get_rcu(lfence)) - goto unlock_retry; - - if (dma_fence_is_signaled(lfence)) { - dma_fence_put(lfence); - continue; - } - - fence = lfence; - break; - } + rcu_read_lock(); } - rcu_read_unlock(); - if (fence) { - if (read_seqcount_retry(&obj->seq, seq)) { - dma_fence_put(fence); - goto retry; - }
- ret = dma_fence_wait_timeout(fence, intr, ret); - dma_fence_put(fence); - if (ret > 0 && wait_all && (i + 1 < shared_count)) - goto retry; - } return ret; - -unlock_retry: - rcu_read_unlock(); - goto retry; } EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 54 +++++--------------------------------- 1 file changed, 7 insertions(+), 47 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 5192cf4271ac..85e07becdb93 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -586,22 +586,6 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
-static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) -{ - struct dma_fence *fence, *lfence = passed_fence; - int ret = 1; - - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { - fence = dma_fence_get_rcu(lfence); - if (!fence) - return -1; - - ret = !!dma_fence_is_signaled(fence); - dma_fence_put(fence); - } - return ret; -} - /** * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. @@ -616,43 +600,19 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) { + struct dma_resv_cursor cursor; struct dma_fence *fence; - unsigned int seq; - int ret;
rcu_read_lock(); -retry: - ret = true; - seq = read_seqcount_begin(&obj->seq); - - if (test_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - unsigned int i, shared_count; - - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - else if (!ret) - break; + dma_resv_for_each_fence_unlocked(obj, &cursor, test_all, fence) { + if (!dma_fence_is_signaled(fence)) { + rcu_read_unlock(); + dma_fence_put(fence); + return false; } } - - fence = dma_resv_excl_fence(obj); - if (ret && fence) { - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - - } - - if (read_seqcount_retry(&obj->seq, seq)) - goto retry; - rcu_read_unlock(); - return ret; + return true; } EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 49 ++++----------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index b67fbf4e3705..4173f1f70ac1 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -207,15 +207,13 @@ static void dma_buf_poll_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
static __poll_t dma_buf_poll(struct file *file, poll_table *poll) { + struct dma_resv_cursor cursor; struct dma_buf_poll_cb_t *dcb; struct dma_buf *dmabuf; struct dma_resv *resv; - struct dma_resv_list *fobj; - struct dma_fence *fence_excl; - unsigned shared_count, seq; struct dma_fence *fence; __poll_t events; - int r, i; + int r;
dmabuf = file->private_data; if (!dmabuf || !dmabuf->resv) @@ -241,53 +239,14 @@ static __poll_t dma_buf_poll(struct file *file, poll_table *poll) if (!events) return 0;
-retry: - seq = read_seqcount_begin(&resv->seq); - rcu_read_lock(); - - fobj = rcu_dereference(resv->fence); - if (fobj && events & EPOLLOUT) - shared_count = fobj->shared_count; - else - shared_count = 0; - - for (i = 0; i < shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - fence = dma_fence_get_rcu(fence); - if (!fence || read_seqcount_retry(&resv->seq, seq)) { - /* Concurrent modify detected, force re-check */ - dma_fence_put(fence); - rcu_read_unlock(); - goto retry; - } - - r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); - if (!r) { - /* Callback queued */ - events = 0; - goto out; - } - dma_fence_put(fence); - } - - fence = dma_resv_excl_fence(resv); - if (fence) { - fence = dma_fence_get_rcu(fence); - if (!fence || read_seqcount_retry(&resv->seq, seq)) { - /* Concurrent modify detected, force re-check */ - dma_fence_put(fence); - rcu_read_unlock(); - goto retry; - - } - + dma_resv_for_each_fence_unlocked(resv, &cursor, events & EPOLLOUT, + fence) { r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); if (!r) { /* Callback queued */ events = 0; goto out; } - dma_fence_put(fence_excl); }
/* No callback queued, wake up any additional waiters. */
This makes the function much simpler since the complex retry logic is now handled else where.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 30 +++++++----------------- 1 file changed, 9 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..c6c6d747b33e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_cursor cursor; + struct dma_fence *fence; int err;
err = -ENOENT; @@ -109,28 +109,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq); - - /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); - - /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i; - - for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]); - + args->busy = false; + dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, true, fence) { + if (cursor.is_exclusive) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy = busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } }
- if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; - err = 0; out: rcu_read_unlock();
This is probably a fix since we didn't even grabed a reference to the fences.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index db53fecca696..15edb308e5a9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -256,19 +256,11 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) { struct dma_resv *resv = &bo->base._resv; - struct dma_resv_list *fobj; + struct dma_resv_cursor cursor; struct dma_fence *fence; - int i;
rcu_read_lock(); - fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - if (fence && !fence->ops->signaled) - dma_fence_enable_sw_signaling(fence); - - for (i = 0; fobj && i < fobj->shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - + dma_resv_for_each_fence_unlocked(resv, &cursor, true, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); }
Instead of hand rolling the logic.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index b8fa6ed3dd73..6808dbef5c79 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -437,19 +437,17 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, static void etnaviv_gem_describe_fence(struct dma_fence *fence, const char *type, struct seq_file *m) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - seq_printf(m, "\t%9s: %s %s seq %llu\n", - type, - fence->ops->get_driver_name(fence), - fence->ops->get_timeline_name(fence), - fence->seqno); + seq_printf(m, "\t%9s: %s %s seq %llu\n", type, + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->seqno); }
static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) { struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_cursor cursor; struct dma_fence *fence; unsigned long off = drm_vma_node_start(&obj->vma_node);
@@ -459,19 +457,12 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off, etnaviv_obj->vaddr, obj->size);
rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_for_each_fence_unlocked(robj, &cursor, true, fence) { + if (cursor.is_exclusive) + etnaviv_gem_describe_fence(fence, "Exclusive", m); + else etnaviv_gem_describe_fence(fence, "Shared", m); - } } - - fence = dma_resv_excl_fence(robj); - if (fence) - etnaviv_gem_describe_fence(fence, "Exclusive", m); rcu_read_unlock(); }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 44 ++++++++---------------- 1 file changed, 14 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 862eb3c1c4c5..031ba20debb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -252,41 +252,25 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner) { - struct dma_resv_list *flist; + struct dma_resv_cursor cursor; struct dma_fence *f; - unsigned i; - int r = 0; + int r;
if (resv == NULL) return -EINVAL;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - dma_fence_chain_for_each(f, f) { - struct dma_fence_chain *chain = to_dma_fence_chain(f); - - if (amdgpu_sync_test_fence(adev, mode, owner, chain ? - chain->fence : f)) { - r = amdgpu_sync_fence(sync, f); - dma_fence_put(f); - if (r) - return r; - break; - } - } - - flist = dma_resv_shared_list(resv); - if (!flist) - return 0; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); - - if (amdgpu_sync_test_fence(adev, mode, owner, f)) { - r = amdgpu_sync_fence(sync, f); - if (r) - return r; + dma_resv_for_each_fence(resv, &cursor, true, f) { + dma_fence_chain_for_each(f, f) { + struct dma_fence_chain *chain = to_dma_fence_chain(f); + + if (amdgpu_sync_test_fence(adev, mode, owner, chain ? + chain->fence : f)) { + r = amdgpu_sync_fence(sync, f); + dma_fence_put(f); + if (r) + return r; + break; + } } } return 0;
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 80dff29f2bc7..d86b0cbff889 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1334,10 +1334,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { unsigned long num_pages = bo->resource->num_pages; + struct dma_resv_cursor resv_cursor; struct amdgpu_res_cursor cursor; - struct dma_resv_list *flist; struct dma_fence *f; - int i;
/* Swapout? */ if (bo->resource->mem_type == TTM_PL_SYSTEM) @@ -1351,14 +1350,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_shared_list(bo->base.resv); - if (flist) { - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(bo->base.resv)); - if (amdkfd_fence_check_mm(f, current->mm)) - return false; - } + dma_resv_for_each_fence(bo->base.resv, &resv_cursor, true, f) { + if (amdkfd_fence_check_mm(f, current->mm)) + return false; }
switch (bo->resource->mem_type) {
Simplifying the code a bit.
Untested since I can't get the driver to compile on !ARM.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/msm/msm_gem.c | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 72a07e311de3..24f8c0603385 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -813,25 +813,11 @@ void msm_gem_vunmap(struct drm_gem_object *obj) int msm_gem_sync_object(struct drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive) { - struct dma_resv_list *fobj; + struct dma_resv_cursor cursor; struct dma_fence *fence; - int i, ret; - - fence = dma_resv_excl_fence(obj->resv); - /* don't need to wait on our own fences, since ring is fifo */ - if (fence && (fence->context != fctx->context)) { - ret = dma_fence_wait(fence, true); - if (ret) - return ret; - } - - fobj = dma_resv_shared_list(obj->resv); - if (!exclusive || !fobj) - return 0; + int ret;
- for (i = 0; i < fobj->shared_count; i++) { - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(obj->resv)); + dma_resv_for_each_fence(obj->resv, &cursor, exclusive, fence) { if (fence->context != fctx->context) { ret = dma_fence_wait(fence, true); if (ret)
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/msm/msm_gem.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 24f8c0603385..8b10d82b5d7b 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -932,7 +932,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_cursor cursor; struct dma_fence *fence; struct msm_gem_vma *vma; uint64_t off = drm_vma_node_start(&obj->vma_node); @@ -1007,22 +1007,13 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, seq_puts(m, "\n"); }
- rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_for_each_fence(robj, &cursor, true, fence) { + if (cursor.is_exclusive) + describe_fence(fence, "Exclusive", m); + else describe_fence(fence, "Shared", m); - } }
- fence = dma_resv_excl_fence(robj); - if (fence) - describe_fence(fence, "Exclusive", m); - rcu_read_unlock(); - msm_gem_unlock(obj); }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +++++++------------------ 1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..dc8d7ca1e239 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -339,14 +339,15 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) }
int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, + bool exclusive, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct dma_fence *fence; struct dma_resv *resv = nvbo->bo.base.resv; - struct dma_resv_list *fobj; + struct dma_resv_cursor cursor; + struct dma_fence *fence; struct nouveau_fence *f; - int ret = 0, i; + int ret;
if (!exclusive) { ret = dma_resv_reserve_shared(resv, 1); @@ -355,10 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e return ret; }
- fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - - if (fence) { + dma_resv_for_each_fence(resv, &cursor, exclusive, fence) { struct nouveau_channel *prev = NULL; bool must_wait = true;
@@ -366,41 +364,19 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (f) { rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + if (prev && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); }
- if (must_wait) + if (must_wait) { ret = dma_fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(resv)); - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); + if (ret) + return ret; } - - if (must_wait) - ret = dma_fence_wait(fence, intr); } - - return ret; + return 0; }
void
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_sync.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 9257b60144c4..14a4d8135bad 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -91,33 +91,17 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_resv *resv, bool shared) { - struct dma_resv_list *flist; - struct dma_fence *f; + struct dma_resv_cursor cursor; struct radeon_fence *fence; - unsigned i; + struct dma_fence *f; int r = 0;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - fence = f ? to_radeon_fence(f) : NULL; - if (fence && fence->rdev == rdev) - radeon_sync_fence(sync, fence); - else if (f) - r = dma_fence_wait(f, true); - - flist = dma_resv_shared_list(resv); - if (shared || !flist || r) - return r; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); + dma_resv_for_each_fence(resv, &cursor, shared, f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); else r = dma_fence_wait(f, true); - if (r) break; }
linaro-mm-sig@lists.linaro.org