Hopefully the last round for this.
Added dma_resv_iter_begin/end as requested by Daniel. Fixed a bunch of problems pointed out by the CI systems and found a few more myself.
Please review and/or comment, Christian.
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 61 +++++++++++++++++++++++++++
 include/linux/dma-resv.h   | 84 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..3e77cad2c9d4 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,67 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence)
 }
 EXPORT_SYMBOL(dma_resv_add_excl_fence);
 
+/**
+ * dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
+ * @cursor: cursor to record the current position
+ * @first: if we should start over
+ *
+ * Return all the fences in the dma_resv object which are not yet signaled.
+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iteration is started over again.
+ */
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
+					      bool first)
+{
+	struct dma_resv *obj = cursor->obj;
+
+	first |= read_seqcount_retry(&obj->seq, cursor->seq);
+	do {
+		/* Drop the reference from the previous round */
+		dma_fence_put(cursor->fence);
+
+		cursor->is_first = first;
+		if (first) {
+			cursor->seq = read_seqcount_begin(&obj->seq);
+			cursor->index = -1;
+			cursor->fences = dma_resv_shared_list(obj);
+
+			cursor->fence = dma_resv_excl_fence(obj);
+			if (cursor->fence &&
+			    test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+				     &cursor->fence->flags))
+				cursor->fence = NULL;
+		} else {
+			cursor->fence = NULL;
+		}
+
+		if (cursor->fence) {
+			cursor->fence = dma_fence_get_rcu(cursor->fence);
+		} else if (cursor->all_fences && cursor->fences) {
+			struct dma_resv_list *fences = cursor->fences;
+
+			while (++cursor->index < fences->shared_count) {
+				cursor->fence = rcu_dereference(
+					fences->shared[cursor->index]);
+				if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					      &cursor->fence->flags))
+					break;
+			}
+			if (cursor->index < fences->shared_count)
+				cursor->fence =
+					dma_fence_get_rcu(cursor->fence);
+			else
+				cursor->fence = NULL;
+		}
+
+		/* For the eventually next round */
+		first = true;
+	} while (read_seqcount_retry(&obj->seq, cursor->seq));
+
+	return cursor->fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_walk_unlocked);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..693d16117153 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,90 @@ struct dma_resv {
 	struct dma_resv_list __rcu *fence;
 };
 
+/**
+ * struct dma_resv_iter - current position into the dma_resv fences
+ *
+ * Don't touch this directly in the driver, use the accessor function instead.
+ */
+struct dma_resv_iter {
+	/** @obj: The dma_resv object we iterate over */
+	struct dma_resv *obj;
+
+	/** @all_fences: If all fences should be returned */
+	bool all_fences;
+
+	/** @fence: the currently handled fence */
+	struct dma_fence *fence;
+
+	/** @seq: sequence number to check for modifications */
+	unsigned int seq;
+
+	/** @index: index into the shared fences */
+	unsigned int index;
+
+	/** @fences: the shared fences */
+	struct dma_resv_list *fences;
+
+	/** @is_first: true if this is the first returned fence */
+	bool is_first;
+};
+
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
+					      bool first);
+
+/**
+ * dma_resv_iter_begin - initialize a dma_resv_iter object
+ * @cursor: The dma_resv_iter object to initialize
+ * @obj: The dma_resv object which we want to iterate over
+ * @all_fences: If all fences should be returned or just the exclusive one
+ */
+static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
+				       struct dma_resv *obj,
+				       bool all_fences)
+{
+	cursor->obj = obj;
+	cursor->all_fences = all_fences;
+	cursor->fence = NULL;
+}
+
+/**
+ * dma_resv_iter_end - cleanup a dma_resv_iter object
+ * @cursor: the dma_resv_iter object which should be cleaned up
+ *
+ * Make sure that the reference to the fence in the cursor is properly
+ * dropped.
+ */
+static inline void dma_resv_iter_end(struct dma_resv_iter *cursor)
+{
+	dma_fence_put(cursor->fence);
+}
+
+/**
+ * dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
+ * @cursor: the cursor of the current position
+ *
+ * Returns true if the currently returned fence is the exclusive one.
+ */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
+{
+	return cursor->index == -1;
+}
+
+/**
+ * dma_resv_for_each_fence_unlocked - unlocked fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object without holding the
+ * dma_resv::lock. The RCU read side lock must be held when using this, but
+ * can be dropped and re-taken as necessary inside the loop. The cursor needs
+ * to be initialized with dma_resv_iter_begin_unlocked() and cleaned up with
+ * dma_resv_iter_end_unlocked().
+ */
+#define dma_resv_for_each_fence_unlocked(cursor, fence)	\
+	for (fence = dma_resv_iter_walk_unlocked(cursor, true);	\
+	     fence; fence = dma_resv_iter_walk_unlocked(cursor, false))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
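To illustrate the calling convention, here is a minimal caller sketch built on the interfaces above (resv_is_busy() is a made-up helper, not part of the patch). Since the walk only returns unsignaled fences, seeing any fence at all means the object is still busy:

	/* Hypothetical helper: check whether any fence in the dma_resv
	 * object is still unsignaled, without taking the reservation lock. */
	static bool resv_is_busy(struct dma_resv *obj)
	{
		struct dma_resv_iter cursor;
		struct dma_fence *fence;
		bool busy = false;

		rcu_read_lock();
		dma_resv_iter_begin(&cursor, obj, true);
		dma_resv_for_each_fence_unlocked(&cursor, fence) {
			busy = true;
			break;	/* the cursor still holds a fence reference... */
		}
		/* ...which dma_resv_iter_end() drops for us. */
		dma_resv_iter_end(&cursor);
		rcu_read_unlock();

		return busy;
	}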
On Fri, Sep 17, 2021 at 02:34:48PM +0200, Christian König wrote:
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 61 +++++++++++++++++++++++++++ include/linux/dma-resv.h | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..3e77cad2c9d4 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,67 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/**
- dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
- @cursor: cursor to record the current position
- @first: if we should start over
- Return all the fences in the dma_resv object which are not yet signaled.
- The returned fence has an extra local reference so will stay alive.
- If a concurrent modify is detected the whole iteration is started over again.
- */
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
Bit ocd, but I'd still just call that iter_next.
bool first)
Hm I'd put all the init code into iter_begin ...
+{
- struct dma_resv *obj = cursor->obj;
Aren't we missing rcu_read_lock() around the entire thing here?
- first |= read_seqcount_retry(&obj->seq, cursor->seq);
- do {
/* Drop the reference from the previous round */
dma_fence_put(cursor->fence);
cursor->is_first = first;
if (first) {
cursor->seq = read_seqcount_begin(&obj->seq);
cursor->index = -1;
cursor->fences = dma_resv_shared_list(obj);
And then also call iter_begin from here. That way we guarantee that read_seqcount_begin is always called before _retry(). It's not a problem with the seqcount implementation (I think at least), but it definitely looks funny.
Calling iter_begin here also makes it clear that we're essentially restarting.
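A rough sketch of what that could look like (an assumption about the suggestion, not code from the patch) — iter_begin() takes over the seqcount snapshot and index reset, so begin always precedes retry:

	/* Sketch only: callers hold the RCU read side lock here, just like
	 * the restart path in the walk function does today. */
	static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
					       struct dma_resv *obj,
					       bool all_fences)
	{
		cursor->obj = obj;
		cursor->all_fences = all_fences;
		cursor->fence = NULL;
		cursor->index = -1;
		cursor->seq = read_seqcount_begin(&obj->seq);
		cursor->fences = dma_resv_shared_list(obj);
	}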
cursor->fence = dma_resv_excl_fence(obj);
if (cursor->fence &&
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
Please use the right dma_fence wrapper here for this and don't look at the bits/flags outside of dma_fence.[hc] code. I just realized that we don't have the right amount of barriers in there for the fastpath, i.e. if we have:
x = 0; /* static initializer */

thread a:
	x = 1;
	dma_fence_signal(fence);

thread b:
	if (dma_fence_is_signaled(fence))
		printk("%i\n", x);
Then you might actually be able to observe x == 0 in thread b. Which is not what we want at all.
So no open-coding of dma_fence flag bits code outside of dma_fence.[hc] please. And yes i915-gem code is unfortunately a disaster.
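To spell out the kind of pairing that is missing (generic barrier primitives used purely for illustration; the actual dma_fence bit operations do not provide these semantics today):

	int x;
	unsigned long signaled;

	/* thread a: release semantics make the store to x visible... */
	x = 1;
	smp_store_release(&signaled, 1);

	/* thread b: ...to anyone observing the flag with acquire semantics.
	 * Plain test_bit()/set_bit() give no such ordering guarantee. */
	if (smp_load_acquire(&signaled))
		printk("%i\n", x);	/* now guaranteed to print 1 */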
&cursor->fence->flags))
cursor->fence = NULL;
} else {
cursor->fence = NULL;
}
if (cursor->fence) {
cursor->fence = dma_fence_get_rcu(cursor->fence);
} else if (cursor->all_fences && cursor->fences) {
struct dma_resv_list *fences = cursor->fences;
while (++cursor->index < fences->shared_count) {
cursor->fence = rcu_dereference(
fences->shared[cursor->index]);
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&cursor->fence->flags))
break;
}
if (cursor->index < fences->shared_count)
cursor->fence =
dma_fence_get_rcu(cursor->fence);
else
cursor->fence = NULL;
}
The control flow here is very hairy, but I'm not sure how to best do this. With my suggestion to move the read_seqcount_begin into iter_begin maybe something like this:
iter_next()
{
	do {
		dma_fence_put(cursor->fence);
		cursor->fence = NULL;

		if (cursor->index == -1) { /* reset by iter_begin() */
			cursor->fence = get_exclusive();
			cursor->index++;
		} else {
			cursor->fence = shared_fences[++cursor->index];
		}

		if (dma_fence_is_signaled(cursor->fence))
			continue; /* just grab the next fence. */

		cursor->fence = dma_fence_get_rcu(cursor->fence);

		if (!cursor->fence || read_seqcount_retry()) {
			/* we lost the race, restart completely */
			iter_begin(); /* ->fence will be cleaned up at beginning of the loop */
			continue;
		}

		return cursor->fence;
	} while (true);
}
Maybe I missed something, but that avoids the duplication of all the tricky code, i.e. checking for signalling, rcu protected conditional fence_get, and the retry is also nicely at the end.
/* For the eventually next round */
first = true;
- } while (read_seqcount_retry(&obj->seq, cursor->seq));
- return cursor->fence;
+} +EXPORT_SYMBOL_GPL(dma_resv_iter_walk_unlocked);
/**
- dma_resv_copy_fences - Copy all fences from src to dst.
- @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 9100dd3dc21f..693d16117153 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -149,6 +149,90 @@ struct dma_resv { struct dma_resv_list __rcu *fence; }; +/**
- struct dma_resv_iter - current position into the dma_resv fences
- Don't touch this directly in the driver, use the accessor function instead.
- */
+struct dma_resv_iter {
- /** @obj: The dma_resv object we iterate over */
- struct dma_resv *obj;
- /** @all_fences: If all fences should be returned */
- bool all_fences;
- /** @fence: the currently handled fence */
- struct dma_fence *fence;
- /** @seq: sequence number to check for modifications */
- unsigned int seq;
- /** @index: index into the shared fences */
If you go with my suggestion (assuming it works): Please add "-1 indicates to pick the exclusive fence instead."
- unsigned int index;
- /** @fences: the shared fences */
- struct dma_resv_list *fences;
- /** @is_first: true if this is the first returned fence */
- bool is_first;
I think if we just rely on -1 == exclusive fence/is_first we don't need this one here?
+};
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
bool first);
+/**
- dma_resv_iter_begin - initialize a dma_resv_iter object
- @cursor: The dma_resv_iter object to initialize
- @obj: The dma_resv object which we want to iterate over
- @all_fences: If all fences should be returned or just the exclusive one
Please add: "Callers must clean up the iterator with dma_resv_iter_end()."
- */
+static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
struct dma_resv *obj,
bool all_fences)
+{
- cursor->obj = obj;
- cursor->all_fences = all_fences;
- cursor->fence = NULL;
+}
+/**
- dma_resv_iter_end - cleanup a dma_resv_iter object
- @cursor: the dma_resv_iter object which should be cleaned up
- Make sure that the reference to the fence in the cursor is properly
- dropped.
Please add:
"This function must be called every time dma_resv_iter_begin() was called to clean up any references."
- */
+static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) +{
- dma_fence_put(cursor->fence);
+}
+/**
- dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
- @cursor: the cursor of the current position
- Returns true if the currently returned fence is the exclusive one.
- */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{
- return cursor->index == -1;
+}
+/**
- dma_resv_for_each_fence_unlocked - unlocked fence iterator
- @cursor: a struct dma_resv_iter pointer
- @fence: the current fence
- Iterate over the fences in a struct dma_resv object without holding the
- dma_resv::lock. The RCU read side lock must be held when using this, but can
- be dropped and re-taken as necessary inside the loop. The cursor needs to be
- initialized with dma_resv_iter_begin_unlocked() and cleaned up with
We don't have an _unlocked version?
- dma_resv_iter_end_unlocked().
On 17/09/2021 14:23, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:34:48PM +0200, Christian König wrote:
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 61 +++++++++++++++++++++++++++ include/linux/dma-resv.h | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..3e77cad2c9d4 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,67 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/**
- dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
- @cursor: cursor to record the current position
- @first: if we should start over
- Return all the fences in the dma_resv object which are not yet signaled.
- The returned fence has an extra local reference so will stay alive.
- If a concurrent modify is detected the whole iteration is started over again.
- */
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
Bit ocd, but I'd still just call that iter_next.
bool first)
Hm I'd put all the init code into iter_begin ...
@Christian:
Could you engineer something in here which would, at least in debug builds, catch failures to call "iter begin" before using the iterator macro?
+{
- struct dma_resv *obj = cursor->obj;
Aren't we missing rcu_read_lock() around the entire thing here?
- first |= read_seqcount_retry(&obj->seq, cursor->seq);
- do {
/* Drop the reference from the previous round */
dma_fence_put(cursor->fence);
cursor->is_first = first;
if (first) {
cursor->seq = read_seqcount_begin(&obj->seq);
cursor->index = -1;
cursor->fences = dma_resv_shared_list(obj);
And then also call iter_begin from here. That way we guarantee that read_seqcount_begin is always called before _retry(). It's not a problem with the seqcount implementation (I think at least), but it definitely looks funny.
Calling iter_begin here also makes it clear that we're essentially restarting.
cursor->fence = dma_resv_excl_fence(obj);
if (cursor->fence &&
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
Please use the right dma_fence wrapper here for this and don't look at the bits/flags outside of dma_fence.[hc] code. I just realized that we don't have the right amount of barriers in there for the fastpath, i.e. if we have:
x = 0; /* static initializer */

thread a:
	x = 1;
	dma_fence_signal(fence);

thread b:
	if (dma_fence_is_signaled(fence))
		printk("%i\n", x);
Then you might actually be able to observe x == 0 in thread b. Which is not what we want at all.
@Daniel:
What do you mean here? If 'x' is "external" (not part of dma-fence), are you suggesting dma-fence code should serialise it by using barriers?
That would sound incorrect to me, or in other words, I think it's fine if x == 0 is observed in your example thread B since that code is mixing external data with dma-fence.
Hm also, there is that annoying bit where by using dma_fence_is_signaled any code becomes a fence signaling critical path, which I never bought into. There should be a way to test the signaled status without actually doing the signaling. Or am I misunderstanding something so badly that it really, really has to be like this?
So no open-coding of dma_fence flag bits code outside of dma_fence.[hc] please. And yes i915-gem code is unfortunately a disaster.
Never one to miss an opportunity for some good trashing, no? :D
But yes, deconstructed dma_fence_signal I thought we were supposed to add to core. Or at least propose, don't exactly remember how that went.
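Something like the following is presumably what that would boil down to (hypothetical name, and note it is exactly the open-coded test Daniel objects to unless it lives in dma-fence.h):

	/* Hypothetical: test the signaled bit without invoking the
	 * ->signaled() callback, so callers don't become part of the
	 * fence signaling critical path. */
	static inline bool dma_fence_is_signaled_nocb(struct dma_fence *fence)
	{
		return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
	}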
&cursor->fence->flags))
cursor->fence = NULL;
} else {
cursor->fence = NULL;
}
if (cursor->fence) {
cursor->fence = dma_fence_get_rcu(cursor->fence);
} else if (cursor->all_fences && cursor->fences) {
struct dma_resv_list *fences = cursor->fences;
while (++cursor->index < fences->shared_count) {
cursor->fence = rcu_dereference(
fences->shared[cursor->index]);
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&cursor->fence->flags))
break;
}
if (cursor->index < fences->shared_count)
cursor->fence =
dma_fence_get_rcu(cursor->fence);
else
cursor->fence = NULL;
}
The control flow here is very hairy, but I'm not sure how to best do this. With my suggestion to move the read_seqcount_begin into iter_begin maybe something like this:
iter_next()
{
	do {
		dma_fence_put(cursor->fence);
		cursor->fence = NULL;

		if (cursor->index == -1) { /* reset by iter_begin() */
			cursor->fence = get_exclusive();
			cursor->index++;
		} else {
			cursor->fence = shared_fences[++cursor->index];
		}

		if (dma_fence_is_signaled(cursor->fence))
			continue; /* just grab the next fence. */

		cursor->fence = dma_fence_get_rcu(cursor->fence);

		if (!cursor->fence || read_seqcount_retry()) {
			/* we lost the race, restart completely */
			iter_begin(); /* ->fence will be cleaned up at beginning of the loop */
			continue;
		}

		return cursor->fence;
	} while (true);
}
Maybe I missed something, but that avoids the duplication of all the tricky code, i.e. checking for signalling, rcu protected conditional fence_get, and the retry is also nicely at the end.
/* For the eventually next round */
first = true;
- } while (read_seqcount_retry(&obj->seq, cursor->seq));
- return cursor->fence;
+} +EXPORT_SYMBOL_GPL(dma_resv_iter_walk_unlocked);
- /**
- dma_resv_copy_fences - Copy all fences from src to dst.
- @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 9100dd3dc21f..693d16117153 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -149,6 +149,90 @@ struct dma_resv { struct dma_resv_list __rcu *fence; }; +/**
- struct dma_resv_iter - current position into the dma_resv fences
- Don't touch this directly in the driver, use the accessor function instead.
- */
+struct dma_resv_iter {
- /** @obj: The dma_resv object we iterate over */
- struct dma_resv *obj;
- /** @all_fences: If all fences should be returned */
- bool all_fences;
- /** @fence: the currently handled fence */
- struct dma_fence *fence;
- /** @seq: sequence number to check for modifications */
- unsigned int seq;
- /** @index: index into the shared fences */
If you go with my suggestion (assuming it works): Please add "-1 indicates to pick the exclusive fence instead."
- unsigned int index;
- /** @fences: the shared fences */
- struct dma_resv_list *fences;
- /** @is_first: true if this is the first returned fence */
- bool is_first;
I think if we just rely on -1 == exclusive fence/is_first we don't need this one here?
+};
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
bool first);
+/**
- dma_resv_iter_begin - initialize a dma_resv_iter object
- @cursor: The dma_resv_iter object to initialize
- @obj: The dma_resv object which we want to iterate over
- @all_fences: If all fences should be returned or just the exclusive one
Please add: "Callers must clean up the iterator with dma_resv_iter_end()."
- */
+static inline void dma_resv_iter_begin(struct dma_resv_iter *cursor,
struct dma_resv *obj,
bool all_fences)
+{
- cursor->obj = obj;
- cursor->all_fences = all_fences;
- cursor->fence = NULL;
+}
+/**
- dma_resv_iter_end - cleanup a dma_resv_iter object
- @cursor: the dma_resv_iter object which should be cleaned up
- Make sure that the reference to the fence in the cursor is properly
- dropped.
Please add:
"This function must be called every time dma_resv_iter_begin() was called to clean up any references."
- */
+static inline void dma_resv_iter_end(struct dma_resv_iter *cursor) +{
- dma_fence_put(cursor->fence);
+}
+/**
- dma_resv_iter_is_exclusive - test if the current fence is the exclusive one
- @cursor: the cursor of the current position
- Returns true if the currently returned fence is the exclusive one.
- */
+static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) +{
- return cursor->index == -1;
+}
+/**
- dma_resv_for_each_fence_unlocked - unlocked fence iterator
- @cursor: a struct dma_resv_iter pointer
- @fence: the current fence
- Iterate over the fences in a struct dma_resv object without holding the
- dma_resv::lock. The RCU read side lock must be held when using this, but can
- be dropped and re-taken as necessary inside the loop. The cursor needs to be
- initialized with dma_resv_iter_begin_unlocked() and cleaned up with
We don't have an _unlocked version?
@Christian:
I'd also mention that the fence reference is held during the walk so someone is less likely to grab extra ones.
- dma_resv_iter_end_unlocked().
Regards,
Tvrtko
On 20.09.21 at 10:43, Tvrtko Ursulin wrote:
On 17/09/2021 14:23, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:34:48PM +0200, Christian König wrote:
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 61 +++++++++++++++++++++++++++ include/linux/dma-resv.h | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..3e77cad2c9d4 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,67 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/**
- dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
- @cursor: cursor to record the current position
- @first: if we should start over
- Return all the fences in the dma_resv object which are not yet
signaled.
- The returned fence has an extra local reference so will stay alive.
- If a concurrent modify is detected the whole iteration is
started over again.
- */
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
Bit ocd, but I'd still just call that iter_next.
+ bool first)
Hm I'd put all the init code into iter_begin ...
@Christian:
Could you engineer something in here which would, at least in debug builds, catch failures to call "iter begin" before using the iterator macro?
Yeah, I've already played with the thought of somehow teaching lockdep that, but then abandoned the idea as an abuse of lockdep.
+{ + struct dma_resv *obj = cursor->obj;
Aren't we missing rcu_read_lock() around the entire thing here?
+ first |= read_seqcount_retry(&obj->seq, cursor->seq); + do { + /* Drop the reference from the previous round */ + dma_fence_put(cursor->fence);
+ cursor->is_first = first; + if (first) { + cursor->seq = read_seqcount_begin(&obj->seq); + cursor->index = -1; + cursor->fences = dma_resv_shared_list(obj);
And then also call iter_begin from here. That way we guarantee that read_seqcount_begin is always called before _retry(). It's not a problem with the seqcount implementation (I think at least), but it definitely looks funny.
Calling iter_begin here also makes it clear that we're essentially restarting.
+ cursor->fence = dma_resv_excl_fence(obj); + if (cursor->fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
Please use the right dma_fence wrapper here for this and don't look at the bits/flags outside of dma_fence.[hc] code. I just realized that we don't have the right amount of barriers in there for the fastpath, i.e. if we have:
x = 0; /* static initializer */

thread a:
	x = 1;
	dma_fence_signal(fence);

thread b:
	if (dma_fence_is_signaled(fence))
		printk("%i\n", x);
Then you might actually be able to observe x == 0 in thread b. Which is not what we want at all.
@Daniel:
What do you mean here? If 'x' is "external" (not part of dma-fence), are you suggesting dma-fence code should serialise it by using barriers?
That would sound incorrect to me, or in other words, I think it's fine if x == 0 is observed in your example thread B since that code is mixing external data with dma-fence.
No, Daniel is right. The problem is that on architectures other than x86 barriers are per memory address (or rather cache line in practice).
So you need to be really careful that you see the fully consistent state, and not just one variable in the new state while others are still in the old state.
But this was buggy before as well. I'm just pulling the existing test into the new iterator.
Hm also, there is that annoying bit where by using dma_fence_is_signaled any code becomes a fence signaling critical path, which I never bought into. There should be a way to test the signaled status without actually doing the signaling. Or am I misunderstanding something so badly that it really, really has to be like this?
You are mixing things up. Testing is unproblematic, signaling is the problematic one.
So no open-coding of dma_fence flag bits code outside of dma_fence.[hc] please. And yes i915-gem code is unfortunately a disaster.
Never one to miss an opportunity for some good trashing, no? :D
But yes, deconstructed dma_fence_signal I thought we were supposed to add to core. Or at least propose, don't exactly remember how that went.
The problem is that you need to grab a reference to call dma_fence_signal while testing the flag works without one.
Regards, Christian.
On 20/09/2021 11:09, Christian König wrote:
Am 20.09.21 um 10:43 schrieb Tvrtko Ursulin:
On 17/09/2021 14:23, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:34:48PM +0200, Christian König wrote:
Abstract the complexity of iterating over all the fences in a dma_resv object.
The new loop handles the whole RCU and retry dance and returns only fences where we can be sure we grabbed the right one.
v2: fix accessing the shared fences while they might be freed, improve kerneldoc, rename _cursor to _iter, add dma_resv_iter_is_exclusive, add dma_resv_iter_begin/end
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 61 +++++++++++++++++++++++++++ include/linux/dma-resv.h | 84 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 84fbe60629e3..3e77cad2c9d4 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -323,6 +323,67 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) } EXPORT_SYMBOL(dma_resv_add_excl_fence); +/**
- dma_resv_iter_walk_unlocked - walk over fences in a dma_resv obj
- @cursor: cursor to record the current position
- @first: if we should start over
- Return all the fences in the dma_resv object which are not yet
signaled.
- The returned fence has an extra local reference so will stay alive.
- If a concurrent modify is detected the whole iteration is
started over again.
- */
+struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
Bit ocd, but I'd still just call that iter_next.
+ bool first)
Hm I'd put all the init code into iter_begin ...
@Christian:
Could you engineer something in here which would, at least in debug builds, catch failures to call "iter begin" before using the iterator macro?
Yeah, I've already played with the thought of somehow teaching lockdep that. But then abandoned this as abusive of lockdep.
Yes, probably not lockdep, but it would need to be a separate build-time option akin to DEBUG_WW_MUTEXES and similar.
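One possible shape for such a check (hypothetical Kconfig symbol and cursor field, purely illustrative):

	#ifdef CONFIG_DEBUG_DMA_RESV_ITER	/* hypothetical option */
	#define DMA_RESV_ITER_MAGIC	0x6a8f1c3e

	/* dma_resv_iter_begin() would set cursor->magic, and the walk
	 * functions would check it, catching missed begin() calls. */
	static inline void dma_resv_iter_debug_check(struct dma_resv_iter *cursor)
	{
		WARN_ON_ONCE(cursor->magic != DMA_RESV_ITER_MAGIC);
	}
	#else
	static inline void dma_resv_iter_debug_check(struct dma_resv_iter *cursor) { }
	#endif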
+{ + struct dma_resv *obj = cursor->obj;
Aren't we missing rcu_read_lock() around the entire thing here?
+ first |= read_seqcount_retry(&obj->seq, cursor->seq); + do { + /* Drop the reference from the previous round */ + dma_fence_put(cursor->fence);
+ cursor->is_first = first; + if (first) { + cursor->seq = read_seqcount_begin(&obj->seq); + cursor->index = -1; + cursor->fences = dma_resv_shared_list(obj);
And then also call iter_begin from here. That way we guarantee that read_seqcount_begin is always called before _retry(). It's not a problem with the seqcount implementation (I think at least), but it definitely looks funny.
Calling iter_begin here also makes it clear that we're essentially restarting.
+ cursor->fence = dma_resv_excl_fence(obj); + if (cursor->fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
Please use the right dma_fence wrapper here for this and don't look at the bits/flags outside of dma_fence.[hc] code. I just realized that we don't have the right amount of barriers in there for the fastpath, i.e. if we have:
x = 0; /* static initializer */

thread a:
	x = 1;
	dma_fence_signal(fence);

thread b:
	if (dma_fence_is_signaled(fence))
		printk("%i\n", x);
Then you might actually be able to observe x == 0 in thread b. Which is not what we want at all.
@Daniel:
What do you mean here? If 'x' is "external" (not part of dma-fence), are you suggesting dma-fence code should serialise it by using barriers?
That would sound incorrect to me, or in other words, I think it's fine if x == 0 is observed in your example thread B since that code is mixing external data with dma-fence.
No, Daniel is right. The problem is that on architectures other than x86 barriers are per memory address (or rather cache line in practice).
So you need to be really careful that you see the fully consistent state, and not just one variable in the new state while others are still in the old state.
I don't see it yet - what are the variables we are talking about here? Ordering relating to the iterator code in here or something truly external?
The iterator can obviously race and "return" an already signaled fence (transitioned from unsignaled to signaled between the iterator checking it and deciding to walk it). But that I don't think you can, or plan to, fix.
But this was buggy before as well. I'm just pulling the existing test into the new iterator.
Okay.
Hm also, there is that annoying bit where by using dma_fence_is_signaled any code becomes a fence signaling critical path, which I never bought into. There should be a way to test the signaled status without actually doing the signaling. Or am I misunderstanding something so badly that it really, really has to be like this?
You are mixing things up. Testing is unproblematic, signaling is the problematic one.
I was pointing out dma_fence_is_signaled can call dma_fence_signal. And that has in the past, AFAIR at least, caused some fence annotation splats which IMO are questionable.
Regards,
Tvrtko
A simpler version of the iterator to be used when the dma_resv object is locked.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/dma-resv.h   | 17 +++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 3e77cad2c9d4..a3c79a99fb44 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -384,6 +384,39 @@ struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
 }
 EXPORT_SYMBOL_GPL(dma_resv_iter_walk_unlocked);
 
+/**
+ * dma_resv_iter_walk - walk over fences in a dma_resv obj
+ * @cursor: cursor to record the current position
+ * @first: if we should start over
+ *
+ * Return all the fences in the dma_resv object while holding the
+ * dma_resv::lock.
+ */
+struct dma_fence *dma_resv_iter_walk(struct dma_resv_iter *cursor, bool first)
+{
+	dma_resv_assert_held(cursor->obj);
+
+	cursor->is_first = first;
+	if (first) {
+		struct dma_fence *fence;
+
+		cursor->index = -1;
+		cursor->fences = dma_resv_shared_list(cursor->obj);
+
+		fence = dma_resv_excl_fence(cursor->obj);
+		if (fence)
+			return fence;
+	}
+
+	if (!cursor->all_fences || !cursor->fences ||
+	    ++cursor->index >= cursor->fences->shared_count)
+		return NULL;
+
+	return rcu_dereference_protected(cursor->fences->shared[cursor->index],
+					 dma_resv_held(cursor->obj));
+}
+EXPORT_SYMBOL_GPL(dma_resv_iter_walk);
+
 /**
  * dma_resv_copy_fences - Copy all fences from src to dst.
  * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 693d16117153..8c968f8c9d33 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -179,6 +179,7 @@ struct dma_resv_iter {
 
 struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor,
 					      bool first);
+struct dma_fence *dma_resv_iter_walk(struct dma_resv_iter *cursor, bool first);
 
 /**
  * dma_resv_iter_begin - initialize a dma_resv_iter object
@@ -233,6 +234,22 @@ static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor)
 	for (fence = dma_resv_iter_walk_unlocked(cursor, true);	\
 	     fence; fence = dma_resv_iter_walk_unlocked(cursor, false))
 
+/**
+ * dma_resv_for_each_fence - fence iterator
+ * @cursor: a struct dma_resv_iter pointer
+ * @obj: a dma_resv object pointer
+ * @all_fences: true if all fences should be returned
+ * @fence: the current fence
+ *
+ * Iterate over the fences in a struct dma_resv object while holding the
+ * dma_resv::lock. @all_fences controls if the shared fences are returned as
+ * well. The cursor initialisation is part of the iterator.
+ */
+#define dma_resv_for_each_fence(cursor, obj, all_fences, fence)	\
+	for (dma_resv_iter_begin(cursor, obj, all_fences),	\
+	     fence = dma_resv_iter_walk(cursor, true); fence;	\
+	     fence = dma_resv_iter_walk(cursor, false))
+
 #define dma_resv_held(obj) lockdep_is_held(&(obj)->lock.base)
 #define dma_resv_assert_held(obj) lockdep_assert_held(&(obj)->lock.base)
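As an illustration of the resulting calling convention (dump_fences() is a made-up example, not part of the patch):

	/* Hypothetical caller: print all fences. The caller must already
	 * hold the reservation lock, so no RCU or retry handling is needed. */
	static void dump_fences(struct dma_resv *obj)
	{
		struct dma_resv_iter cursor;
		struct dma_fence *fence;

		dma_resv_assert_held(obj);
		dma_resv_for_each_fence(&cursor, obj, true, fence)
			pr_info("fence %llu#%llu %s\n",
				fence->context, fence->seqno,
				dma_resv_iter_is_exclusive(&cursor) ?
				"exclusive" : "shared");
	}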
On Fri, Sep 17, 2021 at 02:34:49PM +0200, Christian König wrote:
A simpler version of the iterator to be used when the dma_resv object is locked.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 33 +++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 17 +++++++++++++++++ 2 files changed, 50 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 3e77cad2c9d4..a3c79a99fb44 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -384,6 +384,39 @@ struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor, } EXPORT_SYMBOL_GPL(dma_resv_iter_walk_unlocked); +/**
- dma_resv_iter_walk - walk over fences in a dma_resv obj
- @cursor: cursor to record the current position
- @first: if we should start over
- Return all the fences in the dma_resv object while holding the
- dma_resv::lock.
- */
+struct dma_fence *dma_resv_iter_walk(struct dma_resv_iter *cursor, bool first) +{
- dma_resv_assert_held(cursor->obj);
- cursor->is_first = first;
- if (first) {
struct dma_fence *fence;
cursor->index = -1;
cursor->fences = dma_resv_shared_list(cursor->obj);
fence = dma_resv_excl_fence(cursor->obj);
if (fence)
return fence;
- }
I think you can still use the shared iter_begin/end functions even with my suggestions for patch 1, but it would mean changes here too.
- if (!cursor->all_fences || !cursor->fences ||
++cursor->index >= cursor->fences->shared_count)
return NULL;
- return rcu_dereference_protected(cursor->fences->shared[cursor->index],
dma_resv_held(cursor->obj));
+} +EXPORT_SYMBOL_GPL(dma_resv_iter_walk);
/**
- dma_resv_copy_fences - Copy all fences from src to dst.
- @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 693d16117153..8c968f8c9d33 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -179,6 +179,7 @@ struct dma_resv_iter { struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor, bool first); +struct dma_fence *dma_resv_iter_walk(struct dma_resv_iter *cursor, bool first); /**
- dma_resv_iter_begin - initialize a dma_resv_iter object
@@ -233,6 +234,22 @@ static inline bool dma_resv_iter_is_exclusive(struct dma_resv_iter *cursor) for (fence = dma_resv_iter_walk_unlocked(cursor, true); \ fence; fence = dma_resv_iter_walk_unlocked(cursor, false)) +/**
- dma_resv_for_each_fence - fence iterator
- @cursor: a struct dma_resv_iter pointer
- @obj: a dma_resv object pointer
- @all_fences: true if all fences should be returned
- @fence: the current fence
- Iterate over the fences in a struct dma_resv object while holding the
- dma_resv::lock. @all_fences controls if the shared fences are returned as
&dma_resv.lock is how you reference struct members in kerneldoc. I think you had this also in patch 1.
- well. The cursor initialisation is part of the iterator.
Please also link to the iter_begin/end functions here.
Aside from doc nits and obviously changes due to changes in patch 1 (if we do them), this looks good. -Daniel
On Fri, Sep 17, 2021 at 03:27:55PM +0200, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:34:49PM +0200, Christian König wrote:
A simpler version of the iterator to be used when the dma_resv object is locked.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 33 +++++++++++++++++++++++++++++++++ include/linux/dma-resv.h | 17 +++++++++++++++++ 2 files changed, 50 insertions(+)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 3e77cad2c9d4..a3c79a99fb44 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -384,6 +384,39 @@ struct dma_fence *dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor, } EXPORT_SYMBOL_GPL(dma_resv_iter_walk_unlocked); +/**
- dma_resv_iter_walk - walk over fences in a dma_resv obj
- @cursor: cursor to record the current position
- @first: if we should start over
- Return all the fences in the dma_resv object while holding the
- dma_resv::lock.
I think we should document here that the fence is valid for as long as the dma_resv_lock is held. That is unlike the _unlocked version, where the fence stops being valid either on the next call to iter_next() or on the call to iter_end() that cleans up everything.
Might be good to clarify that also for the unlocked version to be really precise here when the fence is valid and when not. -Daniel
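Something along these lines, perhaps (suggested kerneldoc wording only, not in the posted patch):

	/*
	 * The returned fence is only valid as long as &dma_resv.lock is
	 * held; no extra reference is taken. This differs from the unlocked
	 * walk, where the returned fence carries a reference that is dropped
	 * on the next dma_resv_iter_walk_unlocked() call or by
	 * dma_resv_iter_end().
	 */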
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-resv.c | 86 ++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 51 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index a3c79a99fb44..406150dea5e4 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -426,74 +426,58 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_walk);
  */
 int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src)
 {
-	struct dma_resv_list *src_list, *dst_list;
-	struct dma_fence *old, *new;
-	unsigned int i;
+	struct dma_resv_iter cursor;
+	struct dma_resv_list *list;
+	struct dma_fence *f, *excl;
 
 	dma_resv_assert_held(dst);
 
-	rcu_read_lock();
-	src_list = dma_resv_shared_list(src);
-
-retry:
-	if (src_list) {
-		unsigned int shared_count = src_list->shared_count;
-
-		rcu_read_unlock();
+	list = NULL;
+	excl = NULL;
 
-		dst_list = dma_resv_list_alloc(shared_count);
-		if (!dst_list)
-			return -ENOMEM;
+	rcu_read_lock();
+	dma_resv_iter_begin(&cursor, src, true);
+	dma_resv_for_each_fence_unlocked(&cursor, f) {
 
-		rcu_read_lock();
-		src_list = dma_resv_shared_list(src);
-		if (!src_list || src_list->shared_count > shared_count) {
-			kfree(dst_list);
-			goto retry;
-		}
+		if (cursor.is_first) {
+			dma_resv_list_free(list);
+			dma_fence_put(excl);
 
-		dst_list->shared_count = 0;
-		for (i = 0; i < src_list->shared_count; ++i) {
-			struct dma_fence __rcu **dst;
-			struct dma_fence *fence;
+			if (cursor.fences) {
+				unsigned int cnt = cursor.fences->shared_count;
 
-			fence = rcu_dereference(src_list->shared[i]);
-			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				     &fence->flags))
-				continue;
+				rcu_read_unlock();
+				list = dma_resv_list_alloc(cnt);
+				if (!list) {
+					dma_resv_iter_end(&cursor);
+					return -ENOMEM;
+				}
 
-			if (!dma_fence_get_rcu(fence)) {
-				dma_resv_list_free(dst_list);
-				src_list = dma_resv_shared_list(src);
-				goto retry;
-			}
+				list->shared_count = 0;
+				rcu_read_lock();
 
-			if (dma_fence_is_signaled(fence)) {
-				dma_fence_put(fence);
-				continue;
+			} else {
+				list = NULL;
 			}
-
-			dst = &dst_list->shared[dst_list->shared_count++];
-			rcu_assign_pointer(*dst, fence);
+			excl = NULL;
 		}
-	} else {
-		dst_list = NULL;
-	}
 
-	new = dma_fence_get_rcu_safe(&src->fence_excl);
+		dma_fence_get(f);
+		if (dma_resv_iter_is_exclusive(&cursor))
+			excl = f;
+		else
+			RCU_INIT_POINTER(list->shared[list->shared_count++], f);
+	}
+	dma_resv_iter_end(&cursor);
 	rcu_read_unlock();
 
-	src_list = dma_resv_shared_list(dst);
-	old = dma_resv_excl_fence(dst);
-
 	write_seqcount_begin(&dst->seq);
-	/* write_seqcount_begin provides the necessary memory barrier */
-	RCU_INIT_POINTER(dst->fence_excl, new);
-	RCU_INIT_POINTER(dst->fence, dst_list);
+	excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst));
+	list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst));
 	write_seqcount_end(&dst->seq);
 
-	dma_resv_list_free(src_list);
-	dma_fence_put(old);
+	dma_resv_list_free(list);
+	dma_fence_put(excl);
 
 	return 0;
 }
On Fri, Sep 17, 2021 at 02:34:50PM +0200, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 86 ++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 51 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index a3c79a99fb44..406150dea5e4 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -426,74 +426,58 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_walk); */ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) {
- struct dma_resv_list *src_list, *dst_list;
- struct dma_fence *old, *new;
- unsigned int i;
- struct dma_resv_iter cursor;
- struct dma_resv_list *list;
- struct dma_fence *f, *excl;
dma_resv_assert_held(dst);
- rcu_read_lock();
- src_list = dma_resv_shared_list(src);
-retry:
- if (src_list) {
unsigned int shared_count = src_list->shared_count;
rcu_read_unlock();
- list = NULL;
- excl = NULL;
dst_list = dma_resv_list_alloc(shared_count);
if (!dst_list)
return -ENOMEM;
- rcu_read_lock();
- dma_resv_iter_begin(&cursor, src, true);
- dma_resv_for_each_fence_unlocked(&cursor, f) {
rcu_read_lock();
src_list = dma_resv_shared_list(src);
if (!src_list || src_list->shared_count > shared_count) {
kfree(dst_list);
goto retry;
}
if (cursor.is_first) {
Maybe have a wrapper for this, like dma_resv_iter_is_reset or is_first or is_restart (my preference), with some nice docs that this returns true every time we had to restart the sequence?
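For instance (a sketch of the suggested wrapper, not code from the series):

	/**
	 * dma_resv_iter_is_restart - test if this is a restarted walk
	 * @cursor: the cursor of the current position
	 *
	 * Returns true for the first fence of the walk and every time the
	 * iteration had to be restarted because of a concurrent
	 * modification, so callers can throw away partial state collected
	 * so far.
	 */
	static inline bool dma_resv_iter_is_restart(struct dma_resv_iter *cursor)
	{
		return cursor->is_first;
	}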
Otherwise I fully agree, this is so much better with all the hairy restarting and get_rcu and test_bit shovelled away somewhere.
Either way (but I much prefer a wrapper for is_first):
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
On 17.09.21 at 16:35, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:34:50PM +0200, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 86 ++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 51 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index a3c79a99fb44..406150dea5e4 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -426,74 +426,58 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_walk); */ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) {
- struct dma_resv_list *src_list, *dst_list;
- struct dma_fence *old, *new;
- unsigned int i;
- struct dma_resv_iter cursor;
- struct dma_resv_list *list;
- struct dma_fence *f, *excl;
dma_resv_assert_held(dst);
- rcu_read_lock();
- src_list = dma_resv_shared_list(src);
-retry:
- if (src_list) {
unsigned int shared_count = src_list->shared_count;
rcu_read_unlock();
- list = NULL;
- excl = NULL;
dst_list = dma_resv_list_alloc(shared_count);
if (!dst_list)
return -ENOMEM;
- rcu_read_lock();
- dma_resv_iter_begin(&cursor, src, true);
- dma_resv_for_each_fence_unlocked(&cursor, f) {
rcu_read_lock();
src_list = dma_resv_shared_list(src);
if (!src_list || src_list->shared_count > shared_count) {
kfree(dst_list);
goto retry;
}
if (cursor.is_first) {
Maybe have a wrapper for this, like dma_resv_iter_is_reset or is_first or is_restart (my preference), with some nice docs that this returns true every time we had to restart the sequence?
Exactly that's what I wanted to avoid, since the is_first (or whatever) function should only be used inside the dma_resv.c code.
On the other hand, I can just make that static here and document that it should never be exported.
Christian.
Otherwise I fully agree, this is so much better with all the hairy restarting and get_rcu and test_bit shovelled away somewhere.
Either way (but I much prefer a wrapper for is_first):
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
This makes the function much simpler since the complex retry logic is now handled elsewhere.
v2: use sizeof(void*) instead
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 112 +++++++++++++------------------------ 1 file changed, 40 insertions(+), 72 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 406150dea5e4..9b90bd9ac018 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -487,99 +487,67 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * dma_resv_get_fences - Get an object's shared and exclusive * fences without update side lock held * @obj: the reservation object - * @pfence_excl: the returned exclusive fence (or NULL) - * @pshared_count: the number of shared fences returned - * @pshared: the array of shared fence ptrs returned (array is krealloc'd to + * @fence_excl: the returned exclusive fence (or NULL) + * @shared_count: the number of shared fences returned + * @shared: the array of shared fence ptrs returned (array is krealloc'd to * the required size, and must be freed by caller) * * Retrieve all fences from the reservation object. If the pointer for the * exclusive fence is not specified the fence is put into the array of the * shared fences as well. Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl, - unsigned int *pshared_count, - struct dma_fence ***pshared) +int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **fence_excl, + unsigned int *shared_count, struct dma_fence ***shared) { - struct dma_fence **shared = NULL; - struct dma_fence *fence_excl; - unsigned int shared_count; - int ret = 1; - - do { - struct dma_resv_list *fobj; - unsigned int i, seq; - size_t sz = 0; - - shared_count = i = 0; - - rcu_read_lock(); - seq = read_seqcount_begin(&obj->seq); + struct dma_resv_iter cursor; + struct dma_fence *fence;
- fence_excl = dma_resv_excl_fence(obj); - if (fence_excl && !dma_fence_get_rcu(fence_excl)) - goto unlock; + *shared_count = 0; + *shared = NULL;
- fobj = dma_resv_shared_list(obj); - if (fobj) - sz += sizeof(*shared) * fobj->shared_max; + if (fence_excl) + *fence_excl = NULL;
- if (!pfence_excl && fence_excl) - sz += sizeof(*shared); + rcu_read_lock(); + dma_resv_iter_begin(&cursor, obj, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) {
- if (sz) { - struct dma_fence **nshared; + if (cursor.is_first) { + unsigned int count;
- nshared = krealloc(shared, sz, - GFP_NOWAIT | __GFP_NOWARN); - if (!nshared) { - rcu_read_unlock(); + while (*shared_count) + dma_fence_put((*shared)[--(*shared_count)]);
- dma_fence_put(fence_excl); - fence_excl = NULL; + if (fence_excl) + dma_fence_put(*fence_excl);
- nshared = krealloc(shared, sz, GFP_KERNEL); - if (nshared) { - shared = nshared; - continue; - } + count = cursor.fences ? cursor.fences->shared_count : 0; + count += fence_excl ? 0 : 1; + rcu_read_unlock();
- ret = -ENOMEM; - break; - } - shared = nshared; - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - shared[i] = rcu_dereference(fobj->shared[i]); - if (!dma_fence_get_rcu(shared[i])) - break; + /* Eventually re-allocate the array */ + *shared = krealloc_array(*shared, count, + sizeof(void *), + GFP_KERNEL); + if (count && !*shared) { + dma_resv_iter_end(&cursor); + return -ENOMEM; } + rcu_read_lock(); }
- if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) { - while (i--) - dma_fence_put(shared[i]); - dma_fence_put(fence_excl); - goto unlock; - } - - ret = 0; -unlock: - rcu_read_unlock(); - } while (ret); - - if (pfence_excl) - *pfence_excl = fence_excl; - else if (fence_excl) - shared[shared_count++] = fence_excl; + if (dma_resv_iter_is_exclusive(&cursor) && fence_excl) + *fence_excl = fence; + else + (*shared)[(*shared_count)++] = fence;
- if (!shared_count) { - kfree(shared); - shared = NULL; + /* Don't drop the reference */ + fence = NULL; } + dma_resv_iter_end(&cursor); + rcu_read_unlock();
- *pshared_count = shared_count; - *pshared = shared; - return ret; + return 0; } EXPORT_SYMBOL_GPL(dma_resv_get_fences);
On Fri, Sep 17, 2021 at 02:34:51PM +0200, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
v2: use sizeof(void*) instead
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 112 +++++++++++++------------------------ 1 file changed, 40 insertions(+), 72 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 406150dea5e4..9b90bd9ac018 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -487,99 +487,67 @@ EXPORT_SYMBOL(dma_resv_copy_fences);
 /**
  * dma_resv_get_fences - Get an object's shared and exclusive
  * fences without update side lock held
  * @obj: the reservation object
- * @pfence_excl: the returned exclusive fence (or NULL)
- * @pshared_count: the number of shared fences returned
- * @pshared: the array of shared fence ptrs returned (array is krealloc'd to
+ * @fence_excl: the returned exclusive fence (or NULL)
+ * @shared_count: the number of shared fences returned
+ * @shared: the array of shared fence ptrs returned (array is krealloc'd to
  * the required size, and must be freed by caller)
  *
  * Retrieve all fences from the reservation object. If the pointer for the
  * exclusive fence is not specified the fence is put into the array of the
  * shared fences as well. Returns either zero or -ENOMEM.
  */
-int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **pfence_excl,
-			unsigned int *pshared_count,
-			struct dma_fence ***pshared)
+int dma_resv_get_fences(struct dma_resv *obj, struct dma_fence **fence_excl,
+			unsigned int *shared_count, struct dma_fence ***shared)
 {
-	struct dma_fence **shared = NULL;
-	struct dma_fence *fence_excl;
-	unsigned int shared_count;
-	int ret = 1;
-
-	do {
-		struct dma_resv_list *fobj;
-		unsigned int i, seq;
-		size_t sz = 0;
-
-		shared_count = i = 0;
-
-		rcu_read_lock();
-		seq = read_seqcount_begin(&obj->seq);
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;

-		fence_excl = dma_resv_excl_fence(obj);
-		if (fence_excl && !dma_fence_get_rcu(fence_excl))
-			goto unlock;
+	*shared_count = 0;
+	*shared = NULL;

-		fobj = dma_resv_shared_list(obj);
-		if (fobj)
-			sz += sizeof(*shared) * fobj->shared_max;
+	if (fence_excl)
+		*fence_excl = NULL;

-		if (!pfence_excl && fence_excl)
-			sz += sizeof(*shared);
+	rcu_read_lock();
+	dma_resv_iter_begin(&cursor, obj, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {

-		if (sz) {
-			struct dma_fence **nshared;
+		if (cursor.is_first) {
Yeah with the second one here I definitely think we need a dma_resv_iter_is_restart() helper. I'm not sure whether that should have is_first or restart_only semantics, but I guess gcc won't see through the maze anyway, and hence initializing everything to NULL/0 is required.
Also is_first is a bit confusing naming imo. You mean "is this the first fence" but readers could equally read this as "is this the first time we're in the loop", which is rather confusing. Hence why I think an iter_is_restart() or maybe iter_restarted() naming is a notch clearer.
+			unsigned int count;

-			nshared = krealloc(shared, sz,
-					   GFP_NOWAIT | __GFP_NOWARN);
-			if (!nshared) {
-				rcu_read_unlock();
+			while (*shared_count)
+				dma_fence_put((*shared)[--(*shared_count)]);

-				dma_fence_put(fence_excl);
-				fence_excl = NULL;
+			if (fence_excl)
+				dma_fence_put(*fence_excl);

-				nshared = krealloc(shared, sz, GFP_KERNEL);
-				if (nshared) {
-					shared = nshared;
-					continue;
-				}
+			count = cursor.fences ? cursor.fences->shared_count : 0;
+			count += fence_excl ? 0 : 1;
+			rcu_read_unlock();

-				ret = -ENOMEM;
-				break;
-			}
-			shared = nshared;
-			shared_count = fobj ? fobj->shared_count : 0;
-			for (i = 0; i < shared_count; ++i) {
-				shared[i] = rcu_dereference(fobj->shared[i]);
-				if (!dma_fence_get_rcu(shared[i]))
-					break;
+			/* Eventually re-allocate the array */
+			*shared = krealloc_array(*shared, count,
+						 sizeof(void *),
+						 GFP_KERNEL);
+			if (count && !*shared) {
+				dma_resv_iter_end(&cursor);
+				return -ENOMEM;
 			}
+			rcu_read_lock();
 		}

-		if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
-			while (i--)
-				dma_fence_put(shared[i]);
-			dma_fence_put(fence_excl);
-			goto unlock;
-		}
-
-		ret = 0;
-unlock:
-		rcu_read_unlock();
-	} while (ret);
-
-	if (pfence_excl)
-		*pfence_excl = fence_excl;
-	else if (fence_excl)
-		shared[shared_count++] = fence_excl;
+		if (dma_resv_iter_is_exclusive(&cursor) && fence_excl)
+			*fence_excl = fence;
+		else
+			(*shared)[(*shared_count)++] = fence;

-	if (!shared_count) {
-		kfree(shared);
-		shared = NULL;
+		/* Don't drop the reference */
+		fence = NULL;
 	}
+	dma_resv_iter_end(&cursor);
+	rcu_read_unlock();

-	*pshared_count = shared_count;
-	*pshared = shared;
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(dma_resv_get_fences);
With the wrapper I'd like to have:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
2.25.1
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 68 ++++++-------------------------------- 1 file changed, 10 insertions(+), 58 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 9b90bd9ac018..c7db553ab115 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -569,74 +569,26 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; - unsigned int seq, shared_count; + struct dma_resv_iter cursor; struct dma_fence *fence; - int i;
-retry: - shared_count = 0; - seq = read_seqcount_begin(&obj->seq); rcu_read_lock(); - i = -1; - - fence = dma_resv_excl_fence(obj); - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - if (!dma_fence_get_rcu(fence)) - goto unlock_retry; + dma_resv_iter_begin(&cursor, obj, wait_all); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock();
- if (dma_fence_is_signaled(fence)) { - dma_fence_put(fence); - fence = NULL; + ret = dma_fence_wait_timeout(fence, intr, ret); + if (ret <= 0) { + dma_resv_iter_end(&cursor); + return ret; }
- } else { - fence = NULL; - } - - if (wait_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - - if (fobj) - shared_count = fobj->shared_count; - - for (i = 0; !fence && i < shared_count; ++i) { - struct dma_fence *lfence; - - lfence = rcu_dereference(fobj->shared[i]); - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &lfence->flags)) - continue; - - if (!dma_fence_get_rcu(lfence)) - goto unlock_retry; - - if (dma_fence_is_signaled(lfence)) { - dma_fence_put(lfence); - continue; - } - - fence = lfence; - break; - } + rcu_read_lock(); } - + dma_resv_iter_end(&cursor); rcu_read_unlock(); - if (fence) { - if (read_seqcount_retry(&obj->seq, seq)) { - dma_fence_put(fence); - goto retry; - }
- ret = dma_fence_wait_timeout(fence, intr, ret); - dma_fence_put(fence); - if (ret > 0 && wait_all && (i + 1 < shared_count)) - goto retry; - } return ret; - -unlock_retry: - rcu_read_unlock(); - goto retry; } EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
On Fri, Sep 17, 2021 at 02:34:52PM +0200, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 68 ++++++-------------------------------- 1 file changed, 10 insertions(+), 58 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 9b90bd9ac018..c7db553ab115 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -569,74 +569,26 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1;
-	unsigned int seq, shared_count;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i;

-retry:
-	shared_count = 0;
-	seq = read_seqcount_begin(&obj->seq);
 	rcu_read_lock();
I missed this in my previous conversion reviews, but pls move the rcu_read_lock into the iterator. That should simplify the flow in all of these quite a bit more, and since the iter_next_unlocked grabs a full reference for the iteration body we really don't need that protected by rcu.
We can't toss rcu protection for dma_resv anytime soon (if ever), but we can at least make it an implementation detail.
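With that, a caller like dma_resv_wait_timeout could shrink to roughly this sketch (assuming the walk still returns a referenced fence and takes/drops the RCU read lock internally; not the code as posted in this series):

long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
			   unsigned long timeout)
{
	long ret = timeout ? timeout : 1;
	struct dma_resv_iter cursor;
	struct dma_fence *fence;

	dma_resv_iter_begin(&cursor, obj, wait_all);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* The iterator holds a reference, so sleeping here is fine */
		ret = dma_fence_wait_timeout(fence, intr, ret);
		if (ret <= 0)
			break;
	}
	dma_resv_iter_end(&cursor);
	return ret;
}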
-	i = -1;
-
-	fence = dma_resv_excl_fence(obj);
-	if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-		if (!dma_fence_get_rcu(fence))
-			goto unlock_retry;
+	dma_resv_iter_begin(&cursor, obj, wait_all);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		rcu_read_unlock();

-		if (dma_fence_is_signaled(fence)) {
-			dma_fence_put(fence);
-			fence = NULL;
+		ret = dma_fence_wait_timeout(fence, intr, ret);
+		if (ret <= 0) {
+			dma_resv_iter_end(&cursor);
+			return ret;
 		}

-	} else {
-		fence = NULL;
-	}
-
-	if (wait_all) {
-		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
-
-		if (fobj)
-			shared_count = fobj->shared_count;
-
-		for (i = 0; !fence && i < shared_count; ++i) {
-			struct dma_fence *lfence;
-
-			lfence = rcu_dereference(fobj->shared[i]);
-			if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				     &lfence->flags))
-				continue;
-
-			if (!dma_fence_get_rcu(lfence))
-				goto unlock_retry;
-
-			if (dma_fence_is_signaled(lfence)) {
-				dma_fence_put(lfence);
-				continue;
-			}
-
-			fence = lfence;
-			break;
-		}
+		rcu_read_lock();
 	}
+	dma_resv_iter_end(&cursor);
 	rcu_read_unlock();

-	if (fence) {
-		if (read_seqcount_retry(&obj->seq, seq)) {
-			dma_fence_put(fence);
-			goto retry;
-		}
-
-		ret = dma_fence_wait_timeout(fence, intr, ret);
-		dma_fence_put(fence);
-		if (ret > 0 && wait_all && (i + 1 < shared_count))
-			goto retry;
-	}
 	return ret;
-
-unlock_retry:
-	rcu_read_unlock();
-	goto retry;
I think we still have the same semantics, and it's so much tidier.
With the rcu_read_unlock stuff into iterators (also applies to previous two patches):
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
} EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); -- 2.25.1
Am 17.09.21 um 16:43 schrieb Daniel Vetter:
I missed this in my previous conversion reviews, but pls move the rcu_read_lock into the iterator. That should simplify the flow in all of these quite a bit more, and since the iter_next_unlocked grabs a full reference for the iteration body we really don't need that protected by rcu.
I intentionally didn't do that because it makes it much more clear that we are using RCU here and there is absolutely no guarantee that the collection won't change.
But I'm fine if we go down that route instead if you think that's the way to go.
Thanks, Christian.
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/dma-buf/dma-resv.c | 56 ++++++-------------------------------- 1 file changed, 9 insertions(+), 47 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index c7db553ab115..d8f428ddaedd 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -593,22 +593,6 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);
-static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) -{ - struct dma_fence *fence, *lfence = passed_fence; - int ret = 1; - - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { - fence = dma_fence_get_rcu(lfence); - if (!fence) - return -1; - - ret = !!dma_fence_is_signaled(fence); - dma_fence_put(fence); - } - return ret; -} - /** * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. @@ -625,43 +609,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence) */ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) { + struct dma_resv_iter cursor; struct dma_fence *fence; - unsigned int seq; - int ret;
rcu_read_lock(); -retry: - ret = true; - seq = read_seqcount_begin(&obj->seq); - - if (test_all) { - struct dma_resv_list *fobj = dma_resv_shared_list(obj); - unsigned int i, shared_count; - - shared_count = fobj ? fobj->shared_count : 0; - for (i = 0; i < shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - else if (!ret) - break; + dma_resv_iter_begin(&cursor, obj, test_all); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (!dma_fence_is_signaled(fence)) { + dma_resv_iter_end(&cursor); + rcu_read_unlock(); + return false; } } - - fence = dma_resv_excl_fence(obj); - if (ret && fence) { - ret = dma_resv_test_signaled_single(fence); - if (ret < 0) - goto retry; - - } - - if (read_seqcount_retry(&obj->seq, seq)) - goto retry; - + dma_resv_iter_end(&cursor); rcu_read_unlock(); - return ret; + return true; } EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
On Fri, Sep 17, 2021 at 02:34:53PM +0200, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/dma-buf/dma-resv.c | 56 ++++++-------------------------------- 1 file changed, 9 insertions(+), 47 deletions(-)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index c7db553ab115..d8f428ddaedd 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -593,22 +593,6 @@ long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
 EXPORT_SYMBOL_GPL(dma_resv_wait_timeout);

-static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
-{
-	struct dma_fence *fence, *lfence = passed_fence;
-	int ret = 1;
-
-	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) {
-		fence = dma_fence_get_rcu(lfence);
-		if (!fence)
-			return -1;
-
-		ret = !!dma_fence_is_signaled(fence);
-		dma_fence_put(fence);
-	}
-	return ret;
-}
-
 /**
  * dma_resv_test_signaled - Test if a reservation object's fences have been
  * signaled.
@@ -625,43 +609,21 @@ static inline int dma_resv_test_signaled_single(struct dma_fence *passed_fence)
  */
 bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
 {
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	unsigned int seq;
-	int ret;

 	rcu_read_lock();
-retry:
-	ret = true;
-	seq = read_seqcount_begin(&obj->seq);
-
-	if (test_all) {
-		struct dma_resv_list *fobj = dma_resv_shared_list(obj);
-		unsigned int i, shared_count;
-
-		shared_count = fobj ? fobj->shared_count : 0;
-		for (i = 0; i < shared_count; ++i) {
-			fence = rcu_dereference(fobj->shared[i]);
-			ret = dma_resv_test_signaled_single(fence);
-			if (ret < 0)
-				goto retry;
-			else if (!ret)
-				break;
+	dma_resv_iter_begin(&cursor, obj, test_all);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
+		if (!dma_fence_is_signaled(fence)) {
Should we be extremely clever and document that the iterator already filters out unsignalled fences? We could rely on that here :-) Otoh we don't want to make the full is_signalled check in that iterator, so this makes sense.
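If we did document and rely on that, the function would collapse to something like this sketch (note the iterator only checks DMA_FENCE_FLAG_SIGNALED_BIT when it samples a fence, so this is a weaker test than calling dma_fence_is_signaled() on each one):

bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	bool ret = true;

	rcu_read_lock();
	dma_resv_iter_begin(&cursor, obj, test_all);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* Any fence we get back was unsignaled when it was sampled */
		ret = false;
		break;
	}
	dma_resv_iter_end(&cursor);
	rcu_read_unlock();
	return ret;
}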
Again rcu_read_lock into the iterators pls. With that:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
+			dma_resv_iter_end(&cursor);
+			rcu_read_unlock();
+			return false;
 		}
 	}
-
-	fence = dma_resv_excl_fence(obj);
-	if (ret && fence) {
-		ret = dma_resv_test_signaled_single(fence);
-		if (ret < 0)
-			goto retry;
-
-	}
-
-	if (read_seqcount_retry(&obj->seq, seq))
-		goto retry;
-
+	dma_resv_iter_end(&cursor);
 	rcu_read_unlock();
-	return ret;
+	return true;
 }
 EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
--
2.25.1
This is probably a fix since we didn't even grab a reference to the fences.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/ttm/ttm_bo.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3b22c0013dbf..7d804c0c69b0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -269,22 +269,16 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) { struct dma_resv *resv = &bo->base._resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; - int i;
rcu_read_lock(); - fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - if (fence && !fence->ops->signaled) - dma_fence_enable_sw_signaling(fence); - - for (i = 0; fobj && i < fobj->shared_count; ++i) { - fence = rcu_dereference(fobj->shared[i]); - + dma_resv_iter_begin(&cursor, resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!fence->ops->signaled) dma_fence_enable_sw_signaling(fence); } + dma_resv_iter_end(&cursor); rcu_read_unlock(); }
On Fri, Sep 17, 2021 at 02:34:54PM +0200, Christian König wrote:
This is probably a fix since we didn't even grab a reference to the fences.
It's rcu protected, and we only care about speeding things up a bit. I think this won't have any impact on correctness, and I don't think any driver could blow up?
But yeah maybe we should have a few assert sprinkled into various dma_fence functions to make sure we never call them when the refcount has dropped to 0. That would catch stuff like this, and help lock down the dma-fence api quite a bit.
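Something as simple as this could do (hypothetical helper, not an existing dma-fence API; dma_fence embeds a struct kref refcount):

static inline void dma_fence_assert_alive(struct dma_fence *fence)
{
	/* Calling into a fence whose last reference is gone is a bug */
	WARN_ON_ONCE(!kref_read(&fence->refcount));
}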
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/ttm/ttm_bo.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3b22c0013dbf..7d804c0c69b0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -269,22 +269,16 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) { struct dma_resv *resv = &bo->base._resv;
-	struct dma_resv_list *fobj;
+	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
-	int i;

 	rcu_read_lock();
-	fobj = dma_resv_shared_list(resv);
-	fence = dma_resv_excl_fence(resv);
-	if (fence && !fence->ops->signaled)
-		dma_fence_enable_sw_signaling(fence);
-
-	for (i = 0; fobj && i < fobj->shared_count; ++i) {
-		fence = rcu_dereference(fobj->shared[i]);
-
+	dma_resv_iter_begin(&cursor, resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
 		if (!fence->ops->signaled)
Imo delete this check here. If that really matters for performance we should have it in the core dma_fence function, not replicated all over the place like this. Noodling around in dma_fence internals like this isn't cool.
With that removal included:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
 			dma_fence_enable_sw_signaling(fence);
 	}
+	dma_resv_iter_end(&cursor);
 	rcu_read_unlock();
 }
--
2.25.1
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 44 ++++++++---------------- 1 file changed, 14 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 862eb3c1c4c5..f7d8487799b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -252,41 +252,25 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner) { - struct dma_resv_list *flist; + struct dma_resv_iter cursor; struct dma_fence *f; - unsigned i; - int r = 0; + int r;
if (resv == NULL) return -EINVAL;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - dma_fence_chain_for_each(f, f) { - struct dma_fence_chain *chain = to_dma_fence_chain(f); - - if (amdgpu_sync_test_fence(adev, mode, owner, chain ? - chain->fence : f)) { - r = amdgpu_sync_fence(sync, f); - dma_fence_put(f); - if (r) - return r; - break; - } - } - - flist = dma_resv_shared_list(resv); - if (!flist) - return 0; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); - - if (amdgpu_sync_test_fence(adev, mode, owner, f)) { - r = amdgpu_sync_fence(sync, f); - if (r) - return r; + dma_resv_for_each_fence(&cursor, resv, true, f) { + dma_fence_chain_for_each(f, f) { + struct dma_fence_chain *chain = to_dma_fence_chain(f); + + if (amdgpu_sync_test_fence(adev, mode, owner, chain ? + chain->fence : f)) { + r = amdgpu_sync_fence(sync, f); + dma_fence_put(f); + if (r) + return r; + break; + } } } return 0;
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1129e17e9f09..4511cd15c3a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1332,10 +1332,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place) { unsigned long num_pages = bo->resource->num_pages; + struct dma_resv_iter resv_cursor; struct amdgpu_res_cursor cursor; - struct dma_resv_list *flist; struct dma_fence *f; - int i;
/* Swapout? */ if (bo->resource->mem_type == TTM_PL_SYSTEM) @@ -1349,14 +1348,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = dma_resv_shared_list(bo->base.resv); - if (flist) { - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(bo->base.resv)); - if (amdkfd_fence_check_mm(f, current->mm)) - return false; - } + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + if (amdkfd_fence_check_mm(f, current->mm)) + return false; }
switch (bo->resource->mem_type) {
Simplifying the code a bit. Also drop the RCU read side lock since the object is locked anyway.
Untested since I can't get the driver to compile on !ARM.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/msm/msm_gem.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 22308a1b66fc..14907622769f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -880,7 +880,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; struct msm_gem_vma *vma; uint64_t off = drm_vma_node_start(&obj->vma_node); @@ -955,22 +955,13 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m, seq_puts(m, "\n"); }
- rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_for_each_fence(&cursor, robj, true, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + describe_fence(fence, "Exclusive", m); + else describe_fence(fence, "Shared", m); - } }
- fence = dma_resv_excl_fence(robj); - if (fence) - describe_fence(fence, "Exclusive", m); - rcu_read_unlock(); - msm_gem_unlock(obj); }
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/radeon/radeon_sync.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_sync.c b/drivers/gpu/drm/radeon/radeon_sync.c index 9257b60144c4..b991ba1bcd51 100644 --- a/drivers/gpu/drm/radeon/radeon_sync.c +++ b/drivers/gpu/drm/radeon/radeon_sync.c @@ -91,33 +91,17 @@ int radeon_sync_resv(struct radeon_device *rdev, struct dma_resv *resv, bool shared) { - struct dma_resv_list *flist; - struct dma_fence *f; + struct dma_resv_iter cursor; struct radeon_fence *fence; - unsigned i; + struct dma_fence *f; int r = 0;
- /* always sync to the exclusive fence */ - f = dma_resv_excl_fence(resv); - fence = f ? to_radeon_fence(f) : NULL; - if (fence && fence->rdev == rdev) - radeon_sync_fence(sync, fence); - else if (f) - r = dma_fence_wait(f, true); - - flist = dma_resv_shared_list(resv); - if (shared || !flist || r) - return r; - - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(resv)); + dma_resv_for_each_fence(&cursor, resv, shared, f) { fence = to_radeon_fence(f); if (fence && fence->rdev == rdev) radeon_sync_fence(sync, fence); else r = dma_fence_wait(f, true); - if (r) break; }
Simplifying the code a bit.
v2: use dma_resv_for_each_fence
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_main.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 042c16b5d54a..5bc5f775abe1 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -699,30 +699,16 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, struct drm_gem_object *obj, bool write) { + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv); - - return drm_sched_job_add_dependency(job, fence); - } - - ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences); - if (ret || !fence_count) - return ret;
- for (i = 0; i < fence_count; i++) { - ret = drm_sched_job_add_dependency(job, fences[i]); + dma_resv_for_each_fence(&cursor, obj->resv, write, fence) { + ret = drm_sched_job_add_dependency(job, fence); if (ret) - break; + return ret; } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); - return ret; + return 0; } EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
On Fri, Sep 17, 2021 at 02:34:59PM +0200, Christian König wrote:
Simplifying the code a bit.
v2: use dma_resv_for_each_fence
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/scheduler/sched_main.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 042c16b5d54a..5bc5f775abe1 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -699,30 +699,16 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, struct drm_gem_object *obj, bool write) {
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int ret;
-	struct dma_fence **fences;
-	unsigned int i, fence_count;

-	if (!write) {
-		struct dma_fence *fence = dma_resv_get_excl_unlocked(obj->resv);
-
-		return drm_sched_job_add_dependency(job, fence);
-	}
-
-	ret = dma_resv_get_fences(obj->resv, NULL, &fence_count, &fences);
-	if (ret || !fence_count)
-		return ret;

-	for (i = 0; i < fence_count; i++) {
-		ret = drm_sched_job_add_dependency(job, fences[i]);
+	dma_resv_for_each_fence(&cursor, obj->resv, write, fence) {
Hah I got tricked reading your 2nd patch, the iter_begin() is included and we don't need iter_end for this. Please correct my comments for patch 2 :-)
On this as-is:
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch
+		ret = drm_sched_job_add_dependency(job, fence);
 		if (ret)
-			break;
+			return ret;
 	}

-	for (; i < fence_count; i++)
-		dma_fence_put(fences[i]);
-	kfree(fences);
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
--
2.25.1
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_busy.c | 32 ++++++++---------------- 1 file changed, 11 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c index 6234e17259c1..b1cb7ba688da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c @@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - struct dma_resv_list *list; - unsigned int seq; + struct dma_resv_iter cursor; + struct dma_fence *fence; int err;
err = -ENOENT; @@ -109,27 +109,17 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * to report the overall busyness. This is what the wait-ioctl does. * */ -retry: - seq = raw_read_seqcount(&obj->base.resv->seq); - - /* Translate the exclusive fence to the READ *and* WRITE engine */ - args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv)); - - /* Translate shared fences to READ set of engines */ - list = dma_resv_shared_list(obj->base.resv); - if (list) { - unsigned int shared_count = list->shared_count, i; - - for (i = 0; i < shared_count; ++i) { - struct dma_fence *fence = - rcu_dereference(list->shared[i]); - + args->busy = false; + dma_resv_iter_begin(&cursor, obj->base.resv, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy = busy_check_writer(fence); + else + /* Translate shared fences to READ set of engines */ args->busy |= busy_check_reader(fence); - } } - - if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq)) - goto retry; + dma_resv_iter_end(&cursor);
err = 0; out:
On 17/09/2021 13:35, Christian König wrote:
This makes the function much simpler since the complex retry logic is now handled elsewhere.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_busy.c | 32 ++++++++---------------- 1 file changed, 11 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..b1cb7ba688da 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	struct dma_resv_list *list;
-	unsigned int seq;
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
 	int err;

 	err = -ENOENT;
@@ -109,27 +109,17 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * to report the overall busyness. This is what the wait-ioctl does.
 	 *
 	 */
-retry:
-	seq = raw_read_seqcount(&obj->base.resv->seq);
-
-	/* Translate the exclusive fence to the READ *and* WRITE engine */
-	args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
-
-	/* Translate shared fences to READ set of engines */
-	list = dma_resv_shared_list(obj->base.resv);
-	if (list) {
-		unsigned int shared_count = list->shared_count, i;
-
-		for (i = 0; i < shared_count; ++i) {
-			struct dma_fence *fence =
-				rcu_dereference(list->shared[i]);
-
+	args->busy = false;
+	dma_resv_iter_begin(&cursor, obj->base.resv, true);
+	dma_resv_for_each_fence_unlocked(&cursor, fence) {
You did not agree with my suggestion to reset args->busy on restart and so preserve current behaviour?
Regards,
Tvrtko
+		if (dma_resv_iter_is_exclusive(&cursor))
+			/* Translate the exclusive fence to the READ *and* WRITE engine */
+			args->busy = busy_check_writer(fence);
+		else
+			/* Translate shared fences to READ set of engines */
+			args->busy |= busy_check_reader(fence);
 	}
-
-	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
-		goto retry;
+	dma_resv_iter_end(&cursor);

 	err = 0;
 out:
Am 20.09.21 um 10:45 schrieb Tvrtko Ursulin:
You did not agree with my suggestion to reset args->busy on restart and so preserve current behaviour?
No, I want to keep the restart behavior internal to the dma_resv object, and as far as I can see it should not make a difference here.
Regards, Christian.
On 20/09/2021 11:13, Christian König wrote:
Am 20.09.21 um 10:45 schrieb Tvrtko Ursulin:
You did not agree with my suggestion to reset args->busy on restart and so preserve current behaviour?
No, I want to keep the restart behavior internal to the dma_resv object, and as far as I can see it should not make a difference here.
To be clear, on paper the difference between the old and new implementation shows up if the restart happens while processing the shared fences.
The old implementation unconditionally goes back to "args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));" and so overwrites the set of flags returned to userspace.
The new implementation can merge new read flags into the old set of flags and so return a composition of past and current fences.
Maybe it does not matter hugely in this case; it depends on whether userspace typically just restarts until the flags are clear. But I am not sure.
On the higher level - what do you mean by wanting to keep the restart behaviour internal? Not providing iterator users a means of detecting it? I think it has to be provided.
Regards,
Tvrtko
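For reference, the preserve-current-behaviour variant Tvrtko describes could look like this (sketch only; dma_resv_iter_is_restart() is the hypothetical helper floated earlier in the thread, busy_check_writer()/busy_check_reader() and the cursor/fence declarations as in the patch above):

	args->busy = 0;
	dma_resv_iter_begin(&cursor, obj->base.resv, true);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* Throw the accumulated flags away after every restart */
		if (dma_resv_iter_is_restart(&cursor))
			args->busy = 0;

		if (dma_resv_iter_is_exclusive(&cursor))
			args->busy = busy_check_writer(fence);
		else
			args->busy |= busy_check_reader(fence);
	}
	dma_resv_iter_end(&cursor);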
Am 20.09.21 um 12:33 schrieb Tvrtko Ursulin:
On 20/09/2021 11:13, Christian König wrote:
Am 20.09.21 um 10:45 schrieb Tvrtko Ursulin:
You did not agree with my suggestion to reset args->busy on restart and so preserve current behaviour?
No, I want to keep the restart behavior internal to the dma_resv object, and as far as I can see it should not make a difference here.
Ok I will adjust that for now to get the patch set upstream. But in general when somebody outside of the dma_resv code base depends on the restart behavior then that's a bug inside the design of that code.
The callers should only care about what unsignaled fences are inside the dma_resv container, and it shouldn't matter if those fences are presented once or multiple times because of a restart.
When this makes a difference we have a bug in the handling and should probably consider taking the dma_resv.lock instead.
Regards, Christian.
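For cases where the exact fence set does matter, the locked flavour under the reservation lock would be the tool; a sketch under that assumption, using the dma_resv_for_each_fence() helper introduced earlier in the series (the function name here is just for illustration):

static unsigned int count_unsignaled_fences(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	unsigned int count = 0;

	dma_resv_assert_held(resv);
	dma_resv_for_each_fence(&cursor, resv, true, fence) {
		/* No restarts possible while the dma_resv lock is held */
		++count;
	}
	return count;
}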
On 21/09/2021 10:41, Christian König wrote:
Am 20.09.21 um 12:33 schrieb Tvrtko Ursulin:
On 20/09/2021 11:13, Christian König wrote:
Am 20.09.21 um 10:45 schrieb Tvrtko Ursulin:
You did not agree with my suggestion to reset args->busy on restart and so preserve current behaviour?
No, I want to keep the restart behavior internal to the dma_resv object, and as far as I can see it should not make a difference here.
Ok I will adjust that for now to get the patch set upstream. But in general when somebody outside of the dma_resv code base depends on the restart behavior then that's a bug inside the design of that code.
Thanks, no change in behaviour makes for an easy r-b. :)
The callers should only care about what unsignaled fences are inside the dma_resv container and it shouldn't matter if those fences are presented once or multiple times because of a reset..
When this makes a difference we have a bug in the handling and should probably consider taking the dma_resv.lock instead.
I agree, which is why I was mentioning earlier how it would be good to completely sort locked from unlocked iterators and avoid situations where the unlocked one is called from a path where the object is locked.
Unfortunately for the display code path I cannot easily help with the audit of call paths. And I think there are at least two patches in your series which need KMS expertise.
Regards,
Tvrtko
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead, according to Tvrtko the lock is held here anyway. v3: back to using dma_resv_for_each_fence_unlocked.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_sw_fence.c | 57 ++++++++-------------------- 1 file changed, 15 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index c589a681da77..7635b0478ea5 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -572,56 +572,29 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *f; int ret = 0, pending;
debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp));
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - if (shared[i]->ops == exclude) - continue; - - pending = i915_sw_fence_await_dma_fence(fence, - shared[i], - timeout, - gfp); - if (pending < 0) { - ret = pending; - break; - } - - ret |= pending; - } - - for (i = 0; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(resv); - } - - if (ret >= 0 && excl && excl->ops != exclude) { - pending = i915_sw_fence_await_dma_fence(fence, - excl, - timeout, + rcu_read_lock(); + dma_resv_iter_begin(&cursor, resv, write); + dma_resv_for_each_fence_unlocked(&cursor, f) { + rcu_read_unlock(); + pending = i915_sw_fence_await_dma_fence(fence, f, timeout, gfp); - if (pending < 0) + rcu_read_lock(); + if (pending < 0) { ret = pending; - else - ret |= pending; - } - - dma_fence_put(excl); + break; + }
+ ret |= pending; + } + dma_resv_iter_end(&cursor); + rcu_read_unlock(); return ret; }
On 17/09/2021 13:35, Christian König wrote:
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead, according to Tvrtko the lock is held here anyway. v3: back to using dma_resv_for_each_fence_unlocked.
It did not work out - what happened?
Regards,
Tvrtko
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/i915_sw_fence.c | 57 ++++++++-------------------- 1 file changed, 15 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index c589a681da77..7635b0478ea5 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -572,56 +572,29 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    unsigned long timeout,
 				    gfp_t gfp)
 {
-	struct dma_fence *excl;
+	struct dma_resv_iter cursor;
+	struct dma_fence *f;
 	int ret = 0, pending;

 	debug_fence_assert(fence);
 	might_sleep_if(gfpflags_allow_blocking(gfp));

-	if (write) {
-		struct dma_fence **shared;
-		unsigned int count, i;
-
-		ret = dma_resv_get_fences(resv, &excl, &count, &shared);
-		if (ret)
-			return ret;
-
-		for (i = 0; i < count; i++) {
-			if (shared[i]->ops == exclude)
-				continue;
-
-			pending = i915_sw_fence_await_dma_fence(fence,
-								shared[i],
-								timeout,
-								gfp);
-			if (pending < 0) {
-				ret = pending;
-				break;
-			}
-
-			ret |= pending;
-		}
-
-		for (i = 0; i < count; i++)
-			dma_fence_put(shared[i]);
-		kfree(shared);
-	} else {
-		excl = dma_resv_get_excl_unlocked(resv);
-	}
-
-	if (ret >= 0 && excl && excl->ops != exclude) {
-		pending = i915_sw_fence_await_dma_fence(fence,
-							excl,
-							timeout,
+	rcu_read_lock();
+	dma_resv_iter_begin(&cursor, resv, write);
+	dma_resv_for_each_fence_unlocked(&cursor, f) {
+		rcu_read_unlock();
+		pending = i915_sw_fence_await_dma_fence(fence, f, timeout,
 							gfp);
-		if (pending < 0)
+		rcu_read_lock();
+		if (pending < 0) {
 			ret = pending;
-		else
-			ret |= pending;
-	}
-
-	dma_fence_put(excl);
+			break;
+		}

+		ret |= pending;
+	}
+	dma_resv_iter_end(&cursor);
+	rcu_read_unlock();
 	return ret;
 }
On 20/09/2021 09:45, Tvrtko Ursulin wrote:
On 17/09/2021 13:35, Christian König wrote:
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead, according to Tvrtko the lock is held here anyway. v3: back to using dma_resv_for_each_fence_unlocked.
It did not work out - what happened?
Wait, my suggestion to try the locked iterator was against i915_request_await_object. I hadn't looked at this one at the time, and haven't even now.
Regards,
Tvrtko
On 20.09.21 at 10:47, Tvrtko Ursulin wrote:
On 20/09/2021 09:45, Tvrtko Ursulin wrote:
On 17/09/2021 13:35, Christian König wrote:
Simplifying the code a bit.
v2: use dma_resv_for_each_fence instead, according to Tvrtko the lock is held here anyway. v3: back to using dma_resv_for_each_fence_unlocked.
It did not work out - what happened?
Wait, my suggestion to try the locked iterator was against i915_request_await_object. I haven't looked at this one at the time or even now.
Exactly! I mixed the two up; this one here really is called without holding a lock.
Regards, Christian.
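The distinction matters for which iterator a call site may use: with the reservation lock held, the plain dma_resv_for_each_fence() variant (used by the next patch) avoids the RCU and restart dance entirely. A minimal sketch of the locked side, assuming the locked macro takes the same (cursor, obj, all_fences, fence) arguments it is invoked with in the patch below:

static unsigned int count_fences_locked(struct dma_resv *resv, bool all_fences)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	unsigned int count = 0;

	dma_resv_assert_held(resv);
	/* No rcu_read_lock()/unlock() and no restarts: the reservation
	 * lock keeps the fence lists stable while we walk them.
	 */
	dma_resv_for_each_fence(&cursor, resv, all_fences, fence)
		count++;

	return count;
}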
Simplifying the code a bit.
v2: add missing rcu_read_lock()/rcu_read_unlock() v3: use dma_resv_for_each_fence instead
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/i915_request.c | 34 +++++------------------------ 1 file changed, 5 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ce446716d092..3839712ebd23 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1509,38 +1509,14 @@ i915_request_await_object(struct i915_request *to, struct drm_i915_gem_object *obj, bool write) { - struct dma_fence *excl; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret = 0;
- if (write) { - struct dma_fence **shared; - unsigned int count, i; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); + dma_resv_for_each_fence(&cursor, obj->base.resv, write, fence) { + ret = i915_request_await_dma_fence(to, fence); if (ret) - return ret; - - for (i = 0; i < count; i++) { - ret = i915_request_await_dma_fence(to, shared[i]); - if (ret) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } - - if (excl) { - if (ret == 0) - ret = i915_request_await_dma_fence(to, excl); - - dma_fence_put(excl); + break; }
return ret;
Simplifying the code a bit.
v2: add missing rcu read unlock.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 ++++++------------------ 1 file changed, 14 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f909aaa09d9c..e416cf528635 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -37,55 +37,26 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - struct dma_fence *excl; - bool prune_fences = false; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; - - ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - timeout = i915_gem_object_wait_fence(shared[i], - flags, timeout); - if (timeout < 0) - break; - - dma_fence_put(shared[i]); - } - - for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared); - - /* - * If both shared fences and an exclusive fence exist, - * then by construction the shared fences must be later - * than the exclusive fence. If we successfully wait for - * all the shared fences, we know that the exclusive fence - * must all be signaled. If all the shared fences are - * signaled, we can prune the array and recover the - * floating references on the fences/requests. - */ - prune_fences = count && timeout >= 0; - } else { - excl = dma_resv_get_excl_unlocked(resv); + struct dma_resv_iter cursor; + struct dma_fence *fence; + + rcu_read_lock(); + dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock(); + timeout = i915_gem_object_wait_fence(fence, flags, timeout); + rcu_read_lock(); + if (timeout < 0) + break; } - - if (excl && timeout >= 0) - timeout = i915_gem_object_wait_fence(excl, flags, timeout); - - dma_fence_put(excl); + dma_resv_iter_end(&cursor); + rcu_read_unlock();
/* * Opportunistically prune the fences iff we know they have *all* been * signaled. */ - if (prune_fences) + if (timeout > 0) dma_resv_prune(resv);
return timeout;
On 17/09/2021 13:35, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu read unlock.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 ++++++------------------ 1 file changed, 14 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f909aaa09d9c..e416cf528635 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -37,55 +37,26 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) {
- struct dma_fence *excl;
- bool prune_fences = false;
[...]
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ rcu_read_lock();
+ dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ rcu_read_unlock();
+ timeout = i915_gem_object_wait_fence(fence, flags, timeout);
Converting this one could be problematic. It's the wait ioctl, which used to grab an atomic snapshot and wait for that rendering to complete. With this change I think it has the potential to run forever if it keeps catching new activity against the same object.
I am not sure whether or not the difference is relevant for how userspace uses it, but I think it needs discussion.
Hm, actually there are internal callers as well, and at least some of those have the object locked. Would a wider refactoring to separate those into buckets (locked vs unlocked) make sense?
Regards,
Tvrtko
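For contrast, the snapshot semantics Tvrtko describes are what dma_resv_get_fences() provides: it grabs references to all current fences once, so work added to the object afterwards cannot extend the wait. A rough sketch of that older behaviour, reconstructed from the dma_resv_get_fences() calls visible in the removed code (illustrative, not code from the series):

static long wait_snapshot(struct dma_resv *resv, long timeout)
{
	struct dma_fence *excl, **shared;
	unsigned int count, i;
	int ret;

	/* One-shot snapshot: fences added after this point are ignored. */
	ret = dma_resv_get_fences(resv, &excl, &count, &shared);
	if (ret)
		return ret;

	for (i = 0; i < count && timeout > 0; i++)
		timeout = dma_fence_wait_timeout(shared[i], true, timeout);
	if (excl && timeout > 0)
		timeout = dma_fence_wait_timeout(excl, true, timeout);

	for (i = 0; i < count; i++)
		dma_fence_put(shared[i]);
	kfree(shared);
	dma_fence_put(excl);
	return timeout;
}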
On 20.09.21 at 12:00, Tvrtko Ursulin wrote:
On 17/09/2021 13:35, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu read unlock.
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/i915/gem/i915_gem_wait.c | 57 ++++++------------------ 1 file changed, 14 insertions(+), 43 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index f909aaa09d9c..e416cf528635 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -37,55 +37,26 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, long timeout) { - struct dma_fence *excl; - bool prune_fences = false;
- if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret;
- ret = dma_resv_get_fences(resv, &excl, &count, &shared); - if (ret) - return ret;
- for (i = 0; i < count; i++) { - timeout = i915_gem_object_wait_fence(shared[i], - flags, timeout); - if (timeout < 0) - break;
- dma_fence_put(shared[i]); - }
- for (; i < count; i++) - dma_fence_put(shared[i]); - kfree(shared);
- /* - * If both shared fences and an exclusive fence exist, - * then by construction the shared fences must be later - * than the exclusive fence. If we successfully wait for - * all the shared fences, we know that the exclusive fence - * must all be signaled. If all the shared fences are - * signaled, we can prune the array and recover the - * floating references on the fences/requests. - */ - prune_fences = count && timeout >= 0; - } else { - excl = dma_resv_get_excl_unlocked(resv); + struct dma_resv_iter cursor; + struct dma_fence *fence;
+ rcu_read_lock(); + dma_resv_iter_begin(&cursor, resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock(); + timeout = i915_gem_object_wait_fence(fence, flags, timeout);
Converting this one could be problematic. It's the wait ioctl, which used to grab an atomic snapshot and wait for that rendering to complete. With this change I think it has the potential to run forever if it keeps catching new activity against the same object.
I am not sure whether or not the difference is relevant for how userspace uses it, but I think it needs discussion.
It was years ago, but IIRC we had the same discussion for the dma_resv_wait_timeout() function, and the conclusion was that this is not a valid use case: waiting forever because you keep seeing new work is a valid result.
Let me double check the history of this code here as well.
Hm, actually there are internal callers as well, and at least some of those have the object locked. Would a wider refactoring to separate those into buckets (locked vs unlocked) make sense?
Yes, definitely.
Regards, Christian.
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 34 +++++++----------------- 1 file changed, 10 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index e416cf528635..de8084b6af42 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -122,32 +122,18 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, const struct i915_sched_attr *attr) { - struct dma_fence *excl; - - if (flags & I915_WAIT_ALL) { - struct dma_fence **shared; - unsigned int count, i; - int ret; - - ret = dma_resv_get_fences(obj->base.resv, &excl, &count, - &shared); - if (ret) - return ret; - - for (i = 0; i < count; i++) { - i915_gem_fence_wait_priority(shared[i], attr); - dma_fence_put(shared[i]); - } - - kfree(shared); - } else { - excl = dma_resv_get_excl_unlocked(obj->base.resv); - } + struct dma_resv_iter cursor; + struct dma_fence *fence;
- if (excl) { - i915_gem_fence_wait_priority(excl, attr); - dma_fence_put(excl); + rcu_read_lock(); + dma_resv_iter_begin(&cursor, obj->base.resv, flags & I915_WAIT_ALL); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock(); + i915_gem_fence_wait_priority(fence, attr); + rcu_read_lock(); } + dma_resv_iter_end(&cursor); + rcu_read_unlock(); return 0; }
This is maybe even a fix since the RCU usage here looks incorrect.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/gem/i915_gem_object.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 48112b9d76df..487329a96e92 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -507,16 +507,18 @@ static inline struct intel_engine_cs * i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) { struct intel_engine_cs *engine = NULL; + struct dma_resv_iter cursor; struct dma_fence *fence;
rcu_read_lock(); - fence = dma_resv_get_excl_unlocked(obj->base.resv); + dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (fence && dma_fence_is_i915(fence) && + !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + } + dma_resv_iter_end(&cursor); rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - return engine; }
Simplifying the code a bit.
v2: add rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/i915/display/intel_display.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 134a6acbd8fb..51e3df0de1ce 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11290,6 +11290,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
if (!new_plane_state->uapi.fence) { /* implicit fencing */ + struct dma_resv_iter cursor; struct dma_fence *fence;
ret = i915_sw_fence_await_reservation(&state->commit_ready, @@ -11300,12 +11301,16 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (ret < 0) goto unpin_fb;
- fence = dma_resv_get_excl_unlocked(obj->base.resv); - if (fence) { + rcu_read_lock(); + dma_resv_iter_begin(&cursor, obj->base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock(); add_rps_boost_after_vblank(new_plane_state->hw.crtc, fence); - dma_fence_put(fence); + rcu_read_lock(); } + dma_resv_iter_end(&cursor); + rcu_read_unlock(); } else { add_rps_boost_after_vblank(new_plane_state->hw.crtc, new_plane_state->uapi.fence);
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 09c820045859..c2c41b668f40 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1340,31 +1340,21 @@ int drm_gem_fence_array_add_implicit(struct xarray *fence_array, struct drm_gem_object *obj, bool write) { - int ret; - struct dma_fence **fences; - unsigned int i, fence_count; - - if (!write) { - struct dma_fence *fence = - dma_resv_get_excl_unlocked(obj->resv); - - return drm_gem_fence_array_add(fence_array, fence); - } - - ret = dma_resv_get_fences(obj->resv, NULL, - &fence_count, &fences); - if (ret || !fence_count) - return ret; - - for (i = 0; i < fence_count; i++) { - ret = drm_gem_fence_array_add(fence_array, fences[i]); + struct dma_resv_iter cursor; + struct dma_fence *fence; + int ret = 0; + + rcu_read_lock(); + dma_resv_iter_begin(&cursor, obj->resv, write); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock(); + ret = drm_gem_fence_array_add(fence_array, fence); + rcu_read_lock(); if (ret) break; } - - for (; i < fence_count; i++) - dma_fence_put(fences[i]); - kfree(fences); + dma_resv_iter_end(&cursor); + rcu_read_unlock(); return ret; } EXPORT_SYMBOL(drm_gem_fence_array_add_implicit);
On Fri, Sep 17, 2021 at 02:35:07PM +0200, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com
This will be gone as soon as I can land the last conversion patches. Plus it's always called with dma_resv_lock held.
I wouldn't bother tbh. -Daniel
On 17.09.21 at 16:53, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:35:07PM +0200, Christian König wrote:
Simplifying the code a bit.
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com
This will be gone as soon as I can land the last conversion patches. Plus it's always called with dma_resv_lock held.
Yeah, already thought so as well. I will just keep that around to get rid of dma_resv_get_excl_unlocked() for now until your patch lands.
Regards, Christian.
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/drm_gem_atomic_helper.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e570398abd78..d8f9c6432544 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -143,6 +143,7 @@ */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { + struct dma_resv_iter cursor; struct drm_gem_object *obj; struct dma_fence *fence;
@@ -150,9 +151,18 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0;
obj = drm_gem_fb_get_obj(state->fb, 0); - fence = dma_resv_get_excl_unlocked(obj->resv); - drm_atomic_set_fence_for_plane(state, fence); + rcu_read_lock(); + dma_resv_iter_begin(&cursor, obj->resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + rcu_read_unlock(); + /* TODO: We only use the first write fence here */ + drm_atomic_set_fence_for_plane(state, fence); + return 0; + } + dma_resv_iter_end(&cursor); + rcu_read_unlock();
+ drm_atomic_set_fence_for_plane(state, NULL); return 0; } EXPORT_SYMBOL_GPL(drm_gem_plane_helper_prepare_fb);
On Fri, Sep 17, 2021 at 02:35:08PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e570398abd78..d8f9c6432544 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -143,6 +143,7 @@ */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) {
+ struct dma_resv_iter cursor; struct drm_gem_object *obj; struct dma_fence *fence;
@@ -150,9 +151,18 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0; obj = drm_gem_fb_get_obj(state->fb, 0);
- fence = dma_resv_get_excl_unlocked(obj->resv);
- drm_atomic_set_fence_for_plane(state, fence);
+ rcu_read_lock();
+ dma_resv_iter_begin(&cursor, obj->resv, false);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ rcu_read_unlock();
+ /* TODO: We only use the first write fence here */
+ drm_atomic_set_fence_for_plane(state, fence);
Yeah, I wonder whether we should/need to collate them all together. But I guess whoever hits that first with their funny multi-plane yuv or whatever gets to do that. Or I'm not clear on what exactly your TODO here means?
+ return 0;
+ }
+ dma_resv_iter_end(&cursor);
+ rcu_read_unlock();
Imo we should do a full dma_resv_lock here. Atomic helpers are designed to allow this, and it simplifies things. Also it really doesn't matter for atomic, we should be able to do 60fps * a few planes easily :-) -Daniel
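A sketch of what Daniel is suggesting, assuming plain dma_resv_lock()/dma_resv_unlock() are usable in this path (which is the premise of his comment); the helper name is made up for illustration:

static int prepare_fb_locked(struct drm_plane_state *state,
			     struct drm_gem_object *obj)
{
	struct dma_fence *fence;
	int ret;

	ret = dma_resv_lock(obj->resv, NULL);
	if (ret)
		return ret;

	/* Under the reservation lock a plain dereference plus
	 * dma_fence_get() is enough; no seqcount retry or RCU needed.
	 * dma_fence_get(NULL) is a no-op.
	 */
	fence = dma_fence_get(dma_resv_excl_fence(obj->resv));
	dma_resv_unlock(obj->resv);

	/* drm_atomic_set_fence_for_plane() takes over the reference. */
	drm_atomic_set_fence_for_plane(state, fence);
	return 0;
}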
On 17.09.21 at 16:55, Daniel Vetter wrote:
On Fri, Sep 17, 2021 at 02:35:08PM +0200, Christian König wrote:
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com
drivers/gpu/drm/drm_gem_atomic_helper.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index e570398abd78..d8f9c6432544 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -143,6 +143,7 @@ */ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) {
+ struct dma_resv_iter cursor; struct drm_gem_object *obj; struct dma_fence *fence;
@@ -150,9 +151,18 @@ int drm_gem_plane_helper_prepare_fb(struct drm_plane *plane, struct drm_plane_st return 0; obj = drm_gem_fb_get_obj(state->fb, 0);
- fence = dma_resv_get_excl_unlocked(obj->resv);
- drm_atomic_set_fence_for_plane(state, fence);
+ rcu_read_lock();
+ dma_resv_iter_begin(&cursor, obj->resv, false);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ rcu_read_unlock();
+ /* TODO: We only use the first write fence here */
+ drm_atomic_set_fence_for_plane(state, fence);
Yeah, I wonder whether we should/need to collate them all together. But I guess whoever hits that first with their funny multi-plane yuv or whatever gets to do that. Or I'm not clear on what exactly your TODO here means?
Yeah, exactly that. Basically we have use cases where we have more than one fence to wait for.
The TODO is here because adding that to the atomic helper is just not my construction site at the moment.
Regards, Christian.
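For completeness, collating more than one write fence as discussed here could look roughly like the sketch below, assuming dma_fence_array_create() and dma_fence_context_alloc() from linux/dma-fence-array.h with their usual kernel signatures (an illustration of the idea, not part of the series):

static struct dma_fence *collate_fences(struct dma_fence **fences,
					unsigned int count)
{
	struct dma_fence_array *array;

	if (count == 0)
		return NULL;
	/* With a single fence the caller keeps ownership of the array
	 * memory and of fences[0]'s reference.
	 */
	if (count == 1)
		return fences[0];

	/* fences must be a kmalloc'd array; on success the dma_fence_array
	 * takes over both the array memory and the fence references.
	 */
	array = dma_fence_array_create(count, fences,
				       dma_fence_context_alloc(1),
				       1, false);
	if (!array)
		return NULL;

	return &array->base;
}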
Simplifying the code a bit.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 48 +++++++------------------ 1 file changed, 12 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..26f9299df881 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -339,14 +339,15 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) }
int -nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr) +nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, + bool exclusive, bool intr) { struct nouveau_fence_chan *fctx = chan->fence; - struct dma_fence *fence; struct dma_resv *resv = nvbo->bo.base.resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; + struct dma_fence *fence; struct nouveau_fence *f; - int ret = 0, i; + int ret;
if (!exclusive) { ret = dma_resv_reserve_shared(resv, 1); @@ -355,10 +356,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e return ret; }
- fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - - if (fence) { + dma_resv_for_each_fence(&cursor, resv, exclusive, fence) { struct nouveau_channel *prev = NULL; bool must_wait = true;
@@ -366,41 +364,19 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (f) { rcu_read_lock(); prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + if (prev && (prev == chan || + fctx->sync(f, prev, chan) == 0)) must_wait = false; rcu_read_unlock(); }
- if (must_wait) + if (must_wait) { ret = dma_fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - fence = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(resv)); - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); + if (ret) + return ret; } - - if (must_wait) - ret = dma_fence_wait(fence, intr); } - - return ret; + return 0; }
void
Makes the handling a bit more complex, but avoids the use of dma_resv_get_excl_unlocked().
v2: add missing rcu_read_lock()/unlock()
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 8d048bacd6f0..566f50f53f24 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -539,6 +539,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) struct nouveau_bo *nvbo; struct nv50_head_atom *asyh; struct nv50_wndw_ctxdma *ctxdma; + struct dma_resv_iter cursor; + struct dma_fence *fence; int ret;
NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, fb); @@ -561,7 +563,15 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) asyw->image.handle[0] = ctxdma->object.handle; }
- asyw->state.fence = dma_resv_get_excl_unlocked(nvbo->bo.base.resv); + rcu_read_lock(); + dma_resv_iter_begin(&cursor, nvbo->bo.base.resv, false); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + /* TODO: We only use the first writer here */ + asyw->state.fence = dma_fence_get(fence); + break; + } + dma_resv_iter_end(&cursor); + rcu_read_unlock(); asyw->image.offset[0] = nvbo->offset;
if (wndw->func->prepare) {
Instead of hand-rolling the logic.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 29 ++++++++++----------------- 1 file changed, 11 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 8f1b5af47dd6..16f5991446c8 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -428,19 +428,17 @@ int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj, static void etnaviv_gem_describe_fence(struct dma_fence *fence, const char *type, struct seq_file *m) { - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - seq_printf(m, "\t%9s: %s %s seq %llu\n", - type, - fence->ops->get_driver_name(fence), - fence->ops->get_timeline_name(fence), - fence->seqno); + seq_printf(m, "\t%9s: %s %s seq %llu\n", type, + fence->ops->get_driver_name(fence), + fence->ops->get_timeline_name(fence), + fence->seqno); }
static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) { struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); struct dma_resv *robj = obj->resv; - struct dma_resv_list *fobj; + struct dma_resv_iter cursor; struct dma_fence *fence; unsigned long off = drm_vma_node_start(&obj->vma_node);
@@ -450,19 +448,14 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m) off, etnaviv_obj->vaddr, obj->size);
rcu_read_lock(); - fobj = dma_resv_shared_list(robj); - if (fobj) { - unsigned int i, shared_count = fobj->shared_count; - - for (i = 0; i < shared_count; i++) { - fence = rcu_dereference(fobj->shared[i]); + dma_resv_iter_begin(&cursor, robj, true); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (dma_resv_iter_is_exclusive(&cursor)) + etnaviv_gem_describe_fence(fence, "Exclusive", m); + else etnaviv_gem_describe_fence(fence, "Shared", m); - } } - - fence = dma_resv_excl_fence(robj); - if (fence) - etnaviv_gem_describe_fence(fence, "Exclusive", m); + dma_resv_iter_end(&cursor); rcu_read_unlock(); }
We certainly hold the reservation lock here, no need for the RCU dance.
Signed-off-by: Christian König christian.koenig@amd.com --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 4dd7d9d541c0..7e17bc2b5df1 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -195,7 +195,7 @@ static int submit_fence_sync(struct etnaviv_gem_submit *submit) if (ret) return ret; } else { - bo->excl = dma_resv_get_excl_unlocked(robj); + bo->excl = dma_fence_get(dma_resv_excl_fence(robj)); }
}
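The one-liner works because the reservation lock, not RCU, is what keeps the exclusive fence pointer stable here. The same pattern in isolation, with a hypothetical helper name purely for illustration:

static struct dma_fence *get_excl_locked(struct dma_resv *resv)
{
	dma_resv_assert_held(resv);
	/* The lock prevents the exclusive fence from being replaced, so
	 * taking a reference cannot race; dma_fence_get(NULL) is a no-op.
	 */
	return dma_fence_get(dma_resv_excl_fence(resv));
}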
Eureka, that's finally not used any more.
Signed-off-by: Christian König christian.koenig@amd.com --- include/linux/dma-resv.h | 26 -------------------------- 1 file changed, 26 deletions(-)
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index 8c968f8c9d33..f42ca254acb5 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -423,32 +423,6 @@ dma_resv_excl_fence(struct dma_resv *obj) return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj)); }
-/** - * dma_resv_get_excl_unlocked - get the reservation object's - * exclusive fence, without lock held. - * @obj: the reservation object - * - * If there is an exclusive fence, this atomically increments it's - * reference count and returns it. - * - * RETURNS - * The exclusive fence or NULL if none - */ -static inline struct dma_fence * -dma_resv_get_excl_unlocked(struct dma_resv *obj) -{ - struct dma_fence *fence; - - if (!rcu_access_pointer(obj->fence_excl)) - return NULL; - - rcu_read_lock(); - fence = dma_fence_get_rcu_safe(&obj->fence_excl); - rcu_read_unlock(); - - return fence; -} - /** * dma_resv_shared_list - get the reservation object's shared fence list * @obj: the reservation object
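The pattern that replaces the removed helper throughout the series (see the prepare_fb and wndw patches above) takes the first fence returned by an iterator restricted to the exclusive fence. Condensed into a helper for illustration (the name is made up):

static struct dma_fence *first_write_fence(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence, *ret = NULL;

	rcu_read_lock();
	dma_resv_iter_begin(&cursor, resv, false);
	dma_resv_for_each_fence_unlocked(&cursor, fence) {
		/* Only the first (exclusive) fence is of interest; grab
		 * our own reference before dma_resv_iter_end() drops the
		 * cursor's.
		 */
		ret = dma_fence_get(fence);
		break;
	}
	dma_resv_iter_end(&cursor);
	rcu_read_unlock();
	return ret;
}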
On Fri, Sep 17, 2021 at 02:35:13PM +0200, Christian König wrote:
Heureka, that's finally not used any more.
Signed-off-by: Christian König christian.koenig@amd.com
Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch