The catch-all evict can fail due to object lock contention, since it only goes as far as trylocking the object, due to us already holding the vm->mutex. Doing a full object lock here can deadlock, since the vm->mutex is always our inner lock. Add another execbuf pass which drops the vm->mutex and then tries to grab the object will the full lock, before then retrying the eviction. This should be good enough for now to fix the immediate regression with userspace seeing -ENOSPC from execbuf due to contended object locks during GTT eviction.
Testcase: igt@gem_ppgtt@shrink-vs-evict-* Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627 References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570 References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558 Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Andrzej Hajda andrzej.hajda@intel.com Cc: Mani Milani mani@chromium.org Cc: stable@vger.kernel.org # v5.18+ --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++----- drivers/gpu/drm/i915/i915_gem_evict.h | 4 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- 6 files changed, 56 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 86956b902c97..e2ce1e4e9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -745,25 +745,44 @@ static int eb_reserve(struct i915_execbuffer *eb) * * Defragmenting is skipped if all objects are pinned at a fixed location. */ - for (pass = 0; pass <= 2; pass++) { + for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE;
if (pass == 0) pin_flags |= PIN_NONBLOCK;
if (pass >= 1) - unpinned = eb_unbind(eb, pass == 2); + unpinned = eb_unbind(eb, pass >= 2);
if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) { - err = i915_gem_evict_vm(eb->context->vm, &eb->ww); + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); mutex_unlock(&eb->context->vm->mutex); } if (err) return err; }
+ if (pass == 3) { +retry: + err = mutex_lock_interruptible(&eb->context->vm->mutex); + if (!err) { + struct drm_i915_gem_object *busy_bo = NULL; + + err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo); + mutex_unlock(&eb->context->vm->mutex); + if (err && busy_bo) { + err = i915_gem_object_lock(busy_bo, &eb->ww); + i915_gem_object_put(busy_bo); + if (!err) + goto retry; + } + } + if (err) + return err; + } + list_for_each_entry(ev, &eb->unbound, bind_link) { err = eb_reserve_vma(eb, ev, pin_flags); if (err) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index d73ba0f5c4c5..4f69bff63068 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) { - ret = i915_gem_evict_vm(&ggtt->vm, &ww); + ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4cfe36b0366b..c02ebd6900ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * @vm: Address space to cleanse * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm * will be able to evict vma's locked by the ww as well. + * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then + * in the event i915_gem_evict_vm() is unable to trylock an object for eviction, + * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop + * the vm->mutex, before trying again to acquire the contended lock. The caller + * also owns a reference to the object. * * This function evicts all vmas from a vm. * @@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * To clarify: This is for freeing up virtual address space, not for freeing * memory in e.g. the shrinker. */ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo) { int ret = 0;
@@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */ - if (dying_vma(vma) || + if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; }
- if (!i915_gem_object_trylock(vma->obj, ww)) + if (!i915_gem_object_trylock(vma->obj, ww)) { + if (busy_bo) { + *busy_bo = vma->obj; /* holds ref */ + ret = -EBUSY; + break; + } + i915_gem_object_put(vma->obj); continue; + }
__i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list); @@ -498,25 +511,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break;
- ret = 0; /* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma);
- if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + } + if (!dying_vma(vma)) + i915_gem_object_put(vma->obj); }
list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma); - if (ret == 0) + if (ret == 0) { ret = __i915_vma_unbind(vma); - if (ret != -EINTR) /* "Get me out of here!" */ - ret = 0; + if (ret != -EINTR) /* "Get me out of here!" */ + ret = 0; + }
i915_gem_object_unlock(vma->obj); + i915_gem_object_put(vma->obj); } } while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object;
int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm, - struct i915_gem_ww_ctx *ww); + struct i915_gem_ww_ctx *ww, + struct drm_i915_gem_object **busy_bo);
#endif /* __I915_GEM_EVICT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 34f0e6c923c2..7d044888ac33 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1599,7 +1599,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. */ - i915_gem_evict_vm(vm, NULL); + i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
/* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, NULL); + err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", @@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex); - err = i915_gem_evict_vm(&ggtt->vm, &ww); + err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); }
Thank you for the patch.
I briefly tested this patch and it does fix my original problem of "user-space application crashing due to receiving an -ENOSPC". Once the code is reviewed, I can test it further and report back.
However, there are a few changes in this patch that change the code behaviour, which I do not understand. I must admit that I am not very familiar with this code, but I decided to raise these points anyway, I hope that is OK. Please find my comments inline below:
On Wed, Dec 7, 2022 at 3:11 AM Matthew Auld matthew.auld@intel.com wrote:
The catch-all evict can fail due to object lock contention, since it only goes as far as trylocking the object, due to us already holding the vm->mutex. Doing a full object lock here can deadlock, since the vm->mutex is always our inner lock. Add another execbuf pass which drops the vm->mutex and then tries to grab the object will the full lock, before then retrying the eviction. This should be good enough for now to fix the immediate regression with userspace seeing -ENOSPC from execbuf due to contended object locks during GTT eviction.
Testcase: igt@gem_ppgtt@shrink-vs-evict-* Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627 References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570 References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558 Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Andrzej Hajda andrzej.hajda@intel.com Cc: Mani Milani mani@chromium.org Cc: stable@vger.kernel.org # v5.18+
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++----- drivers/gpu/drm/i915/i915_gem_evict.h | 4 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- 6 files changed, 56 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 86956b902c97..e2ce1e4e9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -745,25 +745,44 @@ static int eb_reserve(struct i915_execbuffer *eb) * * Defragmenting is skipped if all objects are pinned at a fixed location. */
for (pass = 0; pass <= 2; pass++) {
for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK; if (pass >= 1)
unpinned = eb_unbind(eb, pass == 2);
unpinned = eb_unbind(eb, pass >= 2); if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) {
err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); mutex_unlock(&eb->context->vm->mutex); } if (err) return err; }
if (pass == 3) {
+retry:
err = mutex_lock_interruptible(&eb->context->vm->mutex);
if (!err) {
struct drm_i915_gem_object *busy_bo = NULL;
err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
mutex_unlock(&eb->context->vm->mutex);
if (err && busy_bo) {
err = i915_gem_object_lock(busy_bo, &eb->ww);
i915_gem_object_put(busy_bo);
if (!err)
goto retry;
Could we possibly get stuck in a never-ending 'retry' loop here?
}
}
if (err)
return err;
}
list_for_each_entry(ev, &eb->unbound, bind_link) { err = eb_reserve_vma(eb, ev, pin_flags); if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index d73ba0f5c4c5..4f69bff63068 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) {
ret = i915_gem_evict_vm(&ggtt->vm, &ww);
ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4cfe36b0366b..c02ebd6900ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- @vm: Address space to cleanse
- @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
- will be able to evict vma's locked by the ww as well.
- @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
- in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
- then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
- the vm->mutex, before trying again to acquire the contended lock. The caller
- also owns a reference to the object.
- This function evicts all vmas from a vm.
@@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- To clarify: This is for freeing up virtual address space, not for freeing
- memory in e.g. the shrinker.
*/ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object **busy_bo)
{ int ret = 0;
@@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */
if (dying_vma(vma) ||
if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; }
if (!i915_gem_object_trylock(vma->obj, ww))
if (!i915_gem_object_trylock(vma->obj, ww)) {
if (busy_bo) {
*busy_bo = vma->obj; /* holds ref */
ret = -EBUSY;
break;
}
i915_gem_object_put(vma->obj);
If the 'trylock' above fails and 'busy_bo' is NULL, then the code reaches here twice for every call of this function. This means the 'i915_gem_object_put()' above gets called twice, as opposed to the previous behaviour where it was never called. I wonder why the change?
continue;
} __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list);
@@ -498,25 +511,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break;
ret = 0;
I don't understand why the line above is removed? This causes the inconsistencies I have explained in my comments further down. Whereas if we keep this line, then all the vma's already in 'locked_eviction_list' and 'eviction_list' lists get evicted normally, and with 'trylock' failing again on the consecutive loop iteration(s) where the eviction lists are empty, we will return with the correct -EBUSY code anyway.
/* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma);
if (ret == 0)
if (ret == 0) { ret = __i915_vma_unbind(vma);
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
}
if (!dying_vma(vma))
i915_gem_object_put(vma->obj);
If 'busy_bo' != NULL and the 'trylock' above fails resulting in 'ret' = -EBUSY, then for vma's prior to that, we end up calling 'i915_gem_object_put()' without calling '__i915_vma_unbind()'. IIUC, this means we effectively end up calling 'i915_gem_object_put()' twice for vma's appearing before 'trylock' failure in the list, and only once for vma's appearing after. This: 1. Does not seem correct! 2. Is different from previous code behaviour. Why the change?
} list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma);
if (ret == 0)
if (ret == 0) { ret = __i915_vma_unbind(vma);
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
} i915_gem_object_unlock(vma->obj);
i915_gem_object_put(vma->obj);
Same as my previous comment above.
} } while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object;
int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm,
struct i915_gem_ww_ctx *ww);
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object **busy_bo);
#endif /* __I915_GEM_EVICT_H__ */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 34f0e6c923c2..7d044888ac33 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1599,7 +1599,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. */
i915_gem_evict_vm(vm, NULL);
i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
/* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex);
err = i915_gem_evict_vm(&ggtt->vm, NULL);
err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex);
err = i915_gem_evict_vm(&ggtt->vm, &ww);
err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); }
-- 2.38.1
On 14/12/2022 03:32, Mani Milani wrote:
Thank you for the patch.
I briefly tested this patch and it does fix my original problem of "user-space application crashing due to receiving an -ENOSPC". Once the code is reviewed, I can test it further and report back.
However, there are a few changes in this patch that change the code behaviour, which I do not understand. I must admit that I am not very familiar with this code, but I decided to raise these points anyway, I hope that is OK.
Thanks a lot for taking a look.
Please find my comments inline below:
On Wed, Dec 7, 2022 at 3:11 AM Matthew Auld matthew.auld@intel.com wrote:
The catch-all evict can fail due to object lock contention, since it only goes as far as trylocking the object, due to us already holding the vm->mutex. Doing a full object lock here can deadlock, since the vm->mutex is always our inner lock. Add another execbuf pass which drops the vm->mutex and then tries to grab the object will the full lock, before then retrying the eviction. This should be good enough for now to fix the immediate regression with userspace seeing -ENOSPC from execbuf due to contended object locks during GTT eviction.
Testcase: igt@gem_ppgtt@shrink-vs-evict-* Fixes: 7e00897be8bf ("drm/i915: Add object locking to i915_gem_evict_for_node and i915_gem_evict_something, v2.") References: https://gitlab.freedesktop.org/drm/intel/-/issues/7627 References: https://gitlab.freedesktop.org/drm/intel/-/issues/7570 References: https://bugzilla.mozilla.org/show_bug.cgi?id=1779558 Signed-off-by: Matthew Auld matthew.auld@intel.com Cc: Maarten Lankhorst maarten.lankhorst@linux.intel.com Cc: Thomas Hellström thomas.hellstrom@linux.intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@linux.intel.com Cc: Andrzej Hajda andrzej.hajda@intel.com Cc: Mani Milani mani@chromium.org Cc: stable@vger.kernel.org # v5.18+
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 25 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 37 ++++++++++++++----- drivers/gpu/drm/i915/i915_gem_evict.h | 4 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- 6 files changed, 56 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 86956b902c97..e2ce1e4e9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -745,25 +745,44 @@ static int eb_reserve(struct i915_execbuffer *eb) * * Defragmenting is skipped if all objects are pinned at a fixed location. */
for (pass = 0; pass <= 2; pass++) {
for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK; if (pass >= 1)
unpinned = eb_unbind(eb, pass == 2);
unpinned = eb_unbind(eb, pass >= 2); if (pass == 2) { err = mutex_lock_interruptible(&eb->context->vm->mutex); if (!err) {
err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL); mutex_unlock(&eb->context->vm->mutex); } if (err) return err; }
if (pass == 3) {
+retry:
err = mutex_lock_interruptible(&eb->context->vm->mutex);
if (!err) {
struct drm_i915_gem_object *busy_bo = NULL;
err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
mutex_unlock(&eb->context->vm->mutex);
if (err && busy_bo) {
err = i915_gem_object_lock(busy_bo, &eb->ww);
i915_gem_object_put(busy_bo);
if (!err)
goto retry;
Could we possibly get stuck in a never-ending 'retry' loop here?
Each time we encounter a contended object lock we do the backoff here, drop the vm->mutex and retry the lock with the full lock. When re-entering evict_vm() that is one less object we need to lock again (we don't drop it until we destroy the ww ctx). The number of objects in a VM is finite (hopefully not too many for normal workloads).
Note that in in evict_vm() there is: dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx, which returns true if we have already locked this object, with the given ww ctx (so eb->ww here).
}
}
if (err)
return err;
}
list_for_each_entry(ev, &eb->unbound, bind_link) { err = eb_reserve_vma(eb, ev, pin_flags); if (err)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index d73ba0f5c4c5..4f69bff63068 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -369,7 +369,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) if (vma == ERR_PTR(-ENOSPC)) { ret = mutex_lock_interruptible(&ggtt->vm.mutex); if (!ret) {
ret = i915_gem_evict_vm(&ggtt->vm, &ww);
ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); } if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4cfe36b0366b..c02ebd6900ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- @vm: Address space to cleanse
- @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
- will be able to evict vma's locked by the ww as well.
- @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
- in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
- then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
- the vm->mutex, before trying again to acquire the contended lock. The caller
- also owns a reference to the object.
- This function evicts all vmas from a vm.
@@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- To clarify: This is for freeing up virtual address space, not for freeing
- memory in e.g. the shrinker.
*/ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
{ int ret = 0;struct drm_i915_gem_object **busy_bo)
@@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */
if (dying_vma(vma) ||
if (!i915_gem_object_get_rcu(vma->obj) || (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; }
if (!i915_gem_object_trylock(vma->obj, ww))
if (!i915_gem_object_trylock(vma->obj, ww)) {
if (busy_bo) {
*busy_bo = vma->obj; /* holds ref */
ret = -EBUSY;
break;
}
i915_gem_object_put(vma->obj);
If the 'trylock' above fails and 'busy_bo' is NULL, then the code reaches here twice for every call of this function. This means the 'i915_gem_object_put()' above gets called twice, as opposed to the previous behaviour where it was never called. I wonder why the change?
We now do i915_gem_object_get_rcu() above (still returns false if dying), since busy_bo will need a reference to the BO to keep it alive, when returning from evict_vm(). If busy_bo is NULL we just drop that reference here to keep it all balanced.
continue;
} __i915_vma_pin(vma); list_add(&vma->evict_link, &eviction_list);
@@ -498,25 +511,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) if (list_empty(&eviction_list) && list_empty(&locked_eviction_list)) break;
ret = 0;
I don't understand why the line above is removed? This causes the inconsistencies I have explained in my comments further down. Whereas if we keep this line, then all the vma's already in 'locked_eviction_list' and 'eviction_list' lists get evicted normally, and with 'trylock' failing again on the consecutive loop iteration(s) where the eviction lists are empty, we will return with the correct -EBUSY code anyway.
Ignoring this patch, the ret = 0 here looks to not be needed. The only time ret is non zero is below when unbinding stuff, and here the "while (ret == 0)" at the end will catch that and bail anyway. So this shouldn't have any change in behaviour AFAICT.
With this patch I just want to set ret = -EBUSY and eventually return that to the caller, and setting ret = 0 here gets in the way.
/* Unbind locked objects first, before unlocking the eviction_list */ list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) { __i915_vma_unpin(vma);
if (ret == 0)
if (ret == 0) { ret = __i915_vma_unbind(vma);
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
}
if (!dying_vma(vma))
i915_gem_object_put(vma->obj);
If 'busy_bo' != NULL and the 'trylock' above fails resulting in 'ret' = -EBUSY, then for vma's prior to that, we end up calling 'i915_gem_object_put()' without calling '__i915_vma_unbind()'. IIUC, this means we effectively end up calling 'i915_gem_object_put()' twice for vma's appearing before 'trylock' failure in the list, and only once for vma's appearing after. This:
The unbind() here just nukes the GPU page-tables for this VMA (once we are sure the GPU is not still accessing them) and deletes the drm_mm node from the VM, so the next user can use that GPU address/range. I guess we could still do the unbind with -EBUSY, but not doing it shouldn't break anything.
- Does not seem correct!
- Is different from previous code behaviour.
Why the change?
Above we do i915_gem_object_get_rcu(), so this is just balancing that. We get here either because the object is dying, in which case we don't have or need a reference (or lock it seems). If it's not dying then we already locked it, either because of the new retry thing added in this patch, or if the object was locked as part of the execbuf. We also hold a ref in that case.
} list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) { __i915_vma_unpin(vma);
if (ret == 0)
if (ret == 0) { ret = __i915_vma_unbind(vma);
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
if (ret != -EINTR) /* "Get me out of here!" */
ret = 0;
} i915_gem_object_unlock(vma->obj);
i915_gem_object_put(vma->obj);
Same as my previous comment above.
} } while (ret == 0);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h index e593c530f9bd..bf0ee0e4fe60 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.h +++ b/drivers/gpu/drm/i915/i915_gem_evict.h @@ -11,6 +11,7 @@ struct drm_mm_node; struct i915_address_space; struct i915_gem_ww_ctx; +struct drm_i915_gem_object;
int __must_check i915_gem_evict_something(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww, @@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, struct drm_mm_node *node, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm,
struct i915_gem_ww_ctx *ww);
struct i915_gem_ww_ctx *ww,
struct drm_i915_gem_object **busy_bo);
#endif /* __I915_GEM_EVICT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 34f0e6c923c2..7d044888ac33 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1599,7 +1599,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, * locked objects when called from execbuf when pinning * is removed. This would probably regress badly. */
i915_gem_evict_vm(vm, NULL);
i915_gem_evict_vm(vm, NULL, NULL); mutex_unlock(&vm->mutex); } } while (1);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 8c6517d29b8e..37068542aafe 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
/* Everything is pinned, nothing should happen */ mutex_lock(&ggtt->vm.mutex);
err = i915_gem_evict_vm(&ggtt->vm, NULL);
err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL); mutex_unlock(&ggtt->vm.mutex); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
for_i915_gem_ww(&ww, err, false) { mutex_lock(&ggtt->vm.mutex);
err = i915_gem_evict_vm(&ggtt->vm, &ww);
err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL); mutex_unlock(&ggtt->vm.mutex); }
-- 2.38.1
Thanks for the explanations Matthew. It all makes sense now. I will now test this patch further and report back the results.
There is just one comment block that needs to be updated I think. See below:
On Wed, Dec 14, 2022 at 10:47 PM Matthew Auld matthew.auld@intel.com wrote:
...
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 86956b902c97..e2ce1e4e9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -745,25 +745,44 @@ static int eb_reserve(struct i915_execbuffer *eb) * * Defragmenting is skipped if all objects are pinned at a fixed location. */
Could you please update the comment block above and add a little explanation for the new pass=3 you added?
for (pass = 0; pass <= 2; pass++) {
for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4cfe36b0366b..c02ebd6900ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- @vm: Address space to cleanse
- @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
- will be able to evict vma's locked by the ww as well.
- @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
- in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
- then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
- the vm->mutex, before trying again to acquire the contended lock. The caller
- also owns a reference to the object.
- This function evicts all vmas from a vm.
@@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- To clarify: This is for freeing up virtual address space, not for freeing
- memory in e.g. the shrinker.
*/ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
{ int ret = 0;struct drm_i915_gem_object **busy_bo)
@@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */
if (dying_vma(vma) ||
if (!i915_gem_object_get_rcu(vma->obj) ||
Oops, sorry, I had missed the one line change above. After you pointed that out, all the 'i915_gem_object_put()' calls now make perfect sense. Thanks.
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; }
if (!i915_gem_object_trylock(vma->obj, ww))
if (!i915_gem_object_trylock(vma->obj, ww)) {
if (busy_bo) {
*busy_bo = vma->obj; /* holds ref */
ret = -EBUSY;
break;
}
i915_gem_object_put(vma->obj); continue;
}
This patch passed all my tests, some of which were failing before applying the patch. Thanks.
Reviewed-by: Mani Milani mani@chromium.org Tested-by: Mani Milani mani@chromium.org
On Thu, Dec 15, 2022 at 4:46 PM Mani Milani mani@chromium.org wrote:
Thanks for the explanations Matthew. It all makes sense now. I will now test this patch further and report back the results.
There is just one comment block that needs to be updated I think. See below:
On Wed, Dec 14, 2022 at 10:47 PM Matthew Auld matthew.auld@intel.com wrote:
...
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 86956b902c97..e2ce1e4e9723 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -745,25 +745,44 @@ static int eb_reserve(struct i915_execbuffer *eb) * * Defragmenting is skipped if all objects are pinned at a fixed location. */
Could you please update the comment block above and add a little explanation for the new pass=3 you added?
for (pass = 0; pass <= 2; pass++) {
for (pass = 0; pass <= 3; pass++) { int pin_flags = PIN_USER | PIN_VALIDATE; if (pass == 0) pin_flags |= PIN_NONBLOCK;
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 4cfe36b0366b..c02ebd6900ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -441,6 +441,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- @vm: Address space to cleanse
- @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
- will be able to evict vma's locked by the ww as well.
- @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
- in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
- then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
- the vm->mutex, before trying again to acquire the contended lock. The caller
- also owns a reference to the object.
- This function evicts all vmas from a vm.
@@ -450,7 +455,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
- To clarify: This is for freeing up virtual address space, not for freeing
- memory in e.g. the shrinker.
*/ -int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) +int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
{ int ret = 0;struct drm_i915_gem_object **busy_bo)
@@ -482,15 +488,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww) * the resv is shared among multiple objects, we still * need the object ref. */
if (dying_vma(vma) ||
if (!i915_gem_object_get_rcu(vma->obj) ||
Oops, sorry, I had missed the one line change above. After you pointed that out, all the 'i915_gem_object_put()' calls now make perfect sense. Thanks.
(ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) { __i915_vma_pin(vma); list_add(&vma->evict_link, &locked_eviction_list); continue; }
if (!i915_gem_object_trylock(vma->obj, ww))
if (!i915_gem_object_trylock(vma->obj, ww)) {
if (busy_bo) {
*busy_bo = vma->obj; /* holds ref */
ret = -EBUSY;
break;
}
i915_gem_object_put(vma->obj); continue;
}
linux-stable-mirror@lists.linaro.org