The patch below does not apply to the 6.12-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x d25e3a610bae03bffc5c14b5d944a5d0cd844678
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to 'stable@vger.kernel.org' --in-reply-to '2025110342-exhume-mankind-5952@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d25e3a610bae03bffc5c14b5d944a5d0cd844678 Mon Sep 17 00:00:00 2001
From: Philipp Stanner <phasta@kernel.org>
Date: Wed, 22 Oct 2025 08:34:03 +0200
Subject: [PATCH] drm/sched: Fix race in drm_sched_entity_select_rq()
In a past bug fix it was forgotten that entity access must be protected by the entity lock. That's a data race and potentially UB.
Move the spin_unlock() to the appropriate position.
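
[Editor's note: for illustration, a minimal sketch of the race being fixed, simplified from sched_entity.c and not the verbatim kernel code. entity->sched_list and entity->num_sched_list are written under entity->lock by drm_sched_entity_modify_sched(), so touching them after the unlock can race with a concurrent modify:

  /* Simplified pre-fix shape of drm_sched_entity_select_rq(): */
  spin_lock(&entity->lock);
  /* ... pick and switch to a new run queue ... */
  spin_unlock(&entity->lock);        /* lock dropped too early */

  if (entity->num_sched_list == 1)   /* unlocked read: races with         */
          entity->sched_list = NULL; /* drm_sched_entity_modify_sched()   */

Moving the unlock below these two lines, as the diff below does, closes the window.]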
Cc: stable@vger.kernel.org # v5.13+
Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3")
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Philipp Stanner <phasta@kernel.org>
Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 5a4697f636f2..aa222166de58 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -552,10 +552,11 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 		drm_sched_rq_remove_entity(entity->rq, entity);
 		entity->rq = rq;
 	}
-	spin_unlock(&entity->lock);

 	if (entity->num_sched_list == 1)
 		entity->sched_list = NULL;
+
+	spin_unlock(&entity->lock);
 }

 /**
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
[ Upstream commit d42a254633c773921884a19e8a1a0f53a31150c3 ]
In FIFO mode (which is the default), both drm_sched_entity_push_job() and drm_sched_rq_update_fifo(), where the latter is called by the former, are currently taking and releasing the same entity->rq_lock.

We can avoid that design inelegance, and also gain a minuscule efficiency improvement on the submit-from-idle path, by introducing a new drm_sched_rq_update_fifo_locked() helper and pulling the lock taking up into its callers.
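
[Editor's note: this follows the common kernel "_locked" naming convention, sketched below with hypothetical names (foo/foo_update are illustrative only, not scheduler code):

  /* A *_locked() variant requires the caller to already hold the lock;
   * lockdep_assert_held() documents the contract and enforces it at
   * runtime when CONFIG_PROVE_LOCKING is enabled. */
  static void foo_update_locked(struct foo *f)
  {
          lockdep_assert_held(&f->lock);
          /* ... modify state protected by f->lock ... */
  }

  /* Callers take the lock once and may batch several *_locked() calls
   * into a single critical section, as the patch below does. */
  static void foo_update(struct foo *f)
  {
          spin_lock(&f->lock);
          foo_update_locked(f);
          spin_unlock(&f->lock);
  }
]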
v2: * Remove drm_sched_rq_update_fifo() altogether. (Christian)
v3: * Improved commit message. (Philipp)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-2-tursulin...
Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 13 +++++++++----
 drivers/gpu/drm/scheduler/sched_main.c   |  6 +++---
 include/drm/gpu_scheduler.h              |  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 3e75fc1f66072..9dbae7b08bc90 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -505,8 +505,12 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 		struct drm_sched_job *next;

 		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
-		if (next)
-			drm_sched_rq_update_fifo(entity, next->submit_ts);
+		if (next) {
+			spin_lock(&entity->rq_lock);
+			drm_sched_rq_update_fifo_locked(entity,
+							next->submit_ts);
+			spin_unlock(&entity->rq_lock);
+		}
 	}

 	/* Jobs and entities might have different lifecycles. Since we're
@@ -606,10 +610,11 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 		sched = rq->sched;

 		drm_sched_rq_add_entity(rq, entity);
-		spin_unlock(&entity->rq_lock);

 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
-			drm_sched_rq_update_fifo(entity, submit_ts);
+			drm_sched_rq_update_fifo_locked(entity, submit_ts);
+
+		spin_unlock(&entity->rq_lock);

 		drm_sched_wakeup(sched);
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index d5260cb1ed0ec..0b7976c908dde 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -169,14 +169,15 @@ static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity)
 	}
 }

-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
+void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
 {
 	/*
 	 * Both locks need to be grabbed, one to protect from entity->rq change
 	 * for entity from within concurrent drm_sched_entity_select_rq and the
 	 * other to update the rb tree structure.
 	 */
-	spin_lock(&entity->rq_lock);
+	lockdep_assert_held(&entity->rq_lock);
+
 	spin_lock(&entity->rq->lock);

 	drm_sched_rq_remove_fifo_locked(entity);
@@ -187,7 +188,6 @@ void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts)
 			   drm_sched_entity_compare_before);

 	spin_unlock(&entity->rq->lock);
-	spin_unlock(&entity->rq_lock);
 }

 /**
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 9c437a057e5de..346a3c261b437 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -593,7 +593,7 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 				struct drm_sched_entity *entity);

-void drm_sched_rq_update_fifo(struct drm_sched_entity *entity, ktime_t ts);
+void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts);

 int drm_sched_entity_init(struct drm_sched_entity *entity,
 			  enum drm_sched_priority priority,
From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
[ Upstream commit f93126f5d55920d1447ef00a3fbe6706f40f53de ]
When writing to a drm_sched_entity's run-queue, writers are protected through the lock drm_sched_entity.rq_lock. This naming, however, frequently collides with the separate internal lock of struct drm_sched_rq, resulting in uses like this:
  spin_lock(&entity->rq_lock);
  spin_lock(&entity->rq->lock);
Rename drm_sched_entity.rq_lock to improve readability. While at it, re-order that struct's members to make it more obvious what the lock protects.
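
[Editor's note: after the rename, the nested acquisition from the example above reads unambiguously; a sketch of the resulting shape:

  spin_lock(&entity->lock);      /* drm_sched_entity.lock            */
  spin_lock(&entity->rq->lock);  /* struct drm_sched_rq's own lock   */
]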
v2: * Rename some rq_lock straddlers in kerneldoc, improve commit text. (Philipp)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Luben Tuikov <ltuikov89@gmail.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Philipp Stanner <pstanner@redhat.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
[pstanner: Fix typo in docstring]
Signed-off-by: Philipp Stanner <pstanner@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241016122013.7857-5-tursulin...
Stable-dep-of: d25e3a610bae ("drm/sched: Fix race in drm_sched_entity_select_rq()")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 28 ++++++++++++------------
 drivers/gpu/drm/scheduler/sched_main.c   |  2 +-
 include/drm/gpu_scheduler.h              | 21 +++++++++---------
 3 files changed, 26 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 9dbae7b08bc90..089e8ba0435b8 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -105,7 +105,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 	/* We start in an idle state. */
 	complete_all(&entity->entity_idle);

-	spin_lock_init(&entity->rq_lock);
+	spin_lock_init(&entity->lock);
 	spsc_queue_init(&entity->job_queue);

 	atomic_set(&entity->fence_seq, 0);
@@ -133,10 +133,10 @@ void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
 {
 	WARN_ON(!num_sched_list || !sched_list);

-	spin_lock(&entity->rq_lock);
+	spin_lock(&entity->lock);
 	entity->sched_list = sched_list;
 	entity->num_sched_list = num_sched_list;
-	spin_unlock(&entity->rq_lock);
+	spin_unlock(&entity->lock);
 }
 EXPORT_SYMBOL(drm_sched_entity_modify_sched);

@@ -245,10 +245,10 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
 	if (!entity->rq)
 		return;

-	spin_lock(&entity->rq_lock);
+	spin_lock(&entity->lock);
 	entity->stopped = true;
 	drm_sched_rq_remove_entity(entity->rq, entity);
-	spin_unlock(&entity->rq_lock);
+	spin_unlock(&entity->lock);

 	/* Make sure this entity is not used by the scheduler at the moment */
 	wait_for_completion(&entity->entity_idle);
@@ -394,9 +394,9 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
 void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 				   enum drm_sched_priority priority)
 {
-	spin_lock(&entity->rq_lock);
+	spin_lock(&entity->lock);
 	entity->priority = priority;
-	spin_unlock(&entity->rq_lock);
+	spin_unlock(&entity->lock);
 }
 EXPORT_SYMBOL(drm_sched_entity_set_priority);

@@ -506,10 +506,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)

 		next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
 		if (next) {
-			spin_lock(&entity->rq_lock);
+			spin_lock(&entity->lock);
 			drm_sched_rq_update_fifo_locked(entity,
 							next->submit_ts);
-			spin_unlock(&entity->rq_lock);
+			spin_unlock(&entity->lock);
 		}
 	}

@@ -550,14 +550,14 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 	if (fence && !dma_fence_is_signaled(fence))
 		return;

-	spin_lock(&entity->rq_lock);
+	spin_lock(&entity->lock);
 	sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
 	rq = sched ? sched->sched_rq[entity->priority] : NULL;
 	if (rq != entity->rq) {
 		drm_sched_rq_remove_entity(entity->rq, entity);
 		entity->rq = rq;
 	}
-	spin_unlock(&entity->rq_lock);
+	spin_unlock(&entity->lock);

 	if (entity->num_sched_list == 1)
 		entity->sched_list = NULL;
@@ -598,9 +598,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 		struct drm_sched_rq *rq;

 		/* Add the entity to the run queue */
-		spin_lock(&entity->rq_lock);
+		spin_lock(&entity->lock);
 		if (entity->stopped) {
-			spin_unlock(&entity->rq_lock);
+			spin_unlock(&entity->lock);

 			DRM_ERROR("Trying to push to a killed entity\n");
 			return;
@@ -614,7 +614,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
 		if (drm_sched_policy == DRM_SCHED_POLICY_FIFO)
 			drm_sched_rq_update_fifo_locked(entity, submit_ts);

-		spin_unlock(&entity->rq_lock);
+		spin_unlock(&entity->lock);

 		drm_sched_wakeup(sched);
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 0b7976c908dde..4dde0dc525ce5 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -176,7 +176,7 @@ void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts)
 	 * for entity from within concurrent drm_sched_entity_select_rq and the
 	 * other to update the rb tree structure.
 	 */
-	lockdep_assert_held(&entity->rq_lock);
+	lockdep_assert_held(&entity->lock);

 	spin_lock(&entity->rq->lock);

diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 346a3c261b437..e78adc7a91951 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -96,14 +96,22 @@ struct drm_sched_entity {
 	 */
 	struct list_head list;

+	/**
+	 * @lock:
+	 *
+	 * Lock protecting the run-queue (@rq) to which this entity belongs,
+	 * @priority and the list of schedulers (@sched_list, @num_sched_list).
+	 */
+	spinlock_t lock;
+
 	/**
 	 * @rq:
 	 *
 	 * Runqueue on which this entity is currently scheduled.
 	 *
 	 * FIXME: Locking is very unclear for this. Writers are protected by
-	 * @rq_lock, but readers are generally lockless and seem to just race
-	 * with not even a READ_ONCE.
+	 * @lock, but readers are generally lockless and seem to just race with
+	 * not even a READ_ONCE.
 	 */
 	struct drm_sched_rq *rq;

@@ -136,17 +144,10 @@ struct drm_sched_entity {
 	 * @priority:
 	 *
 	 * Priority of the entity. This can be modified by calling
-	 * drm_sched_entity_set_priority(). Protected by &rq_lock.
+	 * drm_sched_entity_set_priority(). Protected by @lock.
 	 */
 	enum drm_sched_priority priority;

-	/**
-	 * @rq_lock:
-	 *
-	 * Lock to modify the runqueue to which this entity belongs.
-	 */
-	spinlock_t rq_lock;
-
 	/**
 	 * @job_queue: the list of jobs of this entity.
 	 */
From: Philipp Stanner <phasta@kernel.org>
[ Upstream commit d25e3a610bae03bffc5c14b5d944a5d0cd844678 ]
In a past bug fix it was forgotten that entity access must be protected by the entity lock. That's a data race and potentially UB.
Move the spin_unlock() to the appropriate position.
Cc: stable@vger.kernel.org # v5.13+
Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3")
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
Signed-off-by: Philipp Stanner <phasta@kernel.org>
Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 089e8ba0435b8..f5b5729433cbb 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -557,10 +557,11 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 		drm_sched_rq_remove_entity(entity->rq, entity);
 		entity->rq = rq;
 	}
-	spin_unlock(&entity->lock);

 	if (entity->num_sched_list == 1)
 		entity->sched_list = NULL;
+
+	spin_unlock(&entity->lock);
 }

 /**