From: Tuo Li <islituo@gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is usually protected by the lock
phba->hbalock when it is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned 0
without holding the lock, which can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock/unlock pair is added around
the unprotected access to phba->fcf.fcf_flag.
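For illustration, a minimal sketch of the race and the fix, condensed
from the snippets above and the diff below (no new helpers are assumed;
the lock and field are the real lpfc ones):

  /* Path A: locked read-modify-write, as done elsewhere in lpfc. */
  spin_lock_irq(&phba->hbalock);
  phba->fcf.fcf_flag |= FCF_INIT_DISC;
  spin_unlock_irq(&phba->hbalock);

  /* Path B, before the fix: a plain store with no lock. Run
   * concurrently with path A, the |= update can be lost or a
   * stale value observed, i.e. a data race.
   */
  phba->fcf.fcf_flag = 0;

  /* Path B, after the fix: serialized on the same lock. */
  spin_lock_irq(&phba->hbalock);
  phba->fcf.fcf_flag = 0;
  spin_unlock_irq(&phba->hbalock);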
Reported-by: BassCheck <bass@buaa.edu.cn>
Signed-off-by: Tuo Li <islituo@gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee@broadcom.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 0abce779fbb13..12602b161bd80 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6699,7 +6699,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo@gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is usually protected by the lock
phba->hbalock when it is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned 0
without holding the lock, which can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock/unlock pair is added around
the unprotected access to phba->fcf.fcf_flag.
Reported-by: BassCheck <bass@buaa.edu.cn>
Signed-off-by: Tuo Li <islituo@gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee@broadcom.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 68ff233f936e5..3ff76ca147a5a 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6790,7 +6790,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo@gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is usually protected by the lock
phba->hbalock when it is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned 0
without holding the lock, which can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock/unlock pair is added around
the unprotected access to phba->fcf.fcf_flag.
Reported-by: BassCheck <bass@buaa.edu.cn>
Signed-off-by: Tuo Li <islituo@gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee@broadcom.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 4bb0a15cfcc01..54aff304cdcf4 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6954,7 +6954,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Tuo Li <islituo@gmail.com>
[ Upstream commit 0e881c0a4b6146b7e856735226208f48251facd8 ]
The variable phba->fcf.fcf_flag is usually protected by the lock
phba->hbalock when it is accessed. Here is an example in
lpfc_unregister_fcf_rescan():
spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag |= FCF_INIT_DISC;
spin_unlock_irq(&phba->hbalock);
However, in the same function, phba->fcf.fcf_flag is assigned 0
without holding the lock, which can cause a data race:
phba->fcf.fcf_flag = 0;
To fix this possible data race, a lock/unlock pair is added around
the unprotected access to phba->fcf.fcf_flag.
Reported-by: BassCheck <bass@buaa.edu.cn>
Signed-off-by: Tuo Li <islituo@gmail.com>
Link: https://lore.kernel.org/r/20230630024748.1035993-1-islituo@gmail.com
Reviewed-by: Justin Tee <justin.tee@broadcom.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/scsi/lpfc/lpfc_hbadisc.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index d38ebd7281b9b..326ef4942e672 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -6960,7 +6960,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
if (rc)
return;
/* Reset HBA FCF states after successful unregister FCF */
+ spin_lock_irq(&phba->hbalock);
phba->fcf.fcf_flag = 0;
+ spin_unlock_irq(&phba->hbalock);
phba->fcf.current_rec.flag = 0;
/*
--
2.40.1
From: Boris Brezillon <boris.brezillon@collabora.com>
[ Upstream commit e30cb0599799aac099209e3b045379613c80730e ]
The drm_sched_entity_kill_jobs_cb() logic omits the last fence popped
from the dependency array, the one that was already being waited upon
when drm_sched_entity_kill() was called (the drm_sched_entity::dependency
field), so we are basically waiting for all dependencies except one.
In theory, this wait shouldn't be needed because resources should have
their users registered to the dma_resv object, thus guaranteeing that
future jobs wanting to access these resources wait on all the previous
users (depending on the access type, of course). But we want to keep
these explicit waits in the kill entity path just in case.
Let's make sure we keep all dependencies in the array in
drm_sched_job_dependency(), so we can iterate over the array and wait
in drm_sched_entity_kill_jobs_cb().
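Condensed from the second hunk below, the dependency lookup now peeks
rather than erases (a sketch, not the full function):

  /* In drm_sched_job_dependency(): look up the next dependency with
   * xa_load() instead of xa_erase(), so the xarray keeps every fence
   * for the kill path to iterate over later.
   */
  f = xa_load(&job->dependencies, job->last_dependency);
  if (f) {
          job->last_dependency++;
          return dma_fence_get(f); /* hand the caller its own reference */
  }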
We also make sure we wait on drm_sched_fence::finished if we were
originally asked to wait on drm_sched_fence::scheduled. In that case,
we assume the intent was to delegate the wait to the firmware/GPU or
rely on the pipelining done at the entity/scheduler level, but when
killing jobs, we really want to wait for completion, not just scheduling.
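The kill path then ends up looking roughly like this (condensed from
the first hunk below):

  /* In drm_sched_entity_kill_jobs_cb(): walk every remaining
   * dependency; if it is a scheduler fence's "scheduled" fence,
   * trade it for the "finished" fence so we wait for completion.
   */
  xa_for_each(&job->dependencies, index, f) {
          struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

          if (s_fence && f == &s_fence->scheduled) {
                  /* finished may already have dropped to zero refs;
                   * dma_fence_get_rcu() then yields NULL.
                   */
                  f = dma_fence_get_rcu(&s_fence->finished);
                  dma_fence_put(&s_fence->scheduled);
          }

          xa_erase(&job->dependencies, index);
          if (f && !dma_fence_add_callback(f, &job->finish_cb,
                                           drm_sched_entity_kill_jobs_cb))
                  return; /* callback armed; resume when it fires */
          dma_fence_put(f);
  }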
v2:
- Don't evict deps in drm_sched_job_dependency()
v3:
- Always wait for drm_sched_fence::finished fences in
drm_sched_entity_kill_jobs_cb() when we see a sched_fence
v4:
- Fix commit message
- Fix a use-after-free bug
v5:
- Flag deps on which we should only wait for the scheduled event
at insertion time
v6:
- Back to v4 implementation
- Add Christian's R-b
Cc: Frank Binns <frank.binns@imgtec.com>
Cc: Sarah Walker <sarah.walker@imgtec.com>
Cc: Donald Robson <donald.robson@imgtec.com>
Cc: Luben Tuikov <luben.tuikov@amd.com>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: "Christian König" <christian.koenig@amd.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Suggested-by: "Christian König" <christian.koenig@amd.com>
Reviewed-by: "Christian König" <christian.koenig@amd.com>
Acked-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230619071921.3465992-1-bori…
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/gpu/drm/scheduler/sched_entity.c | 41 +++++++++++++++++++-----
1 file changed, 33 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index e0a8890a62e23..42021d1f7e016 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -155,16 +155,32 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
{
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);
- int r;
+ unsigned long index;
dma_fence_put(f);
/* Wait for all dependencies to avoid data corruptions */
- while (!xa_empty(&job->dependencies)) {
- f = xa_erase(&job->dependencies, job->last_dependency++);
- r = dma_fence_add_callback(f, &job->finish_cb,
- drm_sched_entity_kill_jobs_cb);
- if (!r)
+ xa_for_each(&job->dependencies, index, f) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+ if (s_fence && f == &s_fence->scheduled) {
+ /* The dependencies array had a reference on the scheduled
+ * fence, and the finished fence refcount might have
+ * dropped to zero. Use dma_fence_get_rcu() so we get
+ * a NULL fence in that case.
+ */
+ f = dma_fence_get_rcu(&s_fence->finished);
+
+ /* Now that we have a reference on the finished fence,
+ * we can release the reference the dependencies array
+ * had on the scheduled fence.
+ */
+ dma_fence_put(&s_fence->scheduled);
+ }
+
+ xa_erase(&job->dependencies, index);
+ if (f && !dma_fence_add_callback(f, &job->finish_cb,
+ drm_sched_entity_kill_jobs_cb))
return;
dma_fence_put(f);
@@ -394,8 +410,17 @@ static struct dma_fence *
drm_sched_job_dependency(struct drm_sched_job *job,
struct drm_sched_entity *entity)
{
- if (!xa_empty(&job->dependencies))
- return xa_erase(&job->dependencies, job->last_dependency++);
+ struct dma_fence *f;
+
+ /* We keep the fence around, so we can iterate over all dependencies
+ * in drm_sched_entity_kill_jobs_cb() to ensure all deps are signaled
+ * before killing the job.
+ */
+ f = xa_load(&job->dependencies, job->last_dependency);
+ if (f) {
+ job->last_dependency++;
+ return dma_fence_get(f);
+ }
if (job->sched->ops->prepare_job)
return job->sched->ops->prepare_job(job, entity);
--
2.40.1