The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 3db9d590557da3aa2c952f2fecd3e9b703dad790
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678448288166151(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
3db9d590557d ("drm/i915/gt: Reset twice")
cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets")
f2db53f14d3d ("drm/i915: Replace "_load" with "_probe" consequently")
b5893ffc274b ("drm/i915: Drop extern qualifiers from header function prototypes")
2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")
b8cade5959ac ("drm/i915: Show instdone for each engine in debugfs")
63251685c141 ("drm/i915/selftests: Common live setup/teardown")
092be382a260 ("drm/i915: Lift intel_engines_resume() to callers")
18398904ca9e ("drm/i915: Only recover active engines")
faaa2902b5a9 ("drm/i915/selftests: Fixup atomic reset checking")
1e5deb263265 ("drm/i915/selftests: Drop manual request wakerefs around hangcheck")
d84747956654 ("drm/i915/selftests: Serialise nop reset with retirement")
5f22e5b3116c ("drm/i915: Rename intel_wakeref_[is]_active")
0c91621cad49 ("drm/i915/gt: Pass intel_gt to pm routines")
a93615f900bd ("drm/i915: Throw away the active object retirement complexity")
db56f974941b ("drm/i915: Eliminate dual personality of i915_scratch_offset")
f0c02c1b9188 ("drm/i915: Rename i915_timeline to intel_timeline and move under gt")
4c6d51ea2a68 ("drm/i915: Make timelines gt centric")
ba4134a41931 ("drm/i915: Save trip via top-level i915 in a few more places")
390c82055b74 ("drm/i915: Compartmentalize timeline_init/park/fini")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3db9d590557da3aa2c952f2fecd3e9b703dad790 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 12 Dec 2022 17:13:38 +0100
Subject: [PATCH] drm/i915/gt: Reset twice
After applying an engine reset, on some platforms like Jasperlake, we
occasionally detect that the engine state is not cleared until shortly
after the resume. As we try to resume the engine with volatile internal
state, the first request fails with a spurious CS event (it looks like
it reports a lite-restore to the hung context, instead of the expected
idle->active context switch).
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: stable(a)vger.kernel.org
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ffde89c5835a..0bb9094fdacd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -268,6 +268,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
+ int loops = 2;
int err;
/*
@@ -275,18 +276,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
* for fifo space for the write or forcewake the chip for
* the read
*/
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+ do {
+ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
+ /*
+ * Wait for the device to ack the reset requests.
+ *
+ * On some platforms, e.g. Jasperlake, we see that the
+ * engine register state is not cleared until shortly after
+ * GDRST reports completion, causing a failure as we try
+ * to immediately resume while the internal state is still
+ * in flux. If we immediately repeat the reset, the second
+ * reset appears to serialise with the first, and since
+ * it is a no-op, the registers should retain their reset
+ * value. However, there is still a concern that upon
+ * leaving the second reset, the internal engine state
+ * is still in flux and not ready for resuming.
+ */
+ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
+ hw_domain_mask, 0,
+ 2000, 0,
+ NULL);
+ } while (err == 0 && --loops);
if (err)
GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n",
hw_domain_mask);
+ /*
+ * As we have observed that the engine state is still volatile
+ * after GDRST is acked, impose a small delay to let everything settle.
+ */
+ udelay(50);
+
return err;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 3db9d590557da3aa2c952f2fecd3e9b703dad790
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16784482878746(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
3db9d590557d ("drm/i915/gt: Reset twice")
cb823ed9915b ("drm/i915/gt: Use intel_gt as the primary object for handling resets")
f2db53f14d3d ("drm/i915: Replace "_load" with "_probe" consequently")
b5893ffc274b ("drm/i915: Drop extern qualifiers from header function prototypes")
2a98f4e65bba ("drm/i915: add infrastructure to hold off preemption on a request")
b8cade5959ac ("drm/i915: Show instdone for each engine in debugfs")
63251685c141 ("drm/i915/selftests: Common live setup/teardown")
092be382a260 ("drm/i915: Lift intel_engines_resume() to callers")
18398904ca9e ("drm/i915: Only recover active engines")
faaa2902b5a9 ("drm/i915/selftests: Fixup atomic reset checking")
1e5deb263265 ("drm/i915/selftests: Drop manual request wakerefs around hangcheck")
d84747956654 ("drm/i915/selftests: Serialise nop reset with retirement")
5f22e5b3116c ("drm/i915: Rename intel_wakeref_[is]_active")
0c91621cad49 ("drm/i915/gt: Pass intel_gt to pm routines")
a93615f900bd ("drm/i915: Throw away the active object retirement complexity")
db56f974941b ("drm/i915: Eliminate dual personality of i915_scratch_offset")
f0c02c1b9188 ("drm/i915: Rename i915_timeline to intel_timeline and move under gt")
4c6d51ea2a68 ("drm/i915: Make timelines gt centric")
ba4134a41931 ("drm/i915: Save trip via top-level i915 in a few more places")
390c82055b74 ("drm/i915: Compartmentalize timeline_init/park/fini")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3db9d590557da3aa2c952f2fecd3e9b703dad790 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 12 Dec 2022 17:13:38 +0100
Subject: [PATCH] drm/i915/gt: Reset twice
After applying an engine reset, on some platforms like Jasperlake, we
occasionally detect that the engine state is not cleared until shortly
after the resume. As we try to resume the engine with volatile internal
state, the first request fails with a spurious CS event (it looks like
it reports a lite-restore to the hung context, instead of the expected
idle->active context switch).
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: stable(a)vger.kernel.org
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ffde89c5835a..0bb9094fdacd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -268,6 +268,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
+ int loops = 2;
int err;
/*
@@ -275,18 +276,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
* for fifo space for the write or forcewake the chip for
* the read
*/
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+ do {
+ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
+ /*
+ * Wait for the device to ack the reset requests.
+ *
+ * On some platforms, e.g. Jasperlake, we see that the
+ * engine register state is not cleared until shortly after
+ * GDRST reports completion, causing a failure as we try
+ * to immediately resume while the internal state is still
+ * in flux. If we immediately repeat the reset, the second
+ * reset appears to serialise with the first, and since
+ * it is a no-op, the registers should retain their reset
+ * value. However, there is still a concern that upon
+ * leaving the second reset, the internal engine state
+ * is still in flux and not ready for resuming.
+ */
+ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
+ hw_domain_mask, 0,
+ 2000, 0,
+ NULL);
+ } while (err == 0 && --loops);
if (err)
GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n",
hw_domain_mask);
+ /*
+ * As we have observed that the engine state is still volatile
+ * after GDRST is acked, impose a small delay to let everything settle.
+ */
+ udelay(50);
+
return err;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3db9d590557da3aa2c952f2fecd3e9b703dad790
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16784482866392(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
3db9d590557d ("drm/i915/gt: Reset twice")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3db9d590557da3aa2c952f2fecd3e9b703dad790 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 12 Dec 2022 17:13:38 +0100
Subject: [PATCH] drm/i915/gt: Reset twice
After applying an engine reset, on some platforms like Jasperlake, we
occasionally detect that the engine state is not cleared until shortly
after the resume. As we try to resume the engine with volatile internal
state, the first request fails with a spurious CS event (it looks like
it reports a lite-restore to the hung context, instead of the expected
idle->active context switch).
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: stable(a)vger.kernel.org
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ffde89c5835a..0bb9094fdacd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -268,6 +268,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
+ int loops = 2;
int err;
/*
@@ -275,18 +276,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
* for fifo space for the write or forcewake the chip for
* the read
*/
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+ do {
+ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
+ /*
+ * Wait for the device to ack the reset requests.
+ *
+ * On some platforms, e.g. Jasperlake, we see that the
+ * engine register state is not cleared until shortly after
+ * GDRST reports completion, causing a failure as we try
+ * to immediately resume while the internal state is still
+ * in flux. If we immediately repeat the reset, the second
+ * reset appears to serialise with the first, and since
+ * it is a no-op, the registers should retain their reset
+ * value. However, there is still a concern that upon
+ * leaving the second reset, the internal engine state
+ * is still in flux and not ready for resuming.
+ */
+ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
+ hw_domain_mask, 0,
+ 2000, 0,
+ NULL);
+ } while (err == 0 && --loops);
if (err)
GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n",
hw_domain_mask);
+ /*
+ * As we have observed that the engine state is still volatile
+ * after GDRST is acked, impose a small delay to let everything settle.
+ */
+ udelay(50);
+
return err;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3db9d590557da3aa2c952f2fecd3e9b703dad790
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678448285172230(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
3db9d590557d ("drm/i915/gt: Reset twice")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3db9d590557da3aa2c952f2fecd3e9b703dad790 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 12 Dec 2022 17:13:38 +0100
Subject: [PATCH] drm/i915/gt: Reset twice
After applying an engine reset, on some platforms like Jasperlake, we
occasionally detect that the engine state is not cleared until shortly
after the resume. As we try to resume the engine with volatile internal
state, the first request fails with a spurious CS event (it looks like
it reports a lite-restore to the hung context, instead of the expected
idle->active context switch).
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: stable(a)vger.kernel.org
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ffde89c5835a..0bb9094fdacd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -268,6 +268,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
+ int loops = 2;
int err;
/*
@@ -275,18 +276,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
* for fifo space for the write or forcewake the chip for
* the read
*/
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+ do {
+ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
+ /*
+ * Wait for the device to ack the reset requests.
+ *
+ * On some platforms, e.g. Jasperlake, we see that the
+ * engine register state is not cleared until shortly after
+ * GDRST reports completion, causing a failure as we try
+ * to immediately resume while the internal state is still
+ * in flux. If we immediately repeat the reset, the second
+ * reset appears to serialise with the first, and since
+ * it is a no-op, the registers should retain their reset
+ * value. However, there is still a concern that upon
+ * leaving the second reset, the internal engine state
+ * is still in flux and not ready for resuming.
+ */
+ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
+ hw_domain_mask, 0,
+ 2000, 0,
+ NULL);
+ } while (err == 0 && --loops);
if (err)
GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n",
hw_domain_mask);
+ /*
+ * As we have observed that the engine state is still volatile
+ * after GDRST is acked, impose a small delay to let everything settle.
+ */
+ udelay(50);
+
return err;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 3db9d590557da3aa2c952f2fecd3e9b703dad790
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167844828519929(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
3db9d590557d ("drm/i915/gt: Reset twice")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3db9d590557da3aa2c952f2fecd3e9b703dad790 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Mon, 12 Dec 2022 17:13:38 +0100
Subject: [PATCH] drm/i915/gt: Reset twice
After applying an engine reset, on some platforms like Jasperlake, we
occasionally detect that the engine state is not cleared until shortly
after the resume. As we try to resume the engine with volatile internal
state, the first request fails with a spurious CS event (it looks like
it reports a lite-restore to the hung context, instead of the expected
idle->active context switch).
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: stable(a)vger.kernel.org
Cc: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index ffde89c5835a..0bb9094fdacd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -268,6 +268,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
struct intel_uncore *uncore = gt->uncore;
+ int loops = 2;
int err;
/*
@@ -275,18 +276,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
* for fifo space for the write or forcewake the chip for
* the read
*/
- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+ do {
+ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
- /* Wait for the device to ack the reset requests */
- err = __intel_wait_for_register_fw(uncore,
- GEN6_GDRST, hw_domain_mask, 0,
- 500, 0,
- NULL);
+ /*
+ * Wait for the device to ack the reset requests.
+ *
+ * On some platforms, e.g. Jasperlake, we see that the
+ * engine register state is not cleared until shortly after
+ * GDRST reports completion, causing a failure as we try
+ * to immediately resume while the internal state is still
+ * in flux. If we immediately repeat the reset, the second
+ * reset appears to serialise with the first, and since
+ * it is a no-op, the registers should retain their reset
+ * value. However, there is still a concern that upon
+ * leaving the second reset, the internal engine state
+ * is still in flux and not ready for resuming.
+ */
+ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
+ hw_domain_mask, 0,
+ 2000, 0,
+ NULL);
+ } while (err == 0 && --loops);
if (err)
GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n",
hw_domain_mask);
+ /*
+ * As we have observed that the engine state is still volatile
+ * after GDRST is acked, impose a small delay to let everything settle.
+ */
+ udelay(50);
+
return err;
}