The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 1b5078f01b953a43d6198180ca5b110017315672 Mon Sep 17 00:00:00 2001
From: Zhikai Zhai <zhikai.zhai(a)amd.com>
Date: Mon, 29 Jan 2024 17:02:18 +0800
Subject: [PATCH] drm/amd/display: Add align done check
[WHY]
We Double-check link status if training successful,
but miss the lane align status.
[HOW]
Add the lane align status check
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Wenjing Liu <wenjing.liu(a)amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Zhikai Zhai <zhikai.zhai(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
.../gpu/drm/amd/display/dc/link/protocols/link_dp_training.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
index e06d3c2d89102..e538c67d3ed91 100644
--- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
+++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_training.c
@@ -517,6 +517,7 @@ enum link_training_result dp_check_link_loss_status(
{
enum link_training_result status = LINK_TRAINING_SUCCESS;
union lane_status lane_status;
+ union lane_align_status_updated dpcd_lane_status_updated;
uint8_t dpcd_buf[6] = {0};
uint32_t lane;
@@ -532,10 +533,12 @@ enum link_training_result dp_check_link_loss_status(
* check lanes status
*/
lane_status.raw = dp_get_nibble_at_index(&dpcd_buf[2], lane);
+ dpcd_lane_status_updated.raw = dpcd_buf[4];
if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
!lane_status.bits.CR_DONE_0 ||
- !lane_status.bits.SYMBOL_LOCKED_0) {
+ !lane_status.bits.SYMBOL_LOCKED_0 ||
+ !dp_is_interlane_aligned(dpcd_lane_status_updated)) {
/* if one of the channel equalization, clock
* recovery or symbol lock is dropped
* consider it as (link has been
--
2.43.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 002bf2fbc00e5c4b95fb167287e2ae7d1973281e Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka(a)linux.intel.com>
Date: Mon, 12 Feb 2024 13:01:35 +0100
Subject: [PATCH] PCI/AER: Block runtime suspend when handling errors
PM runtime can be done simultaneously with AER error handling. Avoid that
by using pm_runtime_get_sync() before and pm_runtime_put() after reset in
pcie_do_recovery() for all recovering devices.
pm_runtime_get_sync() will increase dev->power.usage_count counter to
prevent any possible future request to runtime suspend a device. It will
also resume a device, if it was previously in D3hot state.
I tested with igc device by doing simultaneous aer_inject and rpm
suspend/resume via /sys/bus/pci/devices/PCI_ID/power/control and can
reproduce:
igc 0000:02:00.0: not ready 65535ms after bus reset; giving up
pcieport 0000:00:1c.2: AER: Root Port link has been reset (-25)
pcieport 0000:00:1c.2: AER: subordinate device reset failed
pcieport 0000:00:1c.2: AER: device recovery failed
igc 0000:02:00.0: Unable to change power state from D3hot to D0, device inaccessible
The problem disappears when this patch is applied.
Link: https://lore.kernel.org/r/20240212120135.146068-1-stanislaw.gruszka@linux.i…
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka(a)linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
---
drivers/pci/pcie/err.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 59c90d04a609a..705893b5f7b09 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -13,6 +13,7 @@
#define dev_fmt(fmt) "AER: " fmt
#include <linux/pci.h>
+#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -85,6 +86,18 @@ static int report_error_detected(struct pci_dev *dev,
return 0;
}
+static int pci_pm_runtime_get_sync(struct pci_dev *pdev, void *data)
+{
+ pm_runtime_get_sync(&pdev->dev);
+ return 0;
+}
+
+static int pci_pm_runtime_put(struct pci_dev *pdev, void *data)
+{
+ pm_runtime_put(&pdev->dev);
+ return 0;
+}
+
static int report_frozen_detected(struct pci_dev *dev, void *data)
{
return report_error_detected(dev, pci_channel_io_frozen, data);
@@ -207,6 +220,8 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
else
bridge = pci_upstream_bridge(dev);
+ pci_walk_bridge(bridge, pci_pm_runtime_get_sync, NULL);
+
pci_dbg(bridge, "broadcast error_detected message\n");
if (state == pci_channel_io_frozen) {
pci_walk_bridge(bridge, report_frozen_detected, &status);
@@ -251,10 +266,15 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
pcie_clear_device_status(dev);
pci_aer_clear_nonfatal_status(dev);
}
+
+ pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
+
pci_info(bridge, "device recovery successful\n");
return status;
failed:
+ pci_walk_bridge(bridge, pci_pm_runtime_put, NULL);
+
pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
/* TODO: Should kernel panic here? */
--
2.43.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 3a6a32b31a111f6e66526fb2d3cb13a876465076 Mon Sep 17 00:00:00 2001
From: Gabe Teeger <gabe.teeger(a)amd.com>
Date: Mon, 29 Jan 2024 13:31:44 -0500
Subject: [PATCH] Revert "drm/amd/display: Send DTBCLK disable message on first
commit"
This reverts commit f341055b10bd8be55c3c995dff5f770b236b8ca9.
System hang observed, this commit is thought to be the
regression point.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Ovidiu Bunea <ovidiu.bunea(a)amd.com>
Acked-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Signed-off-by: Gabe Teeger <gabe.teeger(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
index 06edca50a8fa1..36e5bb611fb10 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
@@ -414,7 +414,6 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
- clk_mgr->clks.dtbclk_en = true;
clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
--
2.43.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 3b84525544be4ca0481110263a6d73eb00741cf3 Mon Sep 17 00:00:00 2001
From: Wayne Lin <Wayne.Lin(a)amd.com>
Date: Tue, 2 Jan 2024 14:20:37 +0800
Subject: [PATCH] drm/amd/display: Align the returned error code with legacy DP
[Why]
For usb4 connector, AUX transaction is handled by dmub utilizing a differnt
code path comparing to legacy DP connector. If the usb4 DP connector is
disconnected, AUX access will report EBUSY and cause igt@kms_dp_aux_dev
fail.
[How]
Align the error code with the one reported by legacy DP as EIO.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index eaf8d9f482446..85b7f58a7f35a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -979,6 +979,11 @@ int dm_helper_dmub_aux_transfer_sync(
struct aux_payload *payload,
enum aux_return_code_type *operation_result)
{
+ if (!link->hpd_status) {
+ *operation_result = AUX_RET_ERROR_HPD_DISCON;
+ return -1;
+ }
+
return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
operation_result);
}
--
2.43.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Thanks,
Sasha
------------------ original commit in Linus's tree ------------------
From 3d066f9547dd58329b526db44f42c487a7974703 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Date: Wed, 21 Feb 2024 12:27:31 -0500
Subject: [PATCH] drm/amd/display: Fix idle check for shared firmware state
[WHY]
We still had an instance of get_idle_state checking the PMFW scratch
register instead of the actual idle allow signal.
[HOW]
Replace it with the SW state check for whether we had allowed idle
through notify_idle.
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Duncan Ma <duncan.ma(a)amd.com>
Acked-by: Alex Hung <alex.hung(a)amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
---
drivers/gpu/drm/amd/display/dc/core/dc.c | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 613d09c42f3b9..958552a8605ff 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -4847,22 +4847,16 @@ void dc_exit_ips_for_hw_access(struct dc *dc)
bool dc_dmub_is_ips_idle_state(struct dc *dc)
{
- uint32_t idle_state = 0;
-
if (dc->debug.disable_idle_power_optimizations)
return false;
if (!dc->caps.ips_support || (dc->config.disable_ips == DMUB_IPS_DISABLE_ALL))
return false;
- if (dc->hwss.get_idle_state)
- idle_state = dc->hwss.get_idle_state(dc);
-
- if (!(idle_state & DMUB_IPS1_ALLOW_MASK) ||
- !(idle_state & DMUB_IPS2_ALLOW_MASK))
- return true;
+ if (!dc->ctx->dmub_srv)
+ return false;
- return false;
+ return dc->ctx->dmub_srv->idle_allowed;
}
/* set min and max memory clock to lowest and highest DPM level, respectively */
--
2.43.0