[PATCH AUTOSEL 6.11 12/15] drm/amdgpu/vcn: reset fw_shared when VCPU buffers corrupted on vcn v4.0.3

4 Dec 2024

From: Xiang Liu xiang.liu@amd.com
[ Upstream commit 928cd772e18ffbd7723cb2361db4a8ccf2222235 ]
It is not necessarily corrupted. When there is RAS fatal error, device
memory access is blocked. Hence vcpu bo cannot be saved to system memory
as in a regular suspend sequence before going for reset. In other full
device reset cases, that gets saved and restored during resume.
v2: Remove redundant code like vcn_v4_0 did
v2: Refine commit message
v3: Drop the volatile
v3: Refine commit message
Signed-off-by: Xiang Liu xiang.liu@amd.com
Acked-by: Christian König christian.koenig@amd.com
Reviewed-by: Stanley.Yang Stanley.Yang@amd.com
Signed-off-by: Alex Deucher alexander.deucher@amd.com
Signed-off-by: Sasha Levin sashal@kernel.org
---
 drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 30 ++++++++++++++++++-------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 9bae95538b628..77071967f965a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -80,6 +80,20 @@ static int vcn_v4_0_3_early_init(void *handle)
    return amdgpu_vcn_early_init(adev);
 }
+static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+	struct amdgpu_vcn4_fw_shared *fw_shared;
+
+	fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+	fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+	fw_shared->sq.is_enabled = 1;
+
+	if (amdgpu_vcnfw_log)
+		amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+	return 0;
+}
+
 /**
  * vcn_v4_0_3_sw_init - sw init for VCN block
  *
@@ -110,8 +124,6 @@ static int vcn_v4_0_3_sw_init(void *handle)
    	return r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
-
    	vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0];
@@ -134,12 +146,7 @@ static int vcn_v4_0_3_sw_init(void *handle)
    	if (r)
    		return r;
-		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
-		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
-		fw_shared->sq.is_enabled = true;
-
-		if (amdgpu_vcnfw_log)
-			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+		vcn_v4_0_3_fw_shared_init(adev, i);
    }
if (amdgpu_sriov_vf(adev)) {
@@ -224,6 +231,8 @@ static int vcn_v4_0_3_hw_init(void *handle)
    	}
    } else {
    	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+			struct amdgpu_vcn4_fw_shared *fw_shared;
+
    		vcn_inst = GET_INST(VCN, i);
    		ring = &adev->vcn.inst[i].ring_enc[0];
@@ -247,6 +256,11 @@ static int vcn_v4_0_3_hw_init(void *handle)
    				regVCN_RB1_DB_CTRL);
    		}
+			/* Re-init fw_shared when RAS fatal error occurred */
+			fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+			if (!fw_shared->sq.is_enabled)
+				vcn_v4_0_3_fw_shared_init(adev, i);
+
    		r = amdgpu_ring_test_helper(ring);
    		if (r)
    			return r;
-- 
2.43.0



    

2025

2024

2023

2022

2021

2020

2019

2018

2017

[PATCH AUTOSEL 6.11 12/15] drm/amdgpu/vcn: reset fw_shared when VCPU buffers corrupted on vcn v4.0.3