From: Xiang Liu xiang.liu@amd.com
[ Upstream commit 928cd772e18ffbd7723cb2361db4a8ccf2222235 ]
It is not necessarily corrupted. When there is RAS fatal error, device memory access is blocked. Hence vcpu bo cannot be saved to system memory as in a regular suspend sequence before going for reset. In other full device reset cases, that gets saved and restored during resume.
v2: Remove redundant code like vcn_v4_0 did v2: Refine commit message v3: Drop the volatile v3: Refine commit message
Signed-off-by: Xiang Liu xiang.liu@amd.com Acked-by: Christian König christian.koenig@amd.com Reviewed-by: Stanley.Yang Stanley.Yang@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 30 ++++++++++++++++++------- 1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 9bae95538b628..77071967f965a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -80,6 +80,20 @@ static int vcn_v4_0_3_early_init(void *handle) return amdgpu_vcn_early_init(adev); }
+static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); + + return 0; +} + /** * vcn_v4_0_3_sw_init - sw init for VCN block * @@ -110,8 +124,6 @@ static int vcn_v4_0_3_sw_init(void *handle) return r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0]; @@ -134,12 +146,7 @@ static int vcn_v4_0_3_sw_init(void *handle) if (r) return r;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = true; - - if (amdgpu_vcnfw_log) - amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + vcn_v4_0_3_fw_shared_init(adev, i); }
if (amdgpu_sriov_vf(adev)) { @@ -224,6 +231,8 @@ static int vcn_v4_0_3_hw_init(void *handle) } } else { for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn4_fw_shared *fw_shared; + vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0];
@@ -247,6 +256,11 @@ static int vcn_v4_0_3_hw_init(void *handle) regVCN_RB1_DB_CTRL); }
+ /* Re-init fw_shared when RAS fatal error occurred */ + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (!fw_shared->sq.is_enabled) + vcn_v4_0_3_fw_shared_init(adev, i); + r = amdgpu_ring_test_helper(ring); if (r) return r;