[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241204221627.2247598-12-sashal@kernel.org>
Date: Wed, 4 Dec 2024 17:16:06 -0500
From: Sasha Levin <sashal@...nel.org>
To: linux-kernel@...r.kernel.org,
stable@...r.kernel.org
Cc: Xiang Liu <xiang.liu@....com>,
Christian König <christian.koenig@....com>,
"Stanley . Yang" <Stanley.Yang@....com>,
Alex Deucher <alexander.deucher@....com>,
Sasha Levin <sashal@...nel.org>,
Xinhui.Pan@....com,
airlied@...il.com,
simona@...ll.ch,
sunil.khatri@....com,
lijo.lazar@....com,
leo.liu@....com,
Jane.Jian@....com,
David.Wu3@....com,
sathishkumar.sundararaju@....com,
amd-gfx@...ts.freedesktop.org,
dri-devel@...ts.freedesktop.org
Subject: [PATCH AUTOSEL 6.12 12/15] drm/amdgpu/vcn: reset fw_shared when VCPU buffers corrupted on vcn v4.0.3
From: Xiang Liu <xiang.liu@....com>
[ Upstream commit 928cd772e18ffbd7723cb2361db4a8ccf2222235 ]
The VCPU buffers are not necessarily corrupted. When there is a RAS fatal
error, device memory access is blocked. Hence the VCPU BO cannot be saved to
system memory, as it would be in a regular suspend sequence, before going for
reset. In other full device reset cases, the BO is saved and then restored
during resume.
v2: Remove redundant code like vcn_v4_0 did
v2: Refine commit message
v3: Drop the volatile
v3: Refine commit message
Signed-off-by: Xiang Liu <xiang.liu@....com>
Acked-by: Christian König <christian.koenig@....com>
Reviewed-by: Stanley.Yang <Stanley.Yang@....com>
Signed-off-by: Alex Deucher <alexander.deucher@....com>
Signed-off-by: Sasha Levin <sashal@...nel.org>
---
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 30 ++++++++++++++++++-------
1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 0fda703363004..6fca2915ea8fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -116,6 +116,20 @@ static int vcn_v4_0_3_early_init(void *handle)
return amdgpu_vcn_early_init(adev);
}
+static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx)
+{
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
+ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
+ fw_shared->sq.is_enabled = 1;
+
+ if (amdgpu_vcnfw_log)
+ amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]);
+
+ return 0;
+}
+
/**
* vcn_v4_0_3_sw_init - sw init for VCN block
*
@@ -148,8 +162,6 @@ static int vcn_v4_0_3_sw_init(void *handle)
return r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
-
vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0];
@@ -172,12 +184,7 @@ static int vcn_v4_0_3_sw_init(void *handle)
if (r)
return r;
- fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
- fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
- fw_shared->sq.is_enabled = true;
-
- if (amdgpu_vcnfw_log)
- amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
+ vcn_v4_0_3_fw_shared_init(adev, i);
}
if (amdgpu_sriov_vf(adev)) {
@@ -273,6 +280,8 @@ static int vcn_v4_0_3_hw_init(void *handle)
}
} else {
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ struct amdgpu_vcn4_fw_shared *fw_shared;
+
vcn_inst = GET_INST(VCN, i);
ring = &adev->vcn.inst[i].ring_enc[0];
@@ -296,6 +305,11 @@ static int vcn_v4_0_3_hw_init(void *handle)
regVCN_RB1_DB_CTRL);
}
+ /* Re-init fw_shared when RAS fatal error occurred */
+ fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
+ if (!fw_shared->sq.is_enabled)
+ vcn_v4_0_3_fw_shared_init(adev, i);
+
r = amdgpu_ring_test_helper(ring);
if (r)
return r;
--
2.43.0
Powered by blists - more mailing lists