[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250717165622.1162091-3-avadhut.naik@amd.com>
Date: Thu, 17 Jul 2025 16:48:43 +0000
From: Avadhut Naik <avadhut.naik@....com>
To: <linux-edac@...r.kernel.org>
CC: <bp@...en8.de>, <yazen.ghannam@....com>, <john.allen@....com>,
<linux-kernel@...r.kernel.org>, <avadhut.naik@....com>
Subject: [PATCH 2/2] EDAC/amd64: Incorporate DRAM Address in EDAC message
Currently, the amd64_edac module only provides UMC normalized and system
physical address when a DRAM ECC error occurs. DRAM Address on which the
error has occurred is not provided since the support required to translate
the normalized address into DRAM address has not yet been implemented.
This support however, has now been implemented through an earlier patch
(RAS/AMD/ATL: Translate UMC normalized address to DRAM address using PRM)
and DRAM address, which provides additional debugging information relating
to the error received, can now be logged by the module.
Add the required support to log DRAM address on which the error has been
received in dmesg and through the RAS tracepoint.
Signed-off-by: Avadhut Naik <avadhut.naik@....com>
---
drivers/edac/amd64_edac.c | 23 +++++++++++++++++++++++
drivers/edac/amd64_edac.h | 1 +
2 files changed, 24 insertions(+)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 07f1e9dc1ca7..36b46cd81bb2 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2724,6 +2724,22 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
switch (err->err_code) {
case DECODE_OK:
string = "";
+ if (err->dram_addr) {
+ char s[100];
+
+ memset(s, 0, 100);
+ sprintf(s, "Cs: 0x%x Bank Grp: 0x%x Bank Addr: 0x%x"
+ " Row: 0x%x Column: 0x%x"
+ " RankMul: 0x%x SubChannel: 0x%x",
+ err->dram_addr->chip_select,
+ err->dram_addr->bank_group,
+ err->dram_addr->bank_addr,
+ err->dram_addr->row_addr,
+ err->dram_addr->col_addr,
+ err->dram_addr->rank_mul,
+ err->dram_addr->sub_ch);
+ string = s;
+ }
break;
case ERR_NODE:
string = "Failed to map error addr to a node";
@@ -2808,11 +2824,13 @@ static void umc_get_err_info(struct mce *m, struct err_info *err)
static void decode_umc_error(int node_id, struct mce *m)
{
u8 ecc_type = (m->status >> 45) & 0x3;
+ struct dram_addr dram_addr;
struct mem_ctl_info *mci;
unsigned long sys_addr;
struct amd64_pvt *pvt;
struct atl_err a_err;
struct err_info err;
+ int ret;
node_id = fixup_node_id(node_id, m);
@@ -2822,6 +2840,7 @@ static void decode_umc_error(int node_id, struct mce *m)
pvt = mci->pvt_info;
+ memset(&dram_addr, 0, sizeof(dram_addr));
memset(&err, 0, sizeof(err));
if (m->status & MCI_STATUS_DEFERRED)
@@ -2853,6 +2872,10 @@ static void decode_umc_error(int node_id, struct mce *m)
goto log_error;
}
+ ret = amd_convert_umc_mca_addr_to_dram_addr(&a_err, &dram_addr);
+ if (!ret)
+ err.dram_addr = &dram_addr;
+
error_address_to_page_and_offset(sys_addr, &err);
log_error:
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 17228d07de4c..88b0b8425ab3 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -399,6 +399,7 @@ struct err_info {
u16 syndrome;
u32 page;
u32 offset;
+ struct dram_addr *dram_addr;
};
static inline u32 get_umc_base(u8 channel)
--
2.43.0
Powered by blists - more mailing lists