[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250717165622.1162091-2-avadhut.naik@amd.com>
Date: Thu, 17 Jul 2025 16:48:42 +0000
From: Avadhut Naik <avadhut.naik@....com>
To: <linux-edac@...r.kernel.org>
CC: <bp@...en8.de>, <yazen.ghannam@....com>, <john.allen@....com>,
<linux-kernel@...r.kernel.org>, <avadhut.naik@....com>
Subject: [PATCH 1/2] RAS/AMD/ATL: Translate UMC normalized address to DRAM address using PRM
Modern AMD SOCs like Zen5 provide UEFI PRM module that implements various
address translation PRM handlers.[1] These handlers can be invoked by the
OS or hypervisor at runtime to perform address translations.
On AMD's Zen-based SOCs, Unified Memory Controller (UMC) relative
"normalized" address is reported through MCA_ADDR of UMC SMCA bank type
on occurrence of a DRAM ECC error. This address must be converted into
system physical address and DRAM address to export additional information
about the error.
Add support to convert normalized address into DRAM address through the
appropriate PRM handler. Support for obtaining the system physical address
already exists. Instead of logging the translated DRAM address locally,
register the translating function when the Address Translation library is
initialized. Modules like amd64_edac can then invoke the PRM handler to
add the DRAM address to their error records. Additionally, it can also be
exported through the RAS tracepont.
[1] AMD Family 1Ah Models 00h–0Fh and Models 10h–1Fh ACPI v6.5 Porting Guide, Chapter 22
Signed-off-by: Avadhut Naik <avadhut.naik@....com>
---
drivers/ras/amd/atl/core.c | 3 ++-
drivers/ras/amd/atl/internal.h | 9 +++++++++
drivers/ras/amd/atl/prm.c | 36 ++++++++++++++++++++++++++++++----
drivers/ras/amd/atl/umc.c | 12 ++++++++++++
drivers/ras/ras.c | 18 +++++++++++++++--
include/linux/ras.h | 19 +++++++++++++++++-
6 files changed, 89 insertions(+), 8 deletions(-)
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 4197e10993ac..ca1646d030ca 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -207,7 +207,8 @@ static int __init amd_atl_init(void)
/* Increment this module's recount so that it can't be easily unloaded. */
__module_get(THIS_MODULE);
- amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr);
+ amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr,
+ convert_umc_mca_addr_to_dram_addr);
pr_info("AMD Address Translation Library initialized\n");
return 0;
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index 2b6279d32774..53095310438c 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -279,18 +279,27 @@ int dehash_address(struct addr_ctx *ctx);
unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr);
unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+int convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr);
u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr);
u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr);
#ifdef CONFIG_AMD_ATL_PRM
unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr);
+int prm_umc_norm_to_dram_addr(u8 socket_id, u64 bank_id,
+ unsigned long addr, struct dram_addr *dram_addr);
#else
static inline unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id,
unsigned long addr)
{
return -ENODEV;
}
+
+static inline int prm_umc_norm_to_dram_addr(u8 socket_id, u64 bank_id,
+ unsigned long addr, struct dram_addr *dram_addr)
+{
+ return -ENODEV;
+}
#endif
/*
diff --git a/drivers/ras/amd/atl/prm.c b/drivers/ras/amd/atl/prm.c
index 0931a20d213b..9bbaf8c85da0 100644
--- a/drivers/ras/amd/atl/prm.c
+++ b/drivers/ras/amd/atl/prm.c
@@ -19,10 +19,11 @@
#include <linux/prmt.h>
/*
- * PRM parameter buffer - normalized to system physical address, as described
- * in the "PRM Parameter Buffer" section of the AMD ACPI Porting Guide.
+ * PRM parameter buffer - normalized to system physical address and normalized
+ * to DRAM address, as described in the "PRM Parameter Buffer" section of the
+ * AMD ACPI Porting Guide.
*/
-struct norm_to_sys_param_buf {
+struct prm_parameter_buffer {
u64 norm_addr;
u8 socket;
u64 bank_id;
@@ -33,9 +34,13 @@ static const guid_t norm_to_sys_guid = GUID_INIT(0xE7180659, 0xA65D, 0x451D,
0x92, 0xCD, 0x2B, 0x56, 0xF1,
0x2B, 0xEB, 0xA6);
+static const guid_t norm_to_dram_guid = GUID_INIT(0x7626C6AE, 0xF973, 0x429C,
+ 0xA9, 0x1C, 0x10, 0x7D, 0x7B,
+ 0xE2, 0x98, 0xB0);
+
unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 bank_id, unsigned long addr)
{
- struct norm_to_sys_param_buf p_buf;
+ struct prm_parameter_buffer p_buf;
unsigned long ret_addr;
int ret;
@@ -55,3 +60,26 @@ unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 bank_id, unsigned long
return ret;
}
+
+int prm_umc_norm_to_dram_addr(u8 socket_id, u64 bank_id,
+ unsigned long addr, struct dram_addr *dram_addr)
+{
+ struct prm_parameter_buffer p_buf;
+ int ret;
+
+ p_buf.norm_addr = addr;
+ p_buf.socket = socket_id;
+ p_buf.bank_id = bank_id;
+ p_buf.out_buf = dram_addr;
+
+ ret = acpi_call_prm_handler(norm_to_dram_guid, &p_buf);
+ if (!ret)
+ return ret;
+
+ if (ret == -ENODEV)
+ pr_debug("PRM module/handler not available.\n");
+ else
+ pr_notice_once("PRM DRAM Address Translation failed.\n");
+
+ return ret;
+}
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
index 6e072b7667e9..df6accae8929 100644
--- a/drivers/ras/amd/atl/umc.c
+++ b/drivers/ras/amd/atl/umc.c
@@ -427,3 +427,15 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr);
}
+
+int convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr)
+{
+ u8 socket_id = topology_physical_package_id(err->cpu);
+ unsigned long addr = get_addr(err->addr);
+ u64 bank_id = err->ipid;
+ int ret;
+
+ ret = prm_umc_norm_to_dram_addr(socket_id, bank_id, addr, dram_addr);
+
+ return ret;
+}
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index a6e4792a1b2e..cae6388d41be 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -19,15 +19,20 @@
*/
static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err);
-void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *))
+static int (*amd_atl_umc_na_to_dram_addr)(struct atl_err *err, struct dram_addr *dram_addr);
+
+void amd_atl_register_decoder(unsigned long (*f1)(struct atl_err *),
+ int (*f2)(struct atl_err *, struct dram_addr *))
{
- amd_atl_umc_na_to_spa = f;
+ amd_atl_umc_na_to_spa = f1;
+ amd_atl_umc_na_to_dram_addr = f2;
}
EXPORT_SYMBOL_GPL(amd_atl_register_decoder);
void amd_atl_unregister_decoder(void)
{
amd_atl_umc_na_to_spa = NULL;
+ amd_atl_umc_na_to_dram_addr = NULL;
}
EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder);
@@ -39,6 +44,15 @@ unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err)
return amd_atl_umc_na_to_spa(err);
}
EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr);
+
+int amd_convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr)
+{
+ if (!amd_atl_umc_na_to_dram_addr)
+ return -EINVAL;
+
+ return amd_atl_umc_na_to_dram_addr(err, dram_addr);
+}
+EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_dram_addr);
#endif /* CONFIG_AMD_ATL */
#define CREATE_TRACE_POINTS
diff --git a/include/linux/ras.h b/include/linux/ras.h
index a64182bc72ad..feb53f8470b0 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -42,15 +42,32 @@ struct atl_err {
u32 cpu;
};
+struct dram_addr {
+ u8 chip_select;
+ u8 bank_group;
+ u8 bank_addr;
+ u32 row_addr;
+ u16 col_addr;
+ u8 rank_mul;
+ u8 sub_ch;
+} __packed;
+
#if IS_ENABLED(CONFIG_AMD_ATL)
-void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *));
+void amd_atl_register_decoder(unsigned long (*f1)(struct atl_err *),
+ int (*f2)(struct atl_err *, struct dram_addr *));
void amd_atl_unregister_decoder(void);
void amd_retire_dram_row(struct atl_err *err);
unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err);
+int amd_convert_umc_mca_addr_to_dram_addr(struct atl_err *err, struct dram_addr *dram_addr);
#else
static inline void amd_retire_dram_row(struct atl_err *err) { }
static inline unsigned long
amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; }
+static inline int amd_convert_umc_mca_addr_to_dram_addr(struct atl_err *err,
+ struct dram_addr *dram_addr)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_AMD_ATL */
#endif /* __RAS_H__ */
--
2.43.0
Powered by blists - more mailing lists