lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250915212244.886668-3-avadhut.naik@amd.com>
Date: Mon, 15 Sep 2025 21:20:23 +0000
From: Avadhut Naik <avadhut.naik@....com>
To: <linux-edac@...r.kernel.org>
CC: <bp@...en8.de>, <yazen.ghannam@....com>, <john.allen@....com>,
	<linux-kernel@...r.kernel.org>, <avadnaik@....com>
Subject: [PATCH v2 2/2] EDAC/amd64: Incorporate DRAM Address in EDAC message

Currently, the amd64_edac module provides decoded error data to the EDAC
interface. This data includes the system physical address (PFN + offset)
and the UMC normalized address gathered from MCA error decoding. The DRAM
address at which the error occurred, however, is not provided.

Use the new PRM call in the AMD Address Translation Library to gather the
DRAM address of an error. Pass this data to edac_mc_handle_error() through
its "other_detail" string so that it is available in the kernel messages
and the RAS tracepoint.

Signed-off-by: Avadhut Naik <avadhut.naik@....com>
---
Changes in v2:
1. Modify commit message per feedback received.
2. Pass the DRAM address to edac_mc_handle_error() through the
"other_detail" parameter instead of "msg".
3. Replace the sprintf() call with scnprintf() in __log_ecc_error().
---
 drivers/edac/amd64_edac.c | 23 ++++++++++++++++++++++-
 drivers/edac/amd64_edac.h |  1 +
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 07f1e9dc1ca7..a10a6134eb04 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2709,6 +2709,9 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
 {
 	enum hw_event_mc_err_type err_type;
 	const char *string;
+	char s[100];
+
+	memset(s, 0, sizeof(s));
 
 	if (ecc_type == 2)
 		err_type = HW_EVENT_ERR_CORRECTED;
@@ -2724,6 +2727,17 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
 	switch (err->err_code) {
 	case DECODE_OK:
 		string = "";
+
+		if (err->dram_addr) {
+			scnprintf(s, sizeof(s), "Cs: 0x%x Bank Grp: 0x%x Bank Addr: 0x%x Row: 0x%x Column: 0x%x RankMul: 0x%x SubChannel: 0x%x",
+				  err->dram_addr->chip_select,
+				  err->dram_addr->bank_group,
+				  err->dram_addr->bank_addr,
+				  err->dram_addr->row_addr,
+				  err->dram_addr->col_addr,
+				  err->dram_addr->rank_mul,
+				  err->dram_addr->sub_ch);
+		}
 		break;
 	case ERR_NODE:
 		string = "Failed to map error addr to a node";
@@ -2748,7 +2762,7 @@ static void __log_ecc_error(struct mem_ctl_info *mci, struct err_info *err,
 	edac_mc_handle_error(err_type, mci, 1,
 			     err->page, err->offset, err->syndrome,
 			     err->csrow, err->channel, -1,
-			     string, "");
+			     string, s);
 }
 
 static inline void decode_bus_error(int node_id, struct mce *m)
@@ -2808,11 +2822,13 @@ static void umc_get_err_info(struct mce *m, struct err_info *err)
 static void decode_umc_error(int node_id, struct mce *m)
 {
 	u8 ecc_type = (m->status >> 45) & 0x3;
+	struct atl_dram_addr dram_addr;
 	struct mem_ctl_info *mci;
 	unsigned long sys_addr;
 	struct amd64_pvt *pvt;
 	struct atl_err a_err;
 	struct err_info err;
+	int ret;
 
 	node_id = fixup_node_id(node_id, m);
 
@@ -2822,6 +2838,7 @@ static void decode_umc_error(int node_id, struct mce *m)
 
 	pvt = mci->pvt_info;
 
+	memset(&dram_addr, 0, sizeof(dram_addr));
 	memset(&err, 0, sizeof(err));
 
 	if (m->status & MCI_STATUS_DEFERRED)
@@ -2853,6 +2870,10 @@ static void decode_umc_error(int node_id, struct mce *m)
 		goto log_error;
 	}
 
+	ret = amd_convert_umc_mca_addr_to_dram_addr(&a_err, &dram_addr);
+	if (!ret)
+		err.dram_addr = &dram_addr;
+
 	error_address_to_page_and_offset(sys_addr, &err);
 
 log_error:
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 17228d07de4c..56de2857369a 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -399,6 +399,7 @@ struct err_info {
 	u16 syndrome;
 	u32 page;
 	u32 offset;
+	struct atl_dram_addr *dram_addr;
 };
 
 static inline u32 get_umc_base(u8 channel)
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ