lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250207143028.1865-3-shiju.jose@huawei.com>
Date: Fri, 7 Feb 2025 14:30:23 +0000
From: <shiju.jose@...wei.com>
To: <linux-edac@...r.kernel.org>, <linux-cxl@...r.kernel.org>,
	<mchehab@...nel.org>, <dave.jiang@...el.com>, <dan.j.williams@...el.com>,
	<bp@...en8.de>, <jonathan.cameron@...wei.com>, <alison.schofield@...el.com>,
	<vishal.l.verma@...el.com>, <ira.weiny@...el.com>, <dave@...olabs.net>
CC: <linux-kernel@...r.kernel.org>, <linuxarm@...wei.com>,
	<tanxiaofei@...wei.com>, <prime.zeng@...ilicon.com>, <shiju.jose@...wei.com>
Subject: [PATCH 2/4] rasdaemon: cxl: Add support for memory soft PPR operation

From: Shiju Jose <shiju.jose@...wei.com>

CXL spec 3.1, Section 8.2.9.2.1, Table 8-43, "Common Event Record Format",
defines the Event Record Flags 'Maintenance Needed' and 'Maintenance
Operation Subclass Valid Flag'. When set, these flags signal that the
memory device requires maintenance. When the device sets the maintenance
operation class and maintenance operation subclass for memory soft PPR
(Post Package Repair), the CXL DRAM event handler and the CXL general media
event handler set the attributes for memory PPR via the EDAC memory repair
sysfs interface, initiating the soft PPR operation in the CXL memory
device.

Add support for the memory soft PPR operation and enable it for the CXL
DRAM event and the CXL general media event if auto repair is on.

Signed-off-by: Shiju Jose <shiju.jose@...wei.com>
---
 ras-cxl-handler.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/ras-cxl-handler.c b/ras-cxl-handler.c
index 3311949..ae49740 100644
--- a/ras-cxl-handler.c
+++ b/ras-cxl-handler.c
@@ -735,6 +735,10 @@ static int handle_ras_cxl_common_hdr(struct trace_seq *s,
 #define CXL_MAINT_SUBCLASS_BANK_SPARING		0x02
 #define CXL_MAINT_SUBCLASS_RANK_SPARING		0x03
 
+#define CXL_MAINT_CLASS_PPR		0x01	/* maintenance operation class: PPR */
+#define CXL_MAINT_SUBCLASS_SPPR		0x00	/* PPR subclass that cxl_ppr() requires (soft PPR) */
+#define CXL_MAINT_SUBCLASS_HPPR		0x01	/* presumably hard PPR -- TODO confirm against the CXL spec */
+
 #define CXL_CMD_BUF_SIZE	256
 
 enum cxl_mem_sparing_type {
@@ -791,6 +795,34 @@ error:
 	return -1;
 }
 
+/* Read a decimal integer from sysfs file <dir>/<file>; return it, or -1 on error. */
+static int get_sysfs_data_uint32(const char *dir, const char *file)
+{
+	char path[CXL_CMD_BUF_SIZE];
+	char buf[16] = "";	/* zero-filled, so the text read stays NUL-terminated */
+	int fd, num;
+
+	snprintf(path, CXL_CMD_BUF_SIZE, "%s/%s", dir, file);
+	fd = open(path, O_RDONLY);
+	if (fd == -1) {
+		log(TERM, LOG_ERR, "[%s]:open file: %s failed\n", __func__, path);
+		return -1;
+	}
+
+	/* Read the whole value, not just its first digit ("10" must not parse as 1). */
+	if (read(fd, buf, sizeof(buf) - 1) <= 0)
+		goto error;
+	if (sscanf(buf, "%d", &num) <= 0)
+		goto error;
+
+	close(fd);
+	return num;
+
+error:
+	close(fd);
+	return -1;
+}
+
 static int set_sysfs_data_uint32(const char *dir, const char *file, uint32_t data)
 {
 	char path[CXL_CMD_BUF_SIZE];
@@ -858,6 +890,34 @@ static int cxl_find_spare(const char *repair_dev, const char *repair_type)
 	return -1;
 }
 
+/*
+ * Return the index N of the first mem_repairN instance of @repair_dev whose
+ * persist_mode reads 0 and whose repair_type equals @repair_type, else -1.
+ */
+static int cxl_find_ppr(const char *repair_dev, const char *repair_type)
+{
+	char dir[CXL_CMD_BUF_SIZE];
+	char out[CXL_CMD_BUF_SIZE];
+	int idx, persist;
+
+	/* idx advances in the loop header so that "continue" cannot spin forever. */
+	for (idx = 0; ; idx++) {
+		snprintf(dir, CXL_CMD_BUF_SIZE, "%s%s%s/mem_repair%d",
+			 edac_bus_path, EDAC_CXL_DEV_PREFIX, repair_dev, idx);
+		persist = get_sysfs_data_uint32(dir, "persist_mode");
+		if (persist < 0)
+			return -1;	/* read failed, e.g. no mem_repair<idx> exists */
+		if (persist)
+			continue;	/* skip instances with persist_mode set */
+		if (get_sysfs_data_str(dir, "repair_type", out))
+			return -1;
+		if (!strcmp(repair_type, out))
+			return idx;
+	}
+
+	return -1;
+}
+
 int ras_cxl_generic_event_handler(struct trace_seq *s,
 				  struct tep_record *record,
 				  struct tep_event *event, void *context)
@@ -973,6 +1033,36 @@ static const char * const cxl_gmer_trans_type[] = {
 	"Media Initialization",
 };
 
+/* Run soft PPR on @dpa when @hdr requests it; returns 0 on success, -1 otherwise. */
+static int cxl_ppr(struct ras_cxl_event_common_hdr *hdr, uint64_t dpa, uint32_t nibble_mask)
+{
+	char path[CXL_CMD_BUF_SIZE];
+	int instance;
+
+	if (!(hdr->hdr_flags & CXL_EVENT_RECORD_FLAG_MAINT_NEEDED))
+		return -1;
+	if (!(hdr->hdr_flags & CXL_EVENT_RECORD_FLAG_MAINT_OP_SUB_CLASS_VALID))
+		return -1;
+	if (hdr->hdr_maint_op_class != CXL_MAINT_CLASS_PPR ||
+	    hdr->hdr_maint_op_sub_class != CXL_MAINT_SUBCLASS_SPPR)
+		return -1;
+
+	instance = cxl_find_ppr(hdr->memdev, "ppr");
+	if (instance < 0)
+		return -1;
+
+	snprintf(path, CXL_CMD_BUF_SIZE, "%s%s%s/mem_repair%d",
+		 edac_bus_path, EDAC_CXL_DEV_PREFIX, hdr->memdev, instance);
+
+	/* Written in order: dpa, nibble_mask, then 1 to "repair"; stops at the first failure. */
+	if (set_sysfs_data_uint64(path, "dpa", dpa) ||
+	    set_sysfs_data_uint32(path, "nibble_mask", nibble_mask) ||
+	    set_sysfs_data_uint32(path, "repair", 1))
+		return -1;
+
+	return 0;
+}
+
 int ras_cxl_general_media_event_handler(struct trace_seq *s,
 					struct tep_record *record,
 					struct tep_event *event, void *context)
@@ -1133,6 +1223,9 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
 	ras_report_cxl_general_media_event(ras, &ev);
 #endif
 
+	if (!(ev.dpa_flags & CXL_DPA_NOT_REPAIRABLE))
+		cxl_ppr(&ev.hdr, ev.dpa, 0);
+
 	return 0;
 }
 
@@ -1518,6 +1611,9 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
 
 	cxl_dram_sparing(&ev);
 
+	if (!(ev.dpa_flags & CXL_DPA_NOT_REPAIRABLE))
+		cxl_ppr(&ev.hdr, ev.dpa, ev.nibble_mask);
+
 	/* Insert data into the SGBD */
 #ifdef HAVE_SQLITE3
 	ras_store_cxl_dram_event(ras, &ev);
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ