lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231012230301.58500-2-Smita.KoralahalliChannabasappa@amd.com>
Date:   Thu, 12 Oct 2023 23:02:59 +0000
From:   Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
To:     <linux-efi@...r.kernel.org>, <linux-cxl@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>
CC:     Ard Biesheuvel <ardb@...nel.org>,
        Alison Schofield <alison.schofield@...el.com>,
        Vishal Verma <vishal.l.verma@...el.com>,
        "Ira Weiny" <ira.weiny@...el.com>,
        Dan Williams <dan.j.williams@...el.com>,
        Jonathan Cameron <Jonathan.Cameron@...wei.com>,
        Yazen Ghannam <yazen.ghannam@....com>,
        Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
Subject: [PATCH 1/3] efi/cper, cxl: Decode CXL Component Events General Media Event Record

Add support for decoding CXL Component Events General Media Event Record
as defined in CXL rev 3.0 section 8.2.9.2.1.1.

When reported by a platform, all event records identified by the Event
Record Identifier field of the Common Event Record Format (CXL rev 3.0
section 8.2.9.2.1) follow the CPER format for representing hardware
errors.

According to the CPER format, each event record, including the General
Media Event Record, is logged as a CXL Component Event as defined in UEFI
2.10 Section N.2.14 and is identified by the UUID given in the Event
Record Identifier field of the Common Event Record Format (CXL rev 3.0
section 8.2.9.2.1). The CXL Component Event Log field of the Component
Events Section corresponds to the component/event specified by the
section type UUID.

Add support for decoding CXL Component Events as defined in UEFI 2.10
Section N.2.14 and decoding Common Event Record as defined in CXL rev 3.0
section 8.2.9.2.1.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
---
 drivers/firmware/efi/cper.c     |   8 ++
 drivers/firmware/efi/cper_cxl.c | 143 ++++++++++++++++++++++++++++++++
 drivers/firmware/efi/cper_cxl.h |  62 ++++++++++++++
 3 files changed, 213 insertions(+)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 35c37f667781..b911b1f574db 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -607,6 +607,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
 			cper_print_prot_err(newpfx, prot_err);
 		else
 			goto err_section_too_small;
+	} else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA)) {
+		struct cper_sec_comp_event *gmer = acpi_hest_get_payload(gdata);
+
+		printk("%ssection_type: CXL General Media Event\n", newpfx);
+		if (gdata->error_data_length >= sizeof(*gmer))
+			cper_print_gen_media(newpfx, gmer);
+		else
+			goto err_section_too_small;
 	} else {
 		const void *err = acpi_hest_get_payload(gdata);
 
diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
index a55771b99a97..8f7b88cc574b 100644
--- a/drivers/firmware/efi/cper_cxl.c
+++ b/drivers/firmware/efi/cper_cxl.c
@@ -18,6 +18,18 @@
 #define PROT_ERR_VALID_DVSEC			BIT_ULL(5)
 #define PROT_ERR_VALID_ERROR_LOG		BIT_ULL(6)
 
+#define COMP_EVENT_VALID_DEVICE_ID		BIT_ULL(0)	/* gates printing of device_id */
+#define COMP_EVENT_VALID_SERIAL_NUMBER		BIT_ULL(1)	/* gates printing of dev_serial_num */
+#define COMP_EVENT_VALID_EVENT_LOG		BIT_ULL(2)	/* gates decoding of the event log */
+
+#define EVENT_RECORD_SEVERITY_MASK		GENMASK(1, 0)	/* applied to flags[0] */
+#define EVENT_RECORD_FLAGS_SHIFT		2	/* flags[0] is shifted right by this */
+
+#define GMER_VALID_CHANNEL			BIT_ULL(0)	/* gates printing of channel */
+#define GMER_VALID_RANK				BIT_ULL(1)	/* gates printing of rank */
+#define GMER_VALID_DEVICE			BIT_ULL(2)	/* gates printing of device */
+#define GMER_VALID_COMP_ID			BIT_ULL(3)	/* gates dumping of comp_id */
+
 /* CXL RAS Capability Structure, CXL v3.0 sec 8.2.4.16 */
 struct cxl_ras_capability_regs {
 	u32 uncor_status;
@@ -55,6 +67,42 @@ enum {
 	USP,	/* CXL Upstream Switch Port */
 };
 
+static const char * const cxl_evt_severity_strs[] = {	/* index: flags[0] & 0x3 */
+	[0] = "informational",
+	[1] = "warning",
+	[2] = "failure",
+	[3] = "fatal",
+};
+
+static const char * const cxl_evt_flags_strs[] = {	/* for cper_print_bits() */
+	[0] = "permanent condition",
+	[1] = "maintenance needed",
+	[2] = "performance degraded",
+	[3] = "hardware replacement needed",
+};
+
+static const char * const mem_evt_descriptor_strs[] = {	/* for cper_print_bits() */
+	[0] = "uncorrectable",
+	[1] = "threshold",
+	[2] = "poison list overflow",
+};
+
+static const char * const gmer_mem_type_strs[] = {	/* index: gmer->type */
+	[0] = "media ECC error",
+	[1] = "invalid address",
+	[2] = "data path error",
+};
+
+static const char * const transaction_type_strs[] = {	/* index: gmer->transaction_type */
+	[0] = "unknown/unreported",
+	[1] = "host read",
+	[2] = "host write",
+	[3] = "host scan media",
+	[4] = "host inject poison",
+	[5] = "internal media scrub",
+	[6] = "internal media management",
+};
+
 void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err)
 {
 	if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE)
@@ -187,3 +235,98 @@ void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_e
 			       sizeof(cxl_ras->header_log), 0);
 	}
 }
+
+/* Print the component event header; optional fields depend on valid_bits. */
+static void cper_print_comp_event(const char *pfx, const struct cper_sec_comp_event *event)
+{
+	const u64 valid = event->valid_bits;
+
+	pr_info("%s length of entire structure: 0x%08x\n", pfx, event->length);
+	if (valid & COMP_EVENT_VALID_DEVICE_ID) {
+		pr_info("%s device_id: %04x:%02x:%02x.%x\n", pfx,
+			event->device_id.segment, event->device_id.bus,
+			event->device_id.device, event->device_id.function);
+		pr_info("%s slot: %d\n", pfx,
+			event->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
+		pr_info("%s vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
+			event->device_id.vendor_id, event->device_id.device_id);
+	}
+
+	if (valid & COMP_EVENT_VALID_SERIAL_NUMBER)
+		pr_info("%s lower_dw: 0x%08x, upper_dw: 0x%08x\n", pfx,
+			event->dev_serial_num.lower_dw, event->dev_serial_num.upper_dw);
+}
+
+static void cper_print_event_record(const char *pfx,
+				    const struct common_event_record *record)
+{
+	/* Only the first of the three flag bytes is decoded here. */
+	const u8 flags_lo = record->flags[0];
+	const u8 sev = flags_lo & EVENT_RECORD_SEVERITY_MASK;
+	const u8 evt_flags = flags_lo >> EVENT_RECORD_FLAGS_SHIFT;
+	const char *sev_str = "unknown";
+
+	if (sev < ARRAY_SIZE(cxl_evt_severity_strs))
+		sev_str = cxl_evt_severity_strs[sev];
+
+	pr_info("%s event record length: 0x%02x\n", pfx, record->length);
+	pr_info("%s event record severity: %s\n", pfx, sev_str);
+	pr_info("%s event record flags: 0x%02x\n", pfx, evt_flags);
+	cper_print_bits(pfx, evt_flags, cxl_evt_flags_strs,
+			ARRAY_SIZE(cxl_evt_flags_strs));
+
+	pr_info("%s event record handle: 0x%04x\n", pfx, record->handle);
+	pr_info("%s related event record handle: 0x%04x\n", pfx,
+		record->related_handle);
+	pr_info("%s event record timestamp: 0x%016llx\n", pfx, record->timestamp);
+	pr_info("%s maintenance operation class: 0x%02x\n", pfx,
+		record->maint_op_class);
+}
+
+/* Decode a CXL General Media Event section: component header, then the log. */
+void cper_print_gen_media(const char *pfx, const struct cper_sec_comp_event *event)
+{
+	const struct cper_sec_gen_media *gmer;
+
+	cper_print_comp_event(pfx, event);
+
+	if (!(event->valid_bits & COMP_EVENT_VALID_EVENT_LOG))
+		return;
+
+	/* cper.c only validates the header size; don't overrun a short record. */
+	if (event->length < sizeof(*event) + sizeof(*gmer)) {
+		pr_info("%s event log truncated, not decoded\n", pfx);
+		return;
+	}
+
+	/* The event log immediately follows the fixed-size header. */
+	gmer = (const struct cper_sec_gen_media *)(event + 1);
+
+	cper_print_event_record(pfx, &gmer->record);
+
+	pr_info("%s device physical address: 0x%016llx\n", pfx, gmer->dpa);
+	pr_info("%s memory event descriptor: 0x%02x\n", pfx, gmer->descriptor);
+	cper_print_bits(pfx, gmer->descriptor, mem_evt_descriptor_strs,
+			ARRAY_SIZE(mem_evt_descriptor_strs));
+
+	pr_info("%s memory event type: %d, %s\n", pfx, gmer->type,
+		gmer->type < ARRAY_SIZE(gmer_mem_type_strs)
+		? gmer_mem_type_strs[gmer->type] : "unknown");
+	pr_info("%s transaction type: %d, %s\n", pfx, gmer->transaction_type,
+		gmer->transaction_type < ARRAY_SIZE(transaction_type_strs)
+		? transaction_type_strs[gmer->transaction_type] : "unknown");
+
+	/* Optional fields are printed only when flagged valid. */
+	if (gmer->validity_flags & GMER_VALID_CHANNEL)
+		pr_info("%s channel: 0x%02x\n", pfx, gmer->channel);
+	if (gmer->validity_flags & GMER_VALID_RANK)
+		pr_info("%s rank: 0x%02x\n", pfx, gmer->rank);
+	if (gmer->validity_flags & GMER_VALID_DEVICE)
+		pr_info("%s device: %02x%02x%02x\n", pfx, gmer->device[2],
+			gmer->device[1], gmer->device[0]);
+	if (gmer->validity_flags & GMER_VALID_COMP_ID) {
+		pr_info("%s component identifier:\n", pfx);
+		print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, gmer->comp_id,
+			       sizeof(gmer->comp_id), 0);
+	}
+}
diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h
index 86bfcf7909ec..94528db208de 100644
--- a/drivers/firmware/efi/cper_cxl.h
+++ b/drivers/firmware/efi/cper_cxl.h
@@ -15,6 +15,11 @@
 	GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78,	\
 		  0x4B, 0x77, 0x10, 0x48)
 
+/* Section type GUID matched by cper.c for CXL General Media Event sections */
+#define CPER_SEC_CXL_GEN_MEDIA						\
+	GUID_INIT(0xFBCD0A77, 0xC260, 0x417F, 0x85, 0xA9, 0x08, 0x8B,	\
+		  0x16, 0x21, 0xEB, 0xA6)
+
 #pragma pack(1)
 
 /* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
@@ -59,8 +64,65 @@ struct cper_sec_prot_err {
 	u8 reserved_2[4];
 };
 
+/* Compute Express Link Component Events Section, UEFI v2.10 sec N.2.14 */
+struct cper_sec_comp_event {
+	u32 length;		/* printed as "length of entire structure" */
+	u64 valid_bits;		/* COMP_EVENT_VALID_* bits, tested before decoding */
+
+	struct {
+		u16 vendor_id;
+		u16 device_id;
+		u8 function;
+		u8 device;
+		u8 bus;
+		u16 segment;
+		u16 slot;	/* shifted by CPER_PCIE_SLOT_SHIFT when printed */
+		u8 reserved_1;
+	} device_id;
+
+	struct {
+		u32 lower_dw;
+		u32 upper_dw;
+	} dev_serial_num;
+	/* cper_print_gen_media() reads the event log directly after this struct. */
+};
+
+/*
+ * Compute Express Link Common Event Record
+ * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
+ */
+struct common_event_record {
+	u8 identifier[16];
+	u8 length;		/* printed as "event record length" */
+	u8 flags[3];		/* flags[0]: bits 1:0 severity, bits 7:2 event flags */
+	u16 handle;
+	u16 related_handle;
+	u64 timestamp;		/* printed as a raw 64-bit hex value */
+	u8 maint_op_class;	/* maintenance operation class */
+	u8 reserved[15];
+};
+
+/*
+ * CXL General Media Event Record - GMER
+ * CXL rev 3.0 section 8.2.9.2.1.1; Table 8-43
+ */
+struct cper_sec_gen_media {
+	struct common_event_record record;
+	u64 dpa;		/* device physical address */
+	u8 descriptor;		/* bit field, decoded via mem_evt_descriptor_strs */
+	u8 type;		/* index into gmer_mem_type_strs */
+	u8 transaction_type;	/* index into transaction_type_strs */
+	u16 validity_flags;	/* GMER_VALID_* bits for the fields below */
+	u8 channel;
+	u8 rank;
+	u8 device[3];		/* printed as device[2], device[1], device[0] */
+	u8 comp_id[16];		/* component identifier, hex-dumped */
+	u8 reserved[46];
+};
+
 #pragma pack()
 
 void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err);
+void cper_print_gen_media(const char *pfx, const struct cper_sec_comp_event *event);
 
 #endif //__CPER_CXL_
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ