[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230601-cxl-cper-v1-2-99ba43f8f770@intel.com>
Date: Thu, 12 Oct 2023 23:55:20 -0700
From: Ira Weiny <ira.weiny@...el.com>
To: Dan Williams <dan.j.williams@...el.com>,
Jonathan Cameron <jonathan.cameron@...wei.com>,
Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
Cc: Yazen Ghannam <yazen.ghannam@....com>,
Davidlohr Bueso <dave@...olabs.net>,
Dave Jiang <dave.jiang@...el.com>,
Alison Schofield <alison.schofield@...el.com>,
Vishal Verma <vishal.l.verma@...el.com>,
Ard Biesheuvel <ardb@...nel.org>, linux-efi@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-cxl@...r.kernel.org,
Ira Weiny <ira.weiny@...el.com>
Subject: [PATCH RFC 2/2] cxl/memdev: Register for and process CPER events
If the firmware has configured CXL event support to be firmware-first,
the OS can process those events through CPER records.
Detect a firmware-first configuration and register a notifier callback
to catch CPER records for this memdev. Process the records destined
for this memdev through the normal trace mechanism.
Not-Yet-Signed-off-by: Ira Weiny <ira.weiny@...el.com>
---
RFC comments:
The matching of the CPER event to the memdev state (mds) is a bit hacky
right now and could probably be much more robust. But the general
approach seems sound: simply register a notifier for each device, and
when that device finds a record for itself, call the normal trace
mechanisms.
---
drivers/cxl/core/mbox.c | 7 ++---
drivers/cxl/cxlmem.h | 5 ++++
drivers/cxl/pci.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 4df4f614f490..3a8ce7801e04 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -860,9 +860,9 @@ static const uuid_t mem_mod_event_uuid =
UUID_INIT(0xfe927475, 0xdd59, 0x4339,
0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
-static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
- enum cxl_event_log_type type,
- struct cxl_event_record_raw *record)
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+ enum cxl_event_log_type type,
+ struct cxl_event_record_raw *record)
{
uuid_t *id = &record->hdr.id;
@@ -885,6 +885,7 @@ static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
trace_cxl_generic_event(cxlmd, type, record);
}
}
+EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
static int cxl_clear_event_record(struct cxl_memdev_state *mds,
enum cxl_event_log_type log,
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 706f8a6d1ef4..2b4210c291b9 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -477,6 +477,8 @@ struct cxl_memdev_state {
struct cxl_security_state security;
struct cxl_fw_state fw;
+ struct notifier_block cxl_cper_nb;
+
struct rcuwait mbox_wait;
int (*mbox_send)(struct cxl_memdev_state *mds,
struct cxl_mbox_cmd *cmd);
@@ -863,6 +865,9 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
unsigned long *cmds);
void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
+void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
+ enum cxl_event_log_type type,
+ struct cxl_event_record_raw *record);
int cxl_set_timestamp(struct cxl_memdev_state *mds);
int cxl_poison_state_init(struct cxl_memdev_state *mds);
int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 44a21ab7add5..19922e32c098 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <asm-generic/unaligned.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/moduleparam.h>
#include <linux/module.h>
@@ -10,6 +11,7 @@
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/io.h>
+#include <linux/efi.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
@@ -748,6 +750,70 @@ static bool cxl_event_int_is_fw(u8 setting)
return mode == CXL_INT_FW;
}
+/* Bits [1:0] of the event record header flags; used below as the log type */
+#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
+
+/*
+ * cxl_cper_event() - notifier callback for CXL CPER records
+ * @nb: notifier block embedded in the struct cxl_memdev_state to match
+ * @action: unused
+ * @data: struct cxl_cper_notifier_data holding the CPER record and its UUID
+ *
+ * Records that carry no valid device serial number, or whose serial number
+ * does not match this memdev, are ignored.  A matching record is rebuilt as
+ * a raw CXL event record and handed to cxl_event_trace_record().
+ *
+ * Only referenced through mds->cxl_cper_nb.notifier_call in this file, so it
+ * has internal linkage.
+ *
+ * Return: NOTIFY_OK if the record was traced, NOTIFY_DONE otherwise.
+ */
+static int cxl_cper_event(struct notifier_block *nb, unsigned long action,
+			  void *data)
+{
+	struct cxl_cper_notifier_data *nd = data;
+	struct cxl_event_record_raw record;
+	enum cxl_event_log_type log_type;
+	struct cxl_memdev_state *mds;
+	size_t rec_len;
+	u32 hdr_flags;
+
+	/* ensure record can always handle the full CPER provided data */
+	BUILD_BUG_ON(sizeof(record) <
+		     (CPER_CXL_COMP_EVENT_LOG_SIZE + sizeof(record.hdr.id)));
+
+	mds = container_of(nb, struct cxl_memdev_state, cxl_cper_nb);
+
+	/* Need serial number for device identification */
+	if (!(nd->rec->hdr.validation_bits & CPER_CXL_DEVICE_SN_VALID))
+		return NOTIFY_DONE;
+
+	/* FIXME endianness and bytes of serial number need verification */
+	/* FIXME Should other values be checked? */
+	if (memcmp(&mds->cxlds.serial, &nd->rec->hdr.dev_serial_num,
+		   sizeof(mds->cxlds.serial)))
+		return NOTIFY_DONE;
+
+	/*
+	 * The copy length is derived from the CPER record itself, so bound it
+	 * at run time; the BUILD_BUG_ON() above only guarantees room for
+	 * CPER_CXL_COMP_EVENT_LOG_SIZE bytes.  Storing the value in a size_t
+	 * also turns an underflowed length into a large value that is
+	 * rejected here.
+	 * NOTE(review): assumes comp_event_log holds
+	 * CPER_CXL_COMP_EVENT_LOG_SIZE bytes - confirm against its definition.
+	 */
+	rec_len = CPER_CXL_REC_LEN(nd->rec);
+	if (rec_len > CPER_CXL_COMP_EVENT_LOG_SIZE) {
+		dev_warn_ratelimited(mds->cxlds.dev,
+				     "CPER record length %zu too large\n",
+				     rec_len);
+		return NOTIFY_DONE;
+	}
+
+	/*
+	 * UEFI v2.10 section N.2.14 defines the CXL CPER record as not
+	 * including the uuid field from the CXL record.
+	 *
+	 * Build the record from the UUID passed; everything after the UUID
+	 * is zero-initialized and then overwritten with the CPER payload.
+	 */
+	record = (struct cxl_event_record_raw) {
+		.hdr.id = nd->uuid,
+	};
+	memcpy(&record.hdr.length, &nd->rec->comp_event_log, rec_len);
+
+	/* flags is a 3-byte little-endian field; its low two bits pick the log */
+	hdr_flags = get_unaligned_le24(record.hdr.flags);
+	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
+
+	cxl_event_trace_record(mds->cxlds.cxlmd, log_type, &record);
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Remove the CPER notifier embedded in the memdev state passed as @_mds.
+ * The void pointer signature matches what devm actions expect.
+ */
+static void cxl_unregister_cper_events(void *_mds)
+{
+	struct cxl_memdev_state *state = _mds;
+	struct notifier_block *nb = &state->cxl_cper_nb;
+
+	unregister_cxl_cper_notifier(nb);
+}
+
+/*
+ * register_cper_events() - hook this memdev into CXL CPER notifications
+ * @mds: memdev state whose embedded notifier block is registered
+ *
+ * Installs cxl_cper_event() as the callback and registers the notifier.
+ * On success, a devm action is added so the notifier is unregistered when
+ * the device is torn down.  Failures are logged and otherwise ignored; the
+ * caller gets no error back.
+ */
+static void register_cper_events(struct cxl_memdev_state *mds)
+{
+	struct device *dev = mds->cxlds.dev;
+	int rc;
+
+	mds->cxl_cper_nb.notifier_call = cxl_cper_event;
+
+	if (register_cxl_cper_notifier(&mds->cxl_cper_nb)) {
+		dev_err(dev, "CPER registration failed\n");
+		return;
+	}
+
+	/*
+	 * devm_add_action_or_reset() runs the action itself when it fails, so
+	 * the notifier is already unregistered on error; just report it.
+	 */
+	rc = devm_add_action_or_reset(dev, cxl_unregister_cper_events, mds);
+	if (rc)
+		dev_err(dev, "CPER unregister action setup failed: %d\n", rc);
+}
+
static int cxl_event_config(struct pci_host_bridge *host_bridge,
struct cxl_memdev_state *mds)
{
@@ -758,8 +824,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
* When BIOS maintains CXL error reporting control, it will process
* event records. Only one agent can do so.
*/
- if (!host_bridge->native_cxl_error)
+ if (!host_bridge->native_cxl_error) {
+ register_cper_events(mds);
return 0;
+ }
rc = cxl_mem_alloc_event_buf(mds);
if (rc)
--
2.41.0
Powered by blists - more mailing lists