lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <81b90308-fdb1-3686-33a3-1e7ec42a7ef8@amd.com>
Date:   Mon, 30 Oct 2023 14:03:19 -0700
From:   Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
To:     Ira Weiny <ira.weiny@...el.com>,
        Dan Williams <dan.j.williams@...el.com>,
        Jonathan Cameron <jonathan.cameron@...wei.com>
Cc:     Yazen Ghannam <yazen.ghannam@....com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Dave Jiang <dave.jiang@...el.com>,
        Alison Schofield <alison.schofield@...el.com>,
        Vishal Verma <vishal.l.verma@...el.com>,
        Ard Biesheuvel <ardb@...nel.org>, linux-efi@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-cxl@...r.kernel.org
Subject: Re: [PATCH RFC v2 3/3] cxl/memdev: Register for and process CPER
 events

Hi Ira,

On 10/26/2023 11:21 AM, Ira Weiny wrote:
> If the firmware has configured CXL event support to be firmware first
> the OS can process those events through CPER records.  Matching memory
> devices to the CPER records can be done via the serial number which is
> part of the CPER record header.
> 
> Detect firmware first, register a notifier callback for each memdev, and
> trace events when they match a device registered.
> 
> Signed-off-by: Ira Weiny <ira.weiny@...el.com>
> 
> ---
> Changes from RFC v1:
> [iweiny: adjust to cper_event enum instead of converting guids]
> ---
>   drivers/cxl/core/mbox.c | 45 +++++++++++++++++++++++++-------
>   drivers/cxl/cxlmem.h    |  7 +++++
>   drivers/cxl/pci.c       | 69 ++++++++++++++++++++++++++++++++++++++++++++++++-
>   3 files changed, 110 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 4df4f614f490..3f760d1d21de 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -860,26 +860,51 @@ static const uuid_t mem_mod_event_uuid =
>   	UUID_INIT(0xfe927475, 0xdd59, 0x4339,
>   		  0xa5, 0x86, 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74);
>   
> -static void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> -				   enum cxl_event_log_type type,
> -				   struct cxl_event_record_raw *record)
> +void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +			    enum cxl_event_log_type type,
> +			    struct cxl_event_record_raw *record,
> +			    enum cxl_cper_event cper_event)
>   {
> -	uuid_t *id = &record->hdr.id;
> -
> -	if (uuid_equal(id, &gen_media_event_uuid)) {
> +	switch (cper_event) {
> +	case CXL_CPER_EVENT_GEN_MEDIA: {
>   		struct cxl_event_gen_media *rec =
>   				(struct cxl_event_gen_media *)record;
>   
>   		trace_cxl_general_media(cxlmd, type, rec);
> -	} else if (uuid_equal(id, &dram_event_uuid)) {
> +		break;
> +		}
> +	case CXL_CPER_EVENT_DRAM: {
>   		struct cxl_event_dram *rec = (struct cxl_event_dram *)record;
>   
>   		trace_cxl_dram(cxlmd, type, rec);
> -	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
> +		break;
> +		}
> +	case CXL_CPER_EVENT_MEM_MODULE: {
>   		struct cxl_event_mem_module *rec =
>   				(struct cxl_event_mem_module *)record;
>   
>   		trace_cxl_memory_module(cxlmd, type, rec);
> +		break;
> +		}
> +	}
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, CXL);
> +
> +static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +				     enum cxl_event_log_type type,
> +				     struct cxl_event_record_raw *record)
> +{
> +	uuid_t *id = &record->hdr.id;
> +
> +	if (uuid_equal(id, &gen_media_event_uuid)) {
> +		cxl_event_trace_record(cxlmd, type, record,
> +				       CXL_CPER_EVENT_GEN_MEDIA);
> +	} else if (uuid_equal(id, &dram_event_uuid)) {
> +		cxl_event_trace_record(cxlmd, type, record,
> +				       CXL_CPER_EVENT_DRAM);
> +	} else if (uuid_equal(id, &mem_mod_event_uuid)) {
> +		cxl_event_trace_record(cxlmd, type, record,
> +				       CXL_CPER_EVENT_MEM_MODULE);
>   	} else {
>   		/* For unknown record types print just the header */
>   		trace_cxl_generic_event(cxlmd, type, record);
> @@ -991,8 +1016,8 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
>   			break;
>   
>   		for (i = 0; i < nr_rec; i++)
> -			cxl_event_trace_record(cxlmd, type,
> -					       &payload->records[i]);
> +			__cxl_event_trace_record(cxlmd, type,
> +						 &payload->records[i]);
>   
>   		if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
>   			trace_cxl_overflow(cxlmd, type, payload);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 706f8a6d1ef4..89bd85e7f51c 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -6,6 +6,7 @@
>   #include <linux/cdev.h>
>   #include <linux/uuid.h>
>   #include <linux/rcuwait.h>
> +#include <linux/efi.h>
>   #include "cxl.h"
>   
>   /* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
> @@ -477,6 +478,8 @@ struct cxl_memdev_state {
>   	struct cxl_security_state security;
>   	struct cxl_fw_state fw;
>   
> +	struct notifier_block cxl_cper_nb;
> +
>   	struct rcuwait mbox_wait;
>   	int (*mbox_send)(struct cxl_memdev_state *mds,
>   			 struct cxl_mbox_cmd *cmd);
> @@ -863,6 +866,10 @@ void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>   void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds,
>   				  unsigned long *cmds);
>   void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status);
> +void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
> +			    enum cxl_event_log_type type,
> +			    struct cxl_event_record_raw *record,
> +			    enum cxl_cper_event cper_event);
>   int cxl_set_timestamp(struct cxl_memdev_state *mds);
>   int cxl_poison_state_init(struct cxl_memdev_state *mds);
>   int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 44a21ab7add5..36d6f03e55de 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -1,5 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> +#include <asm-generic/unaligned.h>
>   #include <linux/io-64-nonatomic-lo-hi.h>
>   #include <linux/moduleparam.h>
>   #include <linux/module.h>
> @@ -10,6 +11,7 @@
>   #include <linux/pci.h>
>   #include <linux/aer.h>
>   #include <linux/io.h>
> +#include <linux/efi.h>
>   #include "cxlmem.h"
>   #include "cxlpci.h"
>   #include "cxl.h"
> @@ -748,6 +750,69 @@ static bool cxl_event_int_is_fw(u8 setting)
>   	return mode == CXL_INT_FW;
>   }
>   
> +#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
> +int cxl_cper_event_call(struct notifier_block *nb, unsigned long action, void *data)
> +{
> +	struct cxl_cper_notifier_data *nd = data;
> +	struct cxl_event_record_raw record = (struct cxl_event_record_raw) {
> +		.hdr.id = UUID_INIT(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
> +	};
> +	enum cxl_event_log_type log_type;
> +	struct cxl_memdev_state *mds;
> +	u32 hdr_flags;
> +
> +	mds = container_of(nb, struct cxl_memdev_state, cxl_cper_nb);
> +
> +	/* Need serial number for device identification */
> +	if (!(nd->rec->hdr.validation_bits & CPER_CXL_DEVICE_SN_VALID))
> +		return NOTIFY_DONE;

For all the event records that I have tested so far, this bit has never 
been set. That is, CPER_CXL_DEVICE_SN_VALID is never set, so the records 
might not be logged at all. Should we be a bit more lenient here and 
validate the device_id (BDF) instead, and check whether cxlds exists?

For example, via pci_get_domain_bus_and_slot() and pci_get_drvdata().

> +
> +	/* FIXME endianess and bytes of serial number need verification */
> +	/* FIXME Should other values be checked? */
> +	if (memcmp(&mds->cxlds.serial, &nd->rec->hdr.dev_serial_num,
> +		   sizeof(mds->cxlds.serial)))
> +		return NOTIFY_DONE;
> +
> +	/* ensure record can always handle the full CPER provided data */
> +	BUILD_BUG_ON(sizeof(record) <
> +		(CPER_CXL_COMP_EVENT_LOG_SIZE + sizeof(record.hdr.id)));
> +
> +	/*
> +	 * UEFI v2.10 defines N.2.14 defines the CXL CPER record as not
> +	 * including the uuid field.
> +	 */
> +	memcpy(&record.hdr.length, &nd->rec->comp_event_log,
> +		CPER_CXL_REC_LEN(nd->rec));

I'm doubtful this will do the job. I think we should copy into each 
field of struct cxl_event_record_hdr individually, starting from length, 
using pointer arithmetic (which is definitely bad, but I cannot think of 
a better way to do this), and then memcpy the data field of struct 
cxl_event_record_raw.

Any other suggestions would be helpful as well.

I can make these changes and validate them on my end, if that works?

Thanks,
Smita

> +
> +	/* Fabricate a log type */
> +	hdr_flags = get_unaligned_le24(record.hdr.flags);
> +	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
> +
> +	cxl_event_trace_record(mds->cxlds.cxlmd, log_type, &record,
> +			       nd->cper_event);
> +
> +	return NOTIFY_OK;
> +}
> +
> +static void cxl_unregister_cper_events(void *_mds)
> +{
> +	struct cxl_memdev_state *mds = _mds;
> +
> +	unregister_cxl_cper_notifier(&mds->cxl_cper_nb);
> +}
> +
> +static void register_cper_events(struct cxl_memdev_state *mds)
> +{
> +	mds->cxl_cper_nb.notifier_call = cxl_cper_event_call;
> +
> +	if (register_cxl_cper_notifier(&mds->cxl_cper_nb)) {
> +		dev_err(mds->cxlds.dev, "CPER registration failed\n");
> +		return;
> +	}
> +
> +	devm_add_action_or_reset(mds->cxlds.dev, cxl_unregister_cper_events, mds);
> +}
> +
>   static int cxl_event_config(struct pci_host_bridge *host_bridge,
>   			    struct cxl_memdev_state *mds)
>   {
> @@ -758,8 +823,10 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
>   	 * When BIOS maintains CXL error reporting control, it will process
>   	 * event records.  Only one agent can do so.
>   	 */
> -	if (!host_bridge->native_cxl_error)
> +	if (!host_bridge->native_cxl_error) {
> +		register_cper_events(mds);
>   		return 0;
> +	}
>   
>   	rc = cxl_mem_alloc_event_buf(mds);
>   	if (rc)
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ