lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 16 Jun 2022 12:43:34 -0700
From:   Davidlohr Bueso <dave@...olabs.net>
To:     "alison.schofield@...el.com" <alison.schofield@...el.com>
Cc:     Dan Williams <dan.j.williams@...el.com>,
        Ira Weiny <ira.weiny@...el.com>,
        Vishal Verma <vishal.l.verma@...el.com>,
        Ben Widawsky <bwidawsk@...nel.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ingo Molnar <mingo@...hat.com>, linux-cxl@...r.kernel.org,
        linux-kernel@...r.kernel.org, a.manzanares@...sung.com
Subject: Re: [PATCH 2/3] cxl/mbox: Add GET_POISON_LIST mailbox command support

On Tue, 14 Jun 2022, alison.schofield@...el.com wrote:

>From: Alison Schofield <alison.schofield@...el.com>
>
>CXL devices that support persistent memory maintain a list of locations
>that are poisoned or result in poison if the addresses are accessed by
>the host.
>
>Per the spec (CXL 2.0 8.2.8.5.4.1), the device returns this Poison
>list as a set of  Media Error Records that include the source of the
>error, the starting device physical address and length. The length is
>the number of adjacent DPAs in the record and is in units of 64 bytes.
>
>Retrieve the list and log each Media Error Record as a trace event of
>type cxl_poison_list.
>
>Signed-off-by: Alison Schofield <alison.schofield@...el.com>
>---
> drivers/cxl/cxlmem.h    | 43 +++++++++++++++++++++++
> drivers/cxl/core/mbox.c | 75 +++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 118 insertions(+)
>
>diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>index 60d10ee1e7fc..29cf0459b44a 100644
>--- a/drivers/cxl/cxlmem.h
>+++ b/drivers/cxl/cxlmem.h
>@@ -174,6 +174,7 @@ struct cxl_endpoint_dvsec_info {
>  *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
>  * @lsa_size: Size of Label Storage Area
>  *                (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
>+ * @poison_max_mer: maximum Media Error Records tracked in Poison List
>  * @mbox_mutex: Mutex to synchronize mailbox access.
>  * @firmware_version: Firmware version for the memory device.
>  * @enabled_cmds: Hardware commands found enabled in CEL.
>@@ -204,6 +205,7 @@ struct cxl_dev_state {
>
>	size_t payload_size;
>	size_t lsa_size;
>+	u32 poison_max;
>	struct mutex mbox_mutex; /* Protects device mailbox and firmware */
>	char firmware_version[0x10];
>	DECLARE_BITMAP(enabled_cmds, CXL_MEM_COMMAND_ID_MAX);
>@@ -317,6 +319,46 @@ struct cxl_mbox_set_partition_info {
>
> #define  CXL_SET_PARTITION_IMMEDIATE_FLAG	BIT(0)
>
>+struct cxl_mbox_poison_payload_in {
>+	__le64 offset;
>+	__le64 length;
>+} __packed;
>+
>+struct cxl_mbox_poison_payload_out {
>+	u8 flags;
>+	u8 rsvd1;
>+	__le64 overflow_timestamp;
>+	__le16 count;
>+	u8 rsvd2[0x14];
>+	struct cxl_poison_record {
>+		__le64 address;
>+		__le32 length;
>+		__le32 rsvd;
>+	} __packed record[];
>+} __packed;
>+
>+/* CXL 8.2.9.5.4.1 Get Poison List: payload out flags: */
>+#define CXL_POISON_FLAG_MORE            BIT(0)
>+#define CXL_POISON_FLAG_OVERFLOW        BIT(1)
>+#define CXL_POISON_FLAG_SCANNING        BIT(2)
>+
>+/* CXL 8.2.9.5.4.1 Get Poison List: Error is encoded in record.address[2:0] */
>+#define CXL_POISON_SOURCE_MASK		GENMASK(2, 0)
>+#define	CXL_POISON_SOURCE_UNKNOWN	0
>+#define	CXL_POISON_SOURCE_EXTERNAL	1
>+#define	CXL_POISON_SOURCE_INTERNAL	2
>+#define	CXL_POISON_SOURCE_INJECTED	3
>+#define	CXL_POISON_SOURCE_VENDOR	7
>+
>+/* Software define */
>+#define	CXL_POISON_SOURCE_INVALID	99
>+#define CXL_POISON_SOURCE_VALID(x)		\
>+	(((x) == CXL_POISON_SOURCE_UNKNOWN)  ||	\
>+	 ((x) == CXL_POISON_SOURCE_EXTERNAL) ||	\
>+	 ((x) == CXL_POISON_SOURCE_INTERNAL) ||	\
>+	 ((x) == CXL_POISON_SOURCE_INJECTED) ||	\
>+	 ((x) == CXL_POISON_SOURCE_VENDOR))
>+
> /**
>  * struct cxl_mem_command - Driver representation of a memory device command
>  * @info: Command information as it exists for the UAPI
>@@ -351,6 +393,7 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
> struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
> void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
>+int cxl_mem_get_poison_list(struct device *dev);
> #ifdef CONFIG_CXL_SUSPEND
> void cxl_mem_active_inc(void);
> void cxl_mem_active_dec(void);
>diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
>index 54f434733b56..c10c7020ebc2 100644
>--- a/drivers/cxl/core/mbox.c
>+++ b/drivers/cxl/core/mbox.c
>@@ -9,6 +9,9 @@
>
> #include "core.h"
>
>+#define CREATE_TRACE_POINTS
>+#include <trace/events/cxl.h>
>+
> static bool cxl_raw_allow_all;
>
> /**
>@@ -755,6 +758,7 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
> {
>	/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
>	struct cxl_mbox_identify id;
>+	__le32 val = 0;
>	int rc;
>
>	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
>@@ -783,6 +787,9 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
>	cxlds->lsa_size = le32_to_cpu(id.lsa_size);
>	memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
>
>+	memcpy(&val, id.poison_list_max_mer, 3);
>+	cxlds->poison_max = le32_to_cpu(val);
>+
>	return 0;
> }
> EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
>@@ -826,6 +833,74 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
>
>+int cxl_mem_get_poison_list(struct device *dev)
>+{
>+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>+	struct cxl_mbox_poison_payload_out *po;
>+	struct cxl_mbox_poison_payload_in pi;
>+	int nr_records = 0;
>+	int rc, i;
>+
>+	if (range_len(&cxlds->pmem_range)) {
>+		pi.offset = cpu_to_le64(cxlds->pmem_range.start);
>+		pi.length = cpu_to_le64(range_len(&cxlds->pmem_range));

Do you ever see this changing to not always use the full pmem DPA range
but allow arbitrary ones? I also assume this is the reason why you don't
check the range vs cxlds->ram_range to prevent any overlaps, no?

Thanks,
Davidlohr

>+	} else {
>+		return -ENXIO;
>+	}
>+
>+	po = kvmalloc(cxlds->payload_size, GFP_KERNEL);
>+	if (!po)
>+		return -ENOMEM;
>+
>+	do {
>+		rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_POISON, &pi,
>+				       sizeof(pi), po, cxlds->payload_size);
>+		if (rc)
>+			goto out;
>+
>+		if (po->flags & CXL_POISON_FLAG_OVERFLOW) {
>+			time64_t o_time = le64_to_cpu(po->overflow_timestamp);
>+
>+			dev_err(dev, "Poison list overflow at %ptTs UTC\n",
>+				&o_time);
>+			rc = -ENXIO;
>+			goto out;
>+		}
>+
>+		if (po->flags & CXL_POISON_FLAG_SCANNING) {
>+			dev_err(dev, "Scan Media in Progress\n");
>+			rc = -EBUSY;
>+			goto out;
>+		}
>+
>+		for (i = 0; i < le16_to_cpu(po->count); i++) {
>+			u64 addr = le64_to_cpu(po->record[i].address);
>+			u32 len = le32_to_cpu(po->record[i].length);
>+			int source = FIELD_GET(CXL_POISON_SOURCE_MASK, addr);
>+
>+			if (!CXL_POISON_SOURCE_VALID(source)) {
>+				dev_dbg(dev, "Invalid poison source %d",
>+					source);
>+				source = CXL_POISON_SOURCE_INVALID;
>+			}
>+
>+			trace_cxl_poison_list(dev, source, addr, len);
>+		}
>+
>+		/* Protect against an uncleared _FLAG_MORE */
>+		nr_records = nr_records + le16_to_cpu(po->count);
>+		if (nr_records >= cxlds->poison_max)
>+			goto out;
>+
>+	} while (po->flags & CXL_POISON_FLAG_MORE);
>+
>+out:
>+	kvfree(po);
>+	return rc;
>+}
>+EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison_list, CXL);
>+
> struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
> {
>	struct cxl_dev_state *cxlds;
>
>--
>2.31.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ