lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250905-sdxi-base-v1-6-d0341a1292ba@amd.com>
Date: Fri, 05 Sep 2025 13:48:29 -0500
From: Nathan Lynch via B4 Relay <devnull+nathan.lynch.amd.com@...nel.org>
To: Vinod Koul <vkoul@...nel.org>
Cc: Wei Huang <wei.huang2@....com>, 
 Mario Limonciello <mario.limonciello@....com>, 
 Bjorn Helgaas <bhelgaas@...gle.com>, linux-pci@...r.kernel.org, 
 linux-kernel@...r.kernel.org, dmaengine@...r.kernel.org
Subject: [PATCH RFC 06/13] dmaengine: sdxi: Add error reporting support

From: Nathan Lynch <nathan.lynch@....com>

SDXI implementations provide software with detailed information about
error conditions using a per-device ring buffer in system memory. When
an error condition is signaled via interrupt, the driver retrieves any
pending error log entries and reports them to the kernel log.

Co-developed-by: Wei Huang <wei.huang2@....com>
Signed-off-by: Wei Huang <wei.huang2@....com>
Signed-off-by: Nathan Lynch <nathan.lynch@....com>
---
 drivers/dma/sdxi/error.c | 340 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/sdxi/error.h |  16 +++
 2 files changed, 356 insertions(+)

diff --git a/drivers/dma/sdxi/error.c b/drivers/dma/sdxi/error.c
new file mode 100644
index 0000000000000000000000000000000000000000..c5e33f5989250352f6b081a3049b3b1f972c85a6
--- /dev/null
+++ b/drivers/dma/sdxi/error.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SDXI error reporting.
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/packing.h>
+#include <linux/types.h>
+
+#include "error.h"
+#include "mmio.h"
+#include "sdxi.h"
+
+/*
+ * The error log ring buffer size is configurable, but for now we fix it to
+ * 64 entries (the spec minimum). ERROR_LOG_SZ is the resulting size in bytes.
+ */
+#define ERROR_LOG_ENTRIES 64
+#define ERROR_LOG_SZ (ERROR_LOG_ENTRIES * sizeof(struct sdxi_errlog_hd_ent))
+
+/* The "unpacked" counterpart to ERRLOG_HD_ENT, filled in by unpack_fields(). */
+struct errlog_entry {
+	u64 dsc_index;	/* bits 127:64; reported only when div is set */
+	u16 cxt_num;	/* bits 63:48; reported only when cv is set */
+	u16 err_class;	/* bits 367:352 */
+	u16 type;	/* bits 26:16; must be OP_TYPE_ERRLOG to be reported */
+	u8 step;	/* bits 13:8; see enum errv_step */
+	u8 buf;		/* bits 38:36; reported only when bv is set */
+	u8 sub_step;	/* bits 43:40; see enum errv_sub_step */
+	u8 re;		/* bits 46:44; see enum fn_reaction */
+	bool vl;	/* bit 0; entries with vl clear are ignored */
+	bool cv;	/* bit 32 */
+	bool div;	/* bit 33 */
+	bool bv;	/* bit 34 */
+};
+
+#define ERRLOG_ENTRY_FIELD(hi_, lo_, name_)				\
+	PACKED_FIELD(hi_, lo_, struct errlog_entry, name_) /* bits hi_:lo_ -> name_ */
+#define ERRLOG_ENTRY_FLAG(nr_, name_) \
+	ERRLOG_ENTRY_FIELD(nr_, nr_, name_) /* one-bit field at bit nr_ */
+
+/* Refer to "Error Log Header Entry (ERRLOG_HD_ENT)"; used by sdxi_print_err(). */
+static const struct packed_field_u16 errlog_hd_ent_fields[] = {
+	ERRLOG_ENTRY_FLAG(0, vl),
+	ERRLOG_ENTRY_FIELD(13, 8, step),
+	ERRLOG_ENTRY_FIELD(26, 16, type),
+	ERRLOG_ENTRY_FLAG(32, cv),
+	ERRLOG_ENTRY_FLAG(33, div),
+	ERRLOG_ENTRY_FLAG(34, bv),
+	ERRLOG_ENTRY_FIELD(38, 36, buf),
+	ERRLOG_ENTRY_FIELD(43, 40, sub_step),
+	ERRLOG_ENTRY_FIELD(46, 44, re),
+	ERRLOG_ENTRY_FIELD(63, 48, cxt_num),
+	ERRLOG_ENTRY_FIELD(127, 64, dsc_index),
+	ERRLOG_ENTRY_FIELD(367, 352, err_class),	/* highest bit decoded: 367 */
+};
+
+enum {	/* flags passed to unpack_fields() when decoding log entries */
+	SDXI_PACKING_QUIRKS = QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST,
+};
+
+/*
+ * Values of the "step" subfield. Refer to "(Flagged) Processing Step" and
+ * "Error Log Header Entry (ERRLOG_HD_ENT)".
+ */
+enum errv_step {
+	ERRV_INT         = 1,	/* first named value; 0 has no table entry */
+	ERRV_CXT_L2      = 2,
+	ERRV_CXT_L1      = 3,
+	ERRV_CXT_CTL     = 4,
+	ERRV_CXT_STS     = 5,
+	ERRV_WRT_IDX     = 6,
+	ERRV_DSC_GEN     = 7,
+	ERRV_DSC_CSB     = 8,
+	ERRV_ATOMIC      = 9,
+	ERRV_DSC_BUF     = 10,
+	ERRV_DSC_AKEY    = 11,
+	ERRV_FN_RKEY     = 12,	/* last named value */
+};
+
+static const char *const processing_steps[] = {	/* indexed by enum errv_step */
+	[ERRV_INT]        = "Internal Error",
+	[ERRV_CXT_L2]     = "Context Level 2 Table Entry - Translate, Read, Validate",
+	[ERRV_CXT_L1]     = "Context Level 1 Table Entry - Translate, Read, Validate",
+	[ERRV_CXT_CTL]    = "Context Control - Translate, Read, Validate",
+	[ERRV_CXT_STS]    = "Context Status - Translate, Access, Validate",
+	[ERRV_WRT_IDX]    = "Write_Index - Translate, Read, Validate",
+	[ERRV_DSC_GEN]    = "Descriptor Entry - Translate, Access, Validate",
+	[ERRV_DSC_CSB]    = "Descriptor CST_BLK - Translate, Access, Validate",
+	[ERRV_ATOMIC]     = "Atomic Return Data - Translate, Access",
+	[ERRV_DSC_BUF]    = "Descriptor: Data Buffer - Translate, Access",
+	[ERRV_DSC_AKEY]   = "Descriptor AKey Lookup - Translate, Access, Validate",
+	[ERRV_FN_RKEY]    = "Function RKey Lookup - Translate, Read, Validate",
+};
+
+/*
+ * Describe a "step" value; values without a name are reported as "reserved".
+ */
+static const char *step_str(enum errv_step step)
+{
+	switch (step) {
+	case ERRV_INT ... ERRV_FN_RKEY:
+		return processing_steps[step];
+	default:
+		return "reserved";
+	}
+}
+
+/* Refer to "Error Log Header Entry (ERRLOG_HD_ENT)", subfield "sub_step" */
+enum errv_sub_step {
+	SUB_STEP_OTHER    = 0,
+	SUB_STEP_ATF      = 1,
+	SUB_STEP_DAF      = 2,
+	SUB_STEP_DVF      = 3,	/* last named value; the field is 4 bits wide */
+};
+
+static const char * const processing_sub_steps[] = {	/* indexed by enum errv_sub_step */
+	[SUB_STEP_OTHER]    = "Other/unknown",
+	[SUB_STEP_ATF]      = "Address Translation Failure",
+	[SUB_STEP_DAF]      = "Data Access Failure",
+	[SUB_STEP_DVF]      = "Data Validation Failure",
+};
+
+/*
+ * Describe a "sub_step" value; values without a name are "reserved".
+ */
+static const char *sub_step_str(enum errv_sub_step sub_step)
+{
+	switch (sub_step) {
+	case SUB_STEP_OTHER ... SUB_STEP_DVF:
+		return processing_sub_steps[sub_step];
+	default:
+		return "reserved";
+	}
+}
+
+/* Refer to "Error Log Header Entry (ERRLOG_HD_ENT)", subfield "re" */
+enum fn_reaction {
+	FN_REACT_INFORM      = 0,
+	FN_REACT_CXT_STOP    = 1,
+	FN_REACT_FN_STOP     = 2,	/* last named value; the field is 3 bits wide */
+};
+
+static const char * const fn_reactions[] = {	/* indexed by enum fn_reaction */
+	[FN_REACT_INFORM]      = "Informative, nothing stopped",
+	[FN_REACT_CXT_STOP]    = "Context stopped",
+	[FN_REACT_FN_STOP]     = "Function stopped",
+};
+
+/*
+ * Describe an "re" (reaction) value; values without a name are "reserved".
+ */
+static const char *reaction_str(enum fn_reaction reaction)
+{
+	switch (reaction) {
+	case FN_REACT_INFORM ... FN_REACT_FN_STOP:
+		return fn_reactions[reaction];
+	default:
+		return "reserved";
+	}
+}
+
+static void sdxi_print_err(struct sdxi_dev *sdxi, u64 err_rd)
+{
+	struct errlog_entry ent;
+	size_t index;
+
+	index = err_rd % ERROR_LOG_ENTRIES;
+
+	unpack_fields(&sdxi->err_log[index], sizeof(sdxi->err_log[0]),
+		      &ent, errlog_hd_ent_fields, SDXI_PACKING_QUIRKS);
+
+	if (!ent.vl) {
+		dev_err_ratelimited(sdxi_to_dev(sdxi),
+				    "Ignoring error log entry with vl=0\n");
+		return;
+	}
+
+	if (ent.type != OP_TYPE_ERRLOG) {
+		dev_err_ratelimited(sdxi_to_dev(sdxi),
+				    "Ignoring error log entry with type=%#x\n",
+				    ent.type);
+		return;
+	}
+
+	sdxi_err(sdxi, "error log entry[%zu], MMIO_ERR_RD=%#llx:\n",
+		 index, err_rd);
+	sdxi_err(sdxi, "  re: %#x (%s)\n", ent.re, reaction_str(ent.re));
+	sdxi_err(sdxi, "  step: %#x (%s)\n", ent.step, step_str(ent.step));
+	sdxi_err(sdxi, "  sub_step: %#x (%s)\n",
+		 ent.sub_step, sub_step_str(ent.sub_step));
+	sdxi_err(sdxi, "  cv: %u div: %u bv: %u\n", ent.cv, ent.div, ent.bv);
+	if (ent.bv)
+		sdxi_err(sdxi, "  buf: %u\n", ent.buf);
+	if (ent.cv)
+		sdxi_err(sdxi, "  cxt_num: %#x\n", ent.cxt_num);
+	if (ent.div)
+		sdxi_err(sdxi, "  dsc_index: %#llx\n", ent.dsc_index);
+	sdxi_err(sdxi, "  err_class: %#x\n", ent.err_class);
+}
+
+/* Threaded handler for error_irq. Refer to "Error Log Processing by Software" */
+static irqreturn_t sdxi_irq_thread(int irq, void *data)
+{
+	struct sdxi_dev *sdxi = data;	/* cookie given to request_threaded_irq() */
+	u64 write_index;
+	u64 read_index;
+	u64 err_sts;
+
+	/* 1. Check MMIO_ERR_STS and perform any required remediation. */
+	err_sts = sdxi_read64(sdxi, SDXI_MMIO_ERR_STS);
+	if (!(err_sts & SDXI_MMIO_ERR_STS_STS_BIT))
+		return IRQ_HANDLED;	/* sts clear: no log processing is done */
+
+	if (err_sts & SDXI_MMIO_ERR_STS_ERR_BIT) {
+		/*
+		 * Assume this isn't recoverable; e.g. the error log
+		 * isn't configured correctly. Don't clear
+		 * SDXI_MMIO_ERR_STS before returning.
+		 */
+		sdxi_err(sdxi, "attempted but failed to log errors\n");
+		sdxi_err(sdxi, "error log not functional\n");
+		return IRQ_HANDLED;
+	}
+
+	if (err_sts & SDXI_MMIO_ERR_STS_OVF_BIT)
+		sdxi_err(sdxi, "error log overflow, some entries lost\n");
+
+	/* 2. If MMIO_ERR_STS.sts is 1, then compute read_index. */
+	read_index = sdxi_read64(sdxi, SDXI_MMIO_ERR_RD);
+
+	/* 3. Clear MMIO_ERR_STS. The flags in this register are RW1C. */
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_STS,
+		     SDXI_MMIO_ERR_STS_STS_BIT |
+		     SDXI_MMIO_ERR_STS_OVF_BIT |
+		     SDXI_MMIO_ERR_STS_ERR_BIT);
+
+	/* 4. Compute write_index. */
+	write_index = sdxi_read64(sdxi, SDXI_MMIO_ERR_WRT);
+
+	/* 5. If the indexes are equal then exit, leaving MMIO_ERR_RD untouched. */
+	if (read_index == write_index)
+		return IRQ_HANDLED;
+
+	/* 6. While read_index < write_index... */
+	while (read_index < write_index) {
+
+		/*
+		 * 7. and 8. Compute the real ring buffer index from
+		 * read_index and process the entry.
+		 */
+		sdxi_print_err(sdxi, read_index);
+
+		/* 9. Advance read_index. */
+		++read_index;
+
+		/* 10. Return to step 6. */
+	}
+
+	/* 11. Write read_index to MMIO_ERR_RD. */
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_RD, read_index);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Allocate the error log, install the error IRQ handler and enable logging.
+ * Returns 0 on success or a negative errno. Refer to "Error Log Initialization".
+ */
+int sdxi_error_init(struct sdxi_dev *sdxi)
+{
+	u64 reg;
+	int err;
+
+	/* 1. Clear MMIO_ERR_CFG. Error interrupts are inhibited until step 6. */
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_CFG, 0);
+
+	/* 2. Clear MMIO_ERR_STS. The flags in this register are RW1C. */
+	reg = FIELD_PREP(SDXI_MMIO_ERR_STS_STS_BIT, 1) |
+	      FIELD_PREP(SDXI_MMIO_ERR_STS_OVF_BIT, 1) |
+	      FIELD_PREP(SDXI_MMIO_ERR_STS_ERR_BIT, 1);
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_STS, reg);
+
+	/* 3. Allocate memory for the error log ring buffer, initialize to zero. */
+	sdxi->err_log = dma_alloc_coherent(sdxi_to_dev(sdxi), ERROR_LOG_SZ,
+					   &sdxi->err_log_dma, GFP_KERNEL);
+	if (!sdxi->err_log)
+		return -ENOMEM;
+
+	/* 4. Set MMIO_ERR_CTL.intr_en: interrupt on context-level errors. */
+	reg = sdxi_read64(sdxi, SDXI_MMIO_ERR_CTL);
+	FIELD_MODIFY(SDXI_MMIO_ERR_CTL_EN, &reg, 1);
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_CTL, reg);
+
+	/*
+	 * The spec is not explicit about when to do this, but this
+	 * seems like the right time: enable interrupt on
+	 * function-level transition to error state.
+	 */
+	reg = sdxi_read64(sdxi, SDXI_MMIO_CTL0);
+	FIELD_MODIFY(SDXI_MMIO_CTL0_FN_ERR_INTR_EN, &reg, 1);
+	sdxi_write64(sdxi, SDXI_MMIO_CTL0, reg);
+
+	/* 5. Clear MMIO_ERR_WRT and MMIO_ERR_RD. */
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_WRT, 0);
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_RD, 0);
+
+	/*
+	 * Install the handler before step 6 enables error interrupts. With no
+	 * primary handler, genirq expects IRQF_ONESHOT for the threaded one.
+	 */
+	err = request_threaded_irq(sdxi->error_irq, NULL, sdxi_irq_thread,
+				   IRQF_ONESHOT, "SDXI error", sdxi);
+	if (err)
+		goto free_errlog;
+
+	/* 6. Program MMIO_ERR_CFG. */
+	reg = FIELD_PREP(SDXI_MMIO_ERR_CFG_PTR, sdxi->err_log_dma >> 12) |
+	      FIELD_PREP(SDXI_MMIO_ERR_CFG_SZ, ERROR_LOG_ENTRIES >> 6) |
+	      FIELD_PREP(SDXI_MMIO_ERR_CFG_EN, 1);
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_CFG, reg);
+
+	return 0;
+
+free_errlog:
+	dma_free_coherent(sdxi_to_dev(sdxi), ERROR_LOG_SZ,
+			  sdxi->err_log, sdxi->err_log_dma);
+	return err;
+}
+
+void sdxi_error_exit(struct sdxi_dev *sdxi)
+{
+	sdxi_write64(sdxi, SDXI_MMIO_ERR_CFG, 0);	/* clear the config programmed by sdxi_error_init() */
+	free_irq(sdxi->error_irq, sdxi);		/* handler requested in sdxi_error_init() */
+	dma_free_coherent(sdxi_to_dev(sdxi), ERROR_LOG_SZ,	/* the error log ring buffer */
+			  sdxi->err_log, sdxi->err_log_dma);
+}
diff --git a/drivers/dma/sdxi/error.h b/drivers/dma/sdxi/error.h
new file mode 100644
index 0000000000000000000000000000000000000000..50019d9811184464227ae13baa509101a2a3aacc
--- /dev/null
+++ b/drivers/dma/sdxi/error.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SDXI error handling entry points.
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#ifndef DMA_SDXI_ERROR_H
+#define DMA_SDXI_ERROR_H
+
+struct sdxi_dev;
+
+int sdxi_error_init(struct sdxi_dev *sdxi);	/* 0 on success, error code otherwise */
+void sdxi_error_exit(struct sdxi_dev *sdxi);	/* releases what sdxi_error_init() set up */
+
+#endif  /* DMA_SDXI_ERROR_H */

-- 
2.39.5



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ