[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250915131151.00005f26@huawei.com>
Date: Mon, 15 Sep 2025 13:11:51 +0100
From: Jonathan Cameron <jonathan.cameron@...wei.com>
To: Nathan Lynch via B4 Relay <devnull+nathan.lynch.amd.com@...nel.org>
CC: <nathan.lynch@....com>, Vinod Koul <vkoul@...nel.org>, Wei Huang
<wei.huang2@....com>, Mario Limonciello <mario.limonciello@....com>, "Bjorn
Helgaas" <bhelgaas@...gle.com>, <linux-pci@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <dmaengine@...r.kernel.org>
Subject: Re: [PATCH RFC 06/13] dmaengine: sdxi: Add error reporting support
On Fri, 05 Sep 2025 13:48:29 -0500
Nathan Lynch via B4 Relay <devnull+nathan.lynch.amd.com@...nel.org> wrote:
> From: Nathan Lynch <nathan.lynch@....com>
>
> SDXI implementations provide software with detailed information about
> error conditions using a per-device ring buffer in system memory. When
> an error condition is signaled via interrupt, the driver retrieves any
> pending error log entries and reports them to the kernel log.
>
> Co-developed-by: Wei Huang <wei.huang2@....com>
> Signed-off-by: Wei Huang <wei.huang2@....com>
> Signed-off-by: Nathan Lynch <nathan.lynch@....com>
Hi,
A few more comments inline. Mostly similar points about
having both register definitions for unpacking and the structure
definitions in patch 2.
Thanks,
Jonathan
> ---
> drivers/dma/sdxi/error.c | 340 +++++++++++++++++++++++++++++++++++++++++++++++
> drivers/dma/sdxi/error.h | 16 +++
> 2 files changed, 356 insertions(+)
>
> diff --git a/drivers/dma/sdxi/error.c b/drivers/dma/sdxi/error.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..c5e33f5989250352f6b081a3049b3b1f972c85a6
> --- /dev/null
> +++ b/drivers/dma/sdxi/error.c
> +/* The "unpacked" counterpart to ERRLOG_HD_ENT. */
> +struct errlog_entry {
> + u64 dsc_index;
> + u16 cxt_num;
> + u16 err_class;
> + u16 type;
> + u8 step;
> + u8 buf;
> + u8 sub_step;
> + u8 re;
> + bool vl;
> + bool cv;
> + bool div;
> + bool bv;
> +};
> +
> +#define ERRLOG_ENTRY_FIELD(hi_, lo_, name_) \
> + PACKED_FIELD(hi_, lo_, struct errlog_entry, name_)
> +#define ERRLOG_ENTRY_FLAG(nr_, name_) \
> + ERRLOG_ENTRY_FIELD(nr_, nr_, name_)
> +
> +/* Refer to "Error Log Header Entry (ERRLOG_HD_ENT)" */
> +static const struct packed_field_u16 errlog_hd_ent_fields[] = {
> + ERRLOG_ENTRY_FLAG(0, vl),
> + ERRLOG_ENTRY_FIELD(13, 8, step),
> + ERRLOG_ENTRY_FIELD(26, 16, type),
> + ERRLOG_ENTRY_FLAG(32, cv),
> + ERRLOG_ENTRY_FLAG(33, div),
> + ERRLOG_ENTRY_FLAG(34, bv),
> + ERRLOG_ENTRY_FIELD(38, 36, buf),
> + ERRLOG_ENTRY_FIELD(43, 40, sub_step),
> + ERRLOG_ENTRY_FIELD(46, 44, re),
> + ERRLOG_ENTRY_FIELD(63, 48, cxt_num),
> + ERRLOG_ENTRY_FIELD(127, 64, dsc_index),
> + ERRLOG_ENTRY_FIELD(367, 352, err_class),
To me, the association between the fields here and struct sdxi_err_log_hd_ent
should be made via some defines in patch 2 for the various fields
embedded in misc0 etc.
> +};
> +static void sdxi_print_err(struct sdxi_dev *sdxi, u64 err_rd)
> +{
> + struct errlog_entry ent;
> + size_t index;
> +
> + index = err_rd % ERROR_LOG_ENTRIES;
> +
> + unpack_fields(&sdxi->err_log[index], sizeof(sdxi->err_log[0]),
> + &ent, errlog_hd_ent_fields, SDXI_PACKING_QUIRKS);
> +
> + if (!ent.vl) {
> + dev_err_ratelimited(sdxi_to_dev(sdxi),
> + "Ignoring error log entry with vl=0\n");
> + return;
> + }
> +
> + if (ent.type != OP_TYPE_ERRLOG) {
> + dev_err_ratelimited(sdxi_to_dev(sdxi),
> + "Ignoring error log entry with type=%#x\n",
> + ent.type);
> + return;
> + }
> +
> + sdxi_err(sdxi, "error log entry[%zu], MMIO_ERR_RD=%#llx:\n",
> + index, err_rd);
> + sdxi_err(sdxi, " re: %#x (%s)\n", ent.re, reaction_str(ent.re));
> + sdxi_err(sdxi, " step: %#x (%s)\n", ent.step, step_str(ent.step));
> + sdxi_err(sdxi, " sub_step: %#x (%s)\n",
> + ent.sub_step, sub_step_str(ent.sub_step));
> + sdxi_err(sdxi, " cv: %u div: %u bv: %u\n", ent.cv, ent.div, ent.bv);
> + if (ent.bv)
> + sdxi_err(sdxi, " buf: %u\n", ent.buf);
> + if (ent.cv)
> + sdxi_err(sdxi, " cxt_num: %#x\n", ent.cxt_num);
> + if (ent.div)
> + sdxi_err(sdxi, " dsc_index: %#llx\n", ent.dsc_index);
> + sdxi_err(sdxi, " err_class: %#x\n", ent.err_class);
Consider using tracepoints for error logging rather than large splats in the
log. Maybe you add those in later patches!
I'd then just fill the tracepoint in directly rather than have an unpacking
step.
> +}
> +/* Refer to "Error Log Initialization" */
> +int sdxi_error_init(struct sdxi_dev *sdxi)
> +{
> + u64 reg;
> + int err;
> +
> + /* 1. Clear MMIO_ERR_CFG. Error interrupts are inhibited until step 6. */
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_CFG, 0);
> +
> + /* 2. Clear MMIO_ERR_STS. The flags in this register are RW1C. */
> + reg = FIELD_PREP(SDXI_MMIO_ERR_STS_STS_BIT, 1) |
> + FIELD_PREP(SDXI_MMIO_ERR_STS_OVF_BIT, 1) |
> + FIELD_PREP(SDXI_MMIO_ERR_STS_ERR_BIT, 1);
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_STS, reg);
> +
> + /* 3. Allocate memory for the error log ring buffer, initialize to zero. */
> + sdxi->err_log = dma_alloc_coherent(sdxi_to_dev(sdxi), ERROR_LOG_SZ,
> + &sdxi->err_log_dma, GFP_KERNEL);
> + if (!sdxi->err_log)
> + return -ENOMEM;
> +
> + /*
> + * 4. Set MMIO_ERR_CTL.intr_en to 1 if interrupts on
> + * context-level errors are desired.
> + */
> + reg = sdxi_read64(sdxi, SDXI_MMIO_ERR_CTL);
> + FIELD_MODIFY(SDXI_MMIO_ERR_CTL_EN, &reg, 1);
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_CTL, reg);
> +
> + /*
> + * The spec is not explicit about when to do this, but this
> + * seems like the right time: enable interrupt on
> + * function-level transition to error state.
> + */
> + reg = sdxi_read64(sdxi, SDXI_MMIO_CTL0);
> + FIELD_MODIFY(SDXI_MMIO_CTL0_FN_ERR_INTR_EN, &reg, 1);
> + sdxi_write64(sdxi, SDXI_MMIO_CTL0, reg);
> +
> + /* 5. Clear MMIO_ERR_WRT and MMIO_ERR_RD. */
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_WRT, 0);
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_RD, 0);
> +
> + /*
> + * Error interrupts can be generated once MMIO_ERR_CFG.en is
> + * set in step 6, so set up the handler now.
> + */
> + err = request_threaded_irq(sdxi->error_irq, NULL, sdxi_irq_thread,
> + IRQF_TRIGGER_NONE, "SDXI error", sdxi);
> + if (err)
> + goto free_errlog;
> +
> + /* 6. Program MMIO_ERR_CFG. */
I'm guessing these are numbered steps in some bit of the spec?
If not, some of these comments, like this one, provide no value. We can
see what is being written from the code! Perhaps add a very specific
spec reference if you want to show why the numbering is here.
> + reg = FIELD_PREP(SDXI_MMIO_ERR_CFG_PTR, sdxi->err_log_dma >> 12) |
> + FIELD_PREP(SDXI_MMIO_ERR_CFG_SZ, ERROR_LOG_ENTRIES >> 6) |
> + FIELD_PREP(SDXI_MMIO_ERR_CFG_EN, 1);
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_CFG, reg);
> +
> + return 0;
> +
> +free_errlog:
> + dma_free_coherent(sdxi_to_dev(sdxi), ERROR_LOG_SZ,
> + sdxi->err_log, sdxi->err_log_dma);
> + return err;
> +}
> +
> +void sdxi_error_exit(struct sdxi_dev *sdxi)
> +{
> + sdxi_write64(sdxi, SDXI_MMIO_ERR_CFG, 0);
> + free_irq(sdxi->error_irq, sdxi);
> + dma_free_coherent(sdxi_to_dev(sdxi), ERROR_LOG_SZ,
> + sdxi->err_log, sdxi->err_log_dma);
> +}
>
Powered by blists - more mailing lists