[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251029-new-placid-woodlouse-6b975c@kuoka>
Date: Wed, 29 Oct 2025 07:52:54 +0100
From: Krzysztof Kozlowski <krzk@...nel.org>
To: niravkumarlaxmidas.rabara@...era.com
Cc: dinguyen@...nel.org, matthew.gerlach@...era.com, robh@...nel.org,
krzk+dt@...nel.org, conor+dt@...nel.org, bp@...en8.de, tony.luck@...el.com,
linux-edac@...r.kernel.org, devicetree@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 5/6] EDAC/altera: Add support for CRAM SEU error handling
on SoCFPGA
On Tue, Oct 28, 2025 at 05:22:31PM +0800, niravkumarlaxmidas.rabara@...era.com wrote:
> From: Niravkumar L Rabara <niravkumarlaxmidas.rabara@...era.com>
>
> Add new EDAC driver support for detecting and handling Single Event Upset
> (SEU) errors in the FPGA Configuration RAM (CRAM) on Altera SoCFPGA
> devices.
>
> The Secure Device Manager (SDM) is responsible for detecting correctable
> and uncorrectable SEU errors and notifies the CPU through a dedicated
> interrupt. Upon receiving the interrupt, the driver invokes an SMC call
> to the ARM Trusted Firmware (ATF) to query the error status.
> The ATF, in turn, communicates with the SDM via the mailbox interface to
> retrieve the error details and returns to the driver.
>
> Signed-off-by: Niravkumar L Rabara <niravkumarlaxmidas.rabara@...era.com>
> ---
> drivers/edac/Kconfig | 12 ++
> drivers/edac/altera_edac.c | 178 +++++++++++++++++++
> drivers/edac/altera_edac.h | 9 +
> include/linux/firmware/intel/stratix10-smc.h | 37 ++++
> 4 files changed, 236 insertions(+)
>
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 33a9fccde2fe..701b15e73a39 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -477,6 +477,18 @@ config EDAC_ALTERA_SDMMC
> Support for error detection and correction on the
> Altera SDMMC FIFO Memory for Altera SoCs.
>
> +config EDAC_ALTERA_CRAM_SEU
> + bool "Altera CRAM SEU"
> + depends on EDAC_ALTERA=y && 64BIT
> + help
> + Support for error detection and correction on Altera SoCs for
> + FPGA Configuration RAM(CRAM) Single Event Upset(SEU).
> + The SEU errors caused by radiation or other transient events are
> + monitored by the Secure Device Manager (SDM), which notifies the
> + CPU through a dedicated interrupt.
> + This driver uses an SMC interface to query the error status and
> + report events to the EDAC framework.
> +
> config EDAC_SIFIVE
> bool "Sifive platform EDAC driver"
> depends on EDAC=y && SIFIVE_CCACHE
> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
> index a82c3b01be1a..ac2151c625a2 100644
> --- a/drivers/edac/altera_edac.c
> +++ b/drivers/edac/altera_edac.c
> @@ -656,6 +656,19 @@ static const struct file_operations altr_edac_a10_device_inject_fops __maybe_unu
> .llseek = generic_file_llseek,
> };
>
> +#if IS_ENABLED(CONFIG_EDAC_ALTERA_CRAM_SEU)
> +static ssize_t __maybe_unused
> +altr_edac_seu_trig(struct file *file, const char __user *user_buf,
> + size_t count, loff_t *ppos);
> +
> +static const struct file_operations
> +altr_edac_cram_inject_fops __maybe_unused = {
> + .open = simple_open,
> + .write = altr_edac_seu_trig,
> + .llseek = generic_file_llseek,
> +};
> +#endif
> +
> #ifdef CONFIG_EDAC_ALTERA_IO96B
> static ssize_t __maybe_unused
> altr_edac_io96b_device_trig(struct file *file, const char __user *user_buf,
> @@ -1492,6 +1505,56 @@ static const struct edac_device_prv_data a10_usbecc_data = {
>
> #endif /* CONFIG_EDAC_ALTERA_USB */
>
> +#if IS_ENABLED(CONFIG_EDAC_ALTERA_CRAM_SEU)
> +static irqreturn_t seu_irq_handler(int irq, void *dev_id)
> +{
> + struct altr_edac_device_dev *dci = dev_id;
> + struct arm_smccc_res result;
> +
> + arm_smccc_smc(INTEL_SIP_SMC_SEU_ERR_STATUS, 0,
> + 0, 0, 0, 0, 0, 0, &result);
> +
> + if ((u32)result.a0) {
> + edac_printk(KERN_ERR, EDAC_DEVICE,
> + "SEU %s: Count=0x%X, SecAddr=0x%X, ErrData=0x%X\n",
> + ((u32)result.a2 & BIT(28)) == 0 ? "UE" : "CE",
> + (u32)result.a0, (u32)result.a1, (u32)result.a2);
> +
> + if ((u32)result.a2 & BIT(28))
> + edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
> + else
> + edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
> + }
> + return IRQ_HANDLED;
> +}
> +
> +static ssize_t __maybe_unused
> +altr_edac_seu_trig(struct file *file, const char __user *user_buf,
> + size_t count, loff_t *ppos)
> +{
> + struct edac_device_ctl_info *edac_dci = file->private_data;
> + struct altr_edac_device_dev *dev = edac_dci->pvt_info;
> + u8 trig_type;
> + struct arm_smccc_res result;
> +
> + if (!user_buf || get_user(trig_type, user_buf))
> + return -EFAULT;
> +
> + if (trig_type == ALTR_UE_TRIGGER_CHAR)
> + arm_smccc_smc(INTEL_SIP_SMC_SAFE_INJECT_SEU_ERR,
> + ((u64)dev->seu.ue_msb << 32) |
> + dev->seu.ue_lsb,
> + 2, 0, 0, 0, 0, 0, &result);
> + else
> + arm_smccc_smc(INTEL_SIP_SMC_SAFE_INJECT_SEU_ERR,
> + ((u64)dev->seu.ce_msb << 32) |
> + dev->seu.ce_lsb, 2, 0, 0, 0,
> + 0, 0, &result);
> +
> + return count;
> +}
> +#endif
> +
> /********************** QSPI Device Functions **********************/
>
> #ifdef CONFIG_EDAC_ALTERA_QSPI
> @@ -2031,6 +2094,117 @@ static int get_s10_sdram_edac_resource(struct device_node *np,
> return ret;
> }
>
> +#if IS_ENABLED(CONFIG_EDAC_ALTERA_CRAM_SEU)
> +static int altr_edac_seu_device_add(struct altr_arria10_edac *edac,
> + struct platform_device *pdev, struct device_node *dev_node)
> +{
> + struct edac_device_ctl_info *dci;
> + struct altr_edac_device_dev *altdev;
> + char *ecc_name = kstrdup(dev_node->name, GFP_KERNEL);
> + int edac_idx;
> + int seu_irq;
> + int rc = 0;
> +
> + seu_irq = platform_get_irq_byname(pdev, "sdm_seu");
> + if (seu_irq < 0) {
> + dev_warn(&pdev->dev, "no %s IRQ defined\n", "sdm_seu");
> + return 0;
> + }
> +
> + edac_idx = edac_device_alloc_index();
> + dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name,
> + 1, ecc_name, 1, 0, edac_idx);
> + if (!dci) {
> + edac_printk(KERN_ERR, EDAC_DEVICE,
> + "%s: Unable to allocate EDAC device\n", ecc_name);
NAK, you never print errors on ENOMEM.
Best regards,
Krzysztof
Powered by blists - more mailing lists