[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241104124237.124109-4-orange@aiven.io>
Date: Mon, 4 Nov 2024 12:40:54 +0000
From: Orange Kao <orange@...en.io>
To: tony.luck@...el.com,
qiuxu.zhuo@...el.com
Cc: bp@...en8.de,
james.morse@....com,
orange@...sy.org,
linux-edac@...r.kernel.org,
linux-kernel@...r.kernel.org,
mchehab@...nel.org,
rric@...nel.org,
Orange Kao <orange@...en.io>
Subject: [PATCH 3/3] EDAC/igen6: Add polling support
Some PCs with Intel N100 (with PCI device 8086:461c, DID_ADL_N_SKU4)
experienced issues with error interrupts not working, even with the
following configuration in the BIOS.
In-Band ECC Support: Enabled
In-Band ECC Operation Mode: 2 (make all requests protected and
ignore range checks)
IBECC Error Injection Control: Inject Correctable Error on insertion
counter
Error Injection Insertion Count: 251658240 (0xf000000)
Add polling mode support for these machines to ensure that memory error
events are handled.
Signed-off-by: Orange Kao <orange@...en.io>
---
drivers/edac/igen6_edac.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index fa488ba15059..eb783c6b77f1 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -1170,6 +1170,20 @@ static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
return -ENODEV;
}
+static void igen6_check(struct mem_ctl_info *mci)
+{
+ struct igen6_imc *imc = mci->pvt_info;
+ u64 ecclog;
+
+ /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
+ ecclog = ecclog_read_and_clear(imc);
+ if (!ecclog)
+ return;
+
+ if (!ecclog_gen_pool_add(imc->mc, ecclog))
+ irq_work_queue(&ecclog_irq_work);
+}
+
static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
{
struct edac_mc_layer layers[2];
@@ -1211,6 +1225,8 @@ static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
mci->dev_name = pci_name(pdev);
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ mci->edac_check = igen6_check;
mci->pvt_info = &igen6_pvt->imc[mc];
imc = mci->pvt_info;
@@ -1352,6 +1368,10 @@ static void unregister_err_handler(void)
static void opstate_set(struct res_config *cfg)
{
+ /* Only the polling mode can be set via the module parameter. */
+ if (edac_op_state == EDAC_OPSTATE_POLL)
+ return;
+
/* Set the mode according to the configuration data. */
if (cfg->machine_check)
edac_op_state = EDAC_OPSTATE_INT;
@@ -1483,3 +1503,6 @@ module_exit(igen6_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Qiuxu Zhuo");
MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, Others or default=Auto detect");
--
2.47.0
Powered by blists - more mailing lists