[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250626224252.1415009-17-terry.bowman@amd.com>
Date: Thu, 26 Jun 2025 17:42:51 -0500
From: Terry Bowman <terry.bowman@....com>
To: <dave@...olabs.net>, <jonathan.cameron@...wei.com>,
<dave.jiang@...el.com>, <alison.schofield@...el.com>,
<dan.j.williams@...el.com>, <bhelgaas@...gle.com>, <shiju.jose@...wei.com>,
<ming.li@...omail.com>, <Smita.KoralahalliChannabasappa@....com>,
<rrichter@....com>, <dan.carpenter@...aro.org>,
<PradeepVineshReddy.Kodamati@....com>, <lukas@...ner.de>,
<Benjamin.Cheatham@....com>, <sathyanarayanan.kuppuswamy@...ux.intel.com>,
<terry.bowman@....com>, <linux-cxl@...r.kernel.org>
CC: <linux-kernel@...r.kernel.org>, <linux-pci@...r.kernel.org>
Subject: [PATCH v10 16/17] CXL/PCI: Enable CXL protocol errors during CXL Port probe
CXL protocol errors are not enabled for all CXL devices after boot. These
must be enabled in order to process CXL protocol errors.
Export the AER service driver's pci_aer_unmask_internal_errors().
Introduce cxl_unmask_proto_interrupts() to call pci_aer_unmask_internal_errors().
pci_aer_unmask_internal_errors() expects pdev->aer_cap to be initialized.
However, pdev->aer_cap is not initialized for CXL Upstream Switch Ports and CXL
Downstream Switch Ports. Initialize pdev->aer_cap if necessary. Enable AER
correctable internal errors and uncorrectable internal errors for all CXL
devices.
Signed-off-by: Terry Bowman <terry.bowman@....com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@...ux.intel.com>
---
drivers/cxl/port.c | 29 +++++++++++++++++++++++++++--
drivers/pci/pcie/cxl_aer.c | 3 ++-
include/linux/aer.h | 1 +
3 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index b52f82925891..b90f5efa5904 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -3,6 +3,7 @@
#include <linux/device.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/pci.h>
#include "cxlmem.h"
#include "cxlpci.h"
@@ -60,6 +61,21 @@ static int discover_region(struct device *dev, void *unused)
#ifdef CONFIG_PCIEAER_CXL
+static void cxl_unmask_proto_interrupts(struct device *dev)
+{
+ struct pci_dev *pdev __free(pci_dev_put) =
+ pci_dev_get(to_pci_dev(dev));
+
+ if (!pdev->aer_cap) {
+ pdev->aer_cap = pci_find_ext_capability(pdev,
+ PCI_EXT_CAP_ID_ERR);
+ if (!pdev->aer_cap)
+ return;
+ }
+
+ pci_aer_unmask_internal_errors(pdev);
+}
+
static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
resource_size_t aer_phys;
@@ -118,8 +134,12 @@ static void cxl_uport_init_ras_reporting(struct cxl_port *port,
map->host = host;
if (cxl_map_component_regs(map, &port->uport_regs,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
+ BIT(CXL_CM_CAP_CAP_ID_RAS))) {
dev_dbg(&port->dev, "Failed to map RAS capability\n");
+ return;
+ }
+
+ cxl_unmask_proto_interrupts(port->uport_dev);
}
/**
@@ -144,9 +164,12 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
}
if (cxl_map_component_regs(&dport->reg_map, &dport->regs.component,
- BIT(CXL_CM_CAP_CAP_ID_RAS)))
+ BIT(CXL_CM_CAP_CAP_ID_RAS))) {
dev_dbg(dport->dport_dev, "Failed to map RAS capability\n");
+ return;
+ }
+ cxl_unmask_proto_interrupts(dport->dport_dev);
}
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
@@ -180,6 +203,8 @@ static void cxl_endpoint_port_init_ras(struct cxl_port *port)
}
cxl_dport_init_ras_reporting(dport, cxlmd->cxlds->dev);
+
+ cxl_unmask_proto_interrupts(cxlmd->cxlds->dev);
}
#else
diff --git a/drivers/pci/pcie/cxl_aer.c b/drivers/pci/pcie/cxl_aer.c
index 38dc82df0baf..3c5bf162607c 100644
--- a/drivers/pci/pcie/cxl_aer.c
+++ b/drivers/pci/pcie/cxl_aer.c
@@ -18,7 +18,7 @@
* Note: AER must be enabled and supported by the device which must be
* checked in advance, e.g. with pcie_aer_is_native().
*/
-static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
+void pci_aer_unmask_internal_errors(struct pci_dev *dev)
{
int aer = dev->aer_cap;
u32 mask;
@@ -31,6 +31,7 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev)
mask &= ~PCI_ERR_COR_INTERNAL;
pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
}
+EXPORT_SYMBOL_NS_GPL(pci_aer_unmask_internal_errors, "CXL");
static bool is_cxl_mem_dev(struct pci_dev *dev)
{
diff --git a/include/linux/aer.h b/include/linux/aer.h
index f14db635ef90..8fb1eca97c37 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -113,5 +113,6 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
int cper_severity_to_aer(int cper_severity);
void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
int severity, struct aer_capability_regs *aer_regs);
+void pci_aer_unmask_internal_errors(struct pci_dev *dev);
#endif //_AER_H_
--
2.34.1
Powered by blists - more mailing lists